diff --git a/.ci/lanl/gitlab-darwin-ci.yml b/.ci/lanl/gitlab-darwin-ci.yml index 3c850ce2b7c..b65cd577152 100644 --- a/.ci/lanl/gitlab-darwin-ci.yml +++ b/.ci/lanl/gitlab-darwin-ci.yml @@ -1,5 +1,5 @@ variables: - SCHEDULER_PARAMETERS: "-pgeneral -t 1:00:00 -N 1 --ntasks-per-node=16" + SCHEDULER_PARAMETERS: "-pgeneral -t 4:00:00 -N 1 --ntasks-per-node=16" GIT_STRATEGY: clone NPROCS: 4 @@ -11,10 +11,12 @@ build:intel: stage: build tags: [darwin-slurm-shared] script: - - module load intel + - module load intel/2022.0.1 + - rm .gitmodules + - cp $GITSUBMODULEPATCH .gitmodules - git submodule update --init - ./autogen.pl - - ./configure CC=icc FC=ifort CXX=icpc --prefix=$PWD/install_test --with-libevent=internal + - ./configure CC=icx FC=ifx CXX=icpx --prefix=$PWD/install_test --with-libevent=internal - make -j 8 install - make check - export PATH=$PWD/install_test/bin:$PATH @@ -32,9 +34,11 @@ build:ibm: stage: build tags: [darwin-slurm-shared] variables: - SCHEDULER_PARAMETERS: "-ppower9 -t 1:00:00 -N 1 --ntasks-per-node=16" + SCHEDULER_PARAMETERS: "-ppower9 -t 4:00:00 -N 1 --ntasks-per-node=16" script: - module load ibm + - rm .gitmodules + - cp $GITSUBMODULEPATCH .gitmodules - git submodule update --init - ./autogen.pl - ./configure CC=xlc FC=xlf CXX=xlc++ --prefix=$PWD/install_test --with-libevent=internal @@ -55,9 +59,11 @@ build:amd: stage: build tags: [darwin-slurm-shared] variables: - SCHEDULER_PARAMETERS: "-pamd-rome -t 1:00:00 -N 1 --ntasks-per-node=16" + SCHEDULER_PARAMETERS: "-pamd-rome -t 4:00:00 -N 1 --ntasks-per-node=16" script: - module load aocc/3.0.0 + - rm .gitmodules + - cp $GITSUBMODULEPATCH .gitmodules - git submodule update --init - ./autogen.pl - ./configure CC=clang FC=flang CXX=clang++ --prefix=$PWD/install_test --with-libevent=internal @@ -79,6 +85,8 @@ build:gnu: tags: [darwin-slurm-shared] script: - module load gcc + - rm .gitmodules + - cp $GITSUBMODULEPATCH .gitmodules - git submodule update --init - ./autogen.pl - ./configure 
--prefix=$PWD/install_test --with-libevent=internal @@ -104,7 +112,7 @@ test:intel: script: - pwd - ls - - module load intel + - module load intel/2022.0.1 - export PATH=$PWD/install_test/bin:$PATH - which mpirun - cd examples @@ -126,7 +134,7 @@ test:ibm: stage: test tags: [darwin-slurm-shared] variables: - SCHEDULER_PARAMETERS: "-ppower9 -t 1:00:00 -N 1 --ntasks-per-node=16" + SCHEDULER_PARAMETERS: "-ppower9 -t 2:00:00 -N 1 --ntasks-per-node=16" dependencies: - build:ibm needs: ["build:ibm"] @@ -136,7 +144,7 @@ test:ibm: - module load ibm - export PATH=$PWD/install_test/bin:$PATH - which mpirun - - cd examples + - pushd examples - mpirun -np 4 hostname - mpirun -np 4 ./hello_c - mpirun -np 4 ./ring_c @@ -147,6 +155,24 @@ test:ibm: - mpirun -np 4 ./hello_usempif08 - mpirun -np 4 ./ring_usempif08 - mpirun -np 4 ./connectivity_c + - popd + - mkdir osu-tests + - pushd osu-tests + - cp -p -r $OSU_TESTS_FOLDER/* . + - ./configure CC=mpicc FC=mpifort F77=mpifort CXX=mpiCC && make -j 8 clean && make -j 8 + - pushd mpi/pt2pt + - mpirun -np 2 ./osu_latency + - mpirun -np 2 ./osu_latency D H + - mpirun -np 2 ./osu_latency H D + - mpirun -np 2 ./osu_latency H H + - mpirun -np 2 ./osu_bw + - mpirun -np 2 ./osu_bw D H + - mpirun -np 2 ./osu_bw H D + - mpirun -np 2 ./osu_bw H H + - mpirun -np 2 ./osu_bibw + - mpirun -np 2 ./osu_bibw D H + - mpirun -np 2 ./osu_bibw H D + - mpirun -np 2 ./osu_bibw H H artifacts: name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME" expire_in: 1 week @@ -155,7 +181,7 @@ test:amd: stage: test tags: [darwin-slurm-shared] variables: - SCHEDULER_PARAMETERS: "-pamd-rome -t 1:00:00 -N 1 --ntasks-per-node=16" + SCHEDULER_PARAMETERS: "-pamd-rome -t 2:00:00 -N 1 --ntasks-per-node=16" dependencies: - build:amd needs: ["build:amd"] diff --git a/.gitignore b/.gitignore index c8192f1aa3f..089d60d9614 100644 --- a/.gitignore +++ b/.gitignore @@ -118,6 +118,12 @@ ltoptions.m4 3rd-party/romio341/mpl/include/mpl_timer.h 3rd-party/romio341/mpl/localdefs 
3rd-party/romio341/test/runtests +3rd-party/romio341/test/fcoll_test.f +3rd-party/romio341/test/fmisc.f +3rd-party/romio341/test/fperf.f +3rd-party/romio341/test/large_file.c +3rd-party/romio341/test/misc.c +3rd-party/romio341/test/pfcoll_test.f 3rd-party/romio341/util/romioinstall config/project_list.m4 @@ -341,6 +347,7 @@ ompi/tools/mpisync/mpisync ompi/tools/mpisync/mpirun_prof ompi/tools/mpisync/ompi_timing_post ompi/tools/mpisync/mpisync.1 +ompi/tools/mpirun/mpirun ompi/tools/ompi_info/ompi_info ompi/tools/ompi_info/ompi_info.1 @@ -588,6 +595,10 @@ oshmem/tools/oshmem_info/oshmem_info.1 oshmem/tools/wrappers/oshcc.1 oshmem/tools/wrappers/oshfort.1 oshmem/tools/wrappers/oshrun.1 +oshmem/tools/wrappers/oshmem-c.pc +oshmem/tools/wrappers/oshmem-cxx.pc +oshmem/tools/wrappers/oshmem-fort.pc +oshmem/tools/wrappers/oshmem.pc oshmem/tools/wrappers/shmemcc.1 oshmem/tools/wrappers/shmemfort.1 oshmem/tools/wrappers/shmemrun.1 diff --git a/.gitmodules b/.gitmodules index 81400e0d6de..75db8c1efff 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,8 +1,8 @@ [submodule "prrte"] path = 3rd-party/prrte - url = https://github.com/openpmix/prrte + url = ../../openpmix/prrte branch = master [submodule "openpmix"] path = 3rd-party/openpmix - url = https://github.com/openpmix/openpmix.git + url = ../../openpmix/openpmix.git branch = master diff --git a/.mailmap b/.mailmap index 4caee793c7c..dfd5c277c37 100644 --- a/.mailmap +++ b/.mailmap @@ -121,3 +121,10 @@ Nick Papior Matthew G. F. Dosanjh Wei-keng Liao + +Samuel K. Gutierrez +Samuel K. Gutierrez + +Tomislav Janjusic Tomislavj Janjusic + +William P. 
LePera diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index c4069c3cfae..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,82 +0,0 @@ -# Use "required" for sudo, because we want to use the "trusty" Debian -# distro, which is (currently) only available in the legacy Travis -# infrastructure (i.e., if we put "sudo: false" to use the new container- -# based Travis infrastructure, then "trusty" is not available). We -# need the "trusty" distro because it has more recent versions of the -# GNU Autotools (i.e., autogen.pl will fail if you use the regular -# distro because the GNU Autotools are too old). -sudo: required -dist: trusty -language: c - -# Iterate over 2 different compilers -compiler: - - gcc - - clang - -# Test only linux now -os: - - linux - -addons: - # For Linux, make sure we have some extra packages that we like to - # build with - apt: - packages: - - autoconf - - automake - - libtool - - libnl-3-200 - - libnl-3-dev - - libnl-route-3-200 - - libnl-route-3-dev - - libibverbs-dev - - librdmacm-dev - - libhwloc-dev - sources: - - ubuntu-toolchain-r-test - -env: - global: - - AM_MAKEFLAGS="-j4" - - LD_LIBRARY_PATH="$HOME/bogus/lib" - matrix: - - GCC_VERSION=default - - GCC_VERSION=6 - -# Install dependencies for the verbs and usnic providers. Open MPI is -# not currently using the verbs provider in Libfabric, so we might as -# well not build it. 
-before_install: - - if [[ "GCC_VERSION" == "6" ]]; then COMPILERS="CC=gcc-6 CXX=g++-6 FC=gfortran-6"; fi - - export CONFIGURE_ARGS="--prefix=$HOME/bogus $COMPILERS CPPFLAGS=-I$HOME/bogus/include LDFLAGS=-L$HOME/bogus/lib" DISTCHECK_CONFIGURE_FLAGS="$CONFIGURE_ARGS" - - export DISTCHECK_CONFIGURE_FLAGS="$CONFIGURE_ARGS" - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then git clone https://github.com/ofiwg/libfabric.git ; fi - - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$GCC_VERSION" == "6" ]] ; then sudo apt-get --assume-yes install gcc-6 g++-6 gfortran-6; fi - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then cd libfabric && ./autogen.sh && ./configure --prefix=$HOME/bogus --enable-usnic --disable-verbs $COMPILERS && make install && cd .. ; fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; brew upgrade automake || true; brew upgrade libtool || true; fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew tap homebrew/science || true; brew install hwloc || true; brew upgrade hwloc || true ; fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$GCC_VERSION" == "6" ]] ; then brew install gcc || true; brew upgrade gcc || true ; fi - - mkdir -p $HOME/bogus/include $HOME/bogus/lib - -# Note that we use "make -k" to do the entire build, even if there was a -# build error in there somewhere. This prevents us from needing to submit -# to Travis, see the first error, fix that first error, submit again, ...etc. -install: - - m4 --version - - autoconf --version - - automake --version - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then libtool --version; else glibtool --version; fi - - ./autogen.pl - - if [[ "$TRAVIS_OS_NAME" == "linux" && "$CC" == "gcc" ]]; then ./configure $CONFIGURE_ARGS --with-libfabric=$HOME/bogus --with-usnic --with-verbs; else ./configure $CONFIGURE_ARGS; fi - - make -k - -# We only need to distcheck on one OS / compiler combination (this is just -# a minor optimization to make the overall set of builds faster). 
-script: - - if [[ "$TRAVIS_OS_NAME" == "linux" && "$CC" == "gcc" ]]; then make distcheck; else make check; fi - -matrix: - exclude: - - env: GCC_VERSION=6 - compiler: clang diff --git a/3rd-party/hwloc-2.4.0.tar.gz b/3rd-party/hwloc-2.4.0.tar.gz deleted file mode 100644 index da43f233082..00000000000 Binary files a/3rd-party/hwloc-2.4.0.tar.gz and /dev/null differ diff --git a/3rd-party/hwloc-2.7.0.tar.gz b/3rd-party/hwloc-2.7.0.tar.gz new file mode 100644 index 00000000000..cd09be57bcc Binary files /dev/null and b/3rd-party/hwloc-2.7.0.tar.gz differ diff --git a/3rd-party/openpmix b/3rd-party/openpmix index 79fadef7f7e..68033177392 160000 --- a/3rd-party/openpmix +++ b/3rd-party/openpmix @@ -1 +1 @@ -Subproject commit 79fadef7f7e75fd52e983321711a307a8f23ca2c +Subproject commit 680331773926b62c245626dbc9cf78aed2d641d3 diff --git a/3rd-party/prrte b/3rd-party/prrte index f35d66e576e..78825642e85 160000 --- a/3rd-party/prrte +++ b/3rd-party/prrte @@ -1 +1 @@ -Subproject commit f35d66e576e65405c19ee74b0665818c438bb06e +Subproject commit 78825642e8594ebffda0942fa04e375077819732 diff --git a/3rd-party/treematch/tm_topology.c b/3rd-party/treematch/tm_topology.c index dd55e02899b..92143f7a52d 100644 --- a/3rd-party/treematch/tm_topology.c +++ b/3rd-party/treematch/tm_topology.c @@ -68,7 +68,13 @@ tm_topology_t * tgt_to_tm(char *filename) printf("Reading TGT file: %s\n",filename); - fgets(line,1024,pf); + if (NULL == fgets(line,1024,pf)) { + /* either an error has occurred (and is in an unknown state) or + we hit EOF and line is empty. 
Either way, make line the + empty string to avoid errors later */ + line[0] = '\0'; + } + fclose(pf); s = strstr(line,"tleaf"); @@ -159,7 +165,13 @@ double ** topology_to_arch(hwloc_topology_t topology) double **arch = NULL; nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); - arch = (double**)MALLOC(sizeof(double*)*nb_proc); + if (nb_proc < 0) { + return NULL; + } + arch = (double**)malloc(sizeof(double*)*nb_proc); + if (NULL == arch) { + return NULL; + } for( i = 0 ; i < nb_proc ; i++ ){ obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i); arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc); @@ -534,7 +546,9 @@ int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology /* compute the size of the array to store the constraints*/ n = 0; - fgets(line, LINE_SIZE, pf); + if (NULL == fgets(line, LINE_SIZE, pf)) { + line[0] = '\0'; + } l = line; while((ptr=strtok(l," \t"))){ l = NULL; @@ -545,7 +559,9 @@ int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology tab = (int*)MALLOC(n*sizeof(int)); rewind(pf); - fgets(line, LINE_SIZE, pf); + if (NULL == fgets(line, LINE_SIZE, pf)) { + line[0] = '\0'; + } fclose(pf); l = line; i = 0; diff --git a/HACKING.md b/HACKING.md index fe045ecf0e4..fab5b1191a7 100644 --- a/HACKING.md +++ b/HACKING.md @@ -7,6 +7,24 @@ source code form, most likely through a developer's tree (i.e., a Git clone). +## Obtaining Open MPI + +Open MPI is available from many distributions, however some users +prefer to obtain it directly from the Open MPI community via +prepackaged tarball (see: https://www.open-mpi.org/software/ompi/). +The Open MPI tarball includes manpages, and openpmix and openprrte +components, along with an auto-generated configure script. + +Some developers prefer to obtain Open MPI by directly cloning it +from https://github.com/open-mpi/ompi. 
It is recommended that users +who choose to clone the source directly use the git clone flag +`--recurse-submodules` to also obtain openpmix and openprrte. + +Regardless of how openpmix and openprrte are obtained, the +configure logic in Open MPI v5.0+ prefers externally installed +components. Please see `configure --help` for more details. + + ## Developer Builds: Compiler Pickyness by Default If you are building Open MPI from a Git clone (i.e., there is a `.git` diff --git a/LICENSE b/LICENSE index 283cb9ad4a7..6626c0ab405 100644 --- a/LICENSE +++ b/LICENSE @@ -5,37 +5,37 @@ team who have contributed code to this release. The copyrights for code used under license from other parties are included in the corresponding files. -Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana +Copyright (c) 2004-2012 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved. -Copyright (c) 2004-2017 The University of Tennessee and The University +Copyright (c) 2004-2021 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. -Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, +Copyright (c) 2004-2018 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2008 The Regents of the University of California. All rights reserved. Copyright (c) 2006-2018 Los Alamos National Security, LLC. All rights reserved. -Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006-2010 Voltaire, Inc. All rights reserved. -Copyright (c) 2006-2017 Sandia National Laboratories. All rights reserved. +Copyright (c) 2006-2021 Sandia National Laboratories. All rights reserved. Copyright (c) 2006-2010 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. 
-Copyright (c) 2006-2017 The University of Houston. All rights reserved. +Copyright (c) 2006-2021 The University of Houston. All rights reserved. Copyright (c) 2006-2009 Myricom, Inc. All rights reserved. Copyright (c) 2007-2017 UT-Battelle, LLC. All rights reserved. -Copyright (c) 2007-2020 IBM Corporation. All rights reserved. +Copyright (c) 2007-2021 IBM Corporation. All rights reserved. Copyright (c) 1998-2005 Forschungszentrum Juelich, Juelich Supercomputing Centre, Federal Republic of Germany Copyright (c) 2005-2008 ZIH, TU Dresden, Federal Republic of Germany Copyright (c) 2007 Evergrid, Inc. All rights reserved. -Copyright (c) 2008 Chelsio, Inc. All rights reserved. +Copyright (c) 2008-2016 Chelsio, Inc. All rights reserved. Copyright (c) 2008-2009 Institut National de Recherche en Informatique. All rights reserved. Copyright (c) 2007 Lawrence Livermore National Security, LLC. All rights reserved. -Copyright (c) 2007-2017 Mellanox Technologies. All rights reserved. +Copyright (c) 2007-2019 Mellanox Technologies. All rights reserved. Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. Copyright (c) 2008-2017 Oak Ridge National Labs. All rights reserved. Copyright (c) 2006-2012 Oracle and/or its affiliates. All rights reserved. @@ -43,24 +43,27 @@ Copyright (c) 2009-2015 Bull SAS. All rights reserved. Copyright (c) 2010 ARM ltd. All rights reserved. Copyright (c) 2016 ARM, Inc. All rights reserved. Copyright (c) 2010-2011 Alex Brick . All rights reserved. -Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights - reserved. +Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights reserved. Copyright (c) 2013-2020 Intel, Inc. All rights reserved. -Copyright (c) 2011-2017 NVIDIA Corporation. All rights reserved. -Copyright (c) 2016 Broadcom Limited. All rights reserved. -Copyright (c) 2011-2017 Fujitsu Limited. All rights reserved. +Copyright (c) 2011-2021 NVIDIA Corporation. All rights reserved. 
+Copyright (c) 2016-2018 Broadcom Limited. All rights reserved. +Copyright (c) 2011-2021 Fujitsu Limited. All rights reserved. Copyright (c) 2014-2015 Hewlett-Packard Development Company, LP. All rights reserved. -Copyright (c) 2013-2017 Research Organization for Information Science (RIST). +Copyright (c) 2013-2021 Research Organization for Information Science (RIST). All rights reserved. -Copyright (c) 2017-2020 Amazon.com, Inc. or its affiliates. All Rights +Copyright (c) 2017-2021 Amazon.com, Inc. or its affiliates. All Rights reserved. Copyright (c) 2018 DataDirect Networks. All rights reserved. -Copyright (c) 2018-2020 Triad National Security, LLC. All rights reserved. -Copyright (c) 2020 Google, LLC. All rights reserved. +Copyright (c) 2018-2021 Triad National Security, LLC. All rights reserved. +Copyright (c) 2019-2021 Hewlett Packard Enterprise Development, LP. +Copyright (c) 2020-2021 Google, LLC. All rights reserved. Copyright (c) 2002 University of Chicago Copyright (c) 2001 Argonne National Laboratory Copyright (c) 2020-2021 Cornelis Networks, Inc. All rights reserved. +Copyright (c) 2021 Nanook Consulting +Copyright (c) 2017-2019 Iowa State University Research Foundation, Inc. + All rights reserved. $COPYRIGHT$ diff --git a/Makefile.am b/Makefile.am index 63613685ffb..23a5253b268 100644 --- a/Makefile.am +++ b/Makefile.am @@ -12,7 +12,7 @@ # Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved. # Copyright (c) 2014-2019 Intel, Inc. All rights reserved. -# Copyright (c) 2017-2020 Amazon.com, Inc. or its affiliates. +# Copyright (c) 2017-2021 Amazon.com, Inc. or its affiliates. # All Rights reserved. # Copyright (c) 2020 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ @@ -34,6 +34,14 @@ dist-hook: echo "AUTHORS file is empty; aborting distribution"; \ exit 1; \ fi + @if test -n "$(OPAL_MAKEDIST_DISABLE)" ; then \ + echo "#########################################################################"; \ + echo "#"; \ + echo "# make dist is disabled due to the following packages: $(OPAL_MAKEDIST_DISABLE)"; \ + echo "#"; \ + echo "#########################################################################"; \ + exit 1; \ + fi # Check for common symbols. Use a "-hook" to increase the odds that a # developer will see it at the end of their installation process. diff --git a/NEWS b/NEWS index cf25a034832..19ea94368be 100644 --- a/NEWS +++ b/NEWS @@ -8,17 +8,17 @@ Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2006 The Regents of the University of California. All rights reserved. -Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006 Voltaire, Inc. All rights reserved. Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights reserved. -Copyright (c) 2010-2017 IBM Corporation. All rights reserved. +Copyright (c) 2010-2021 IBM Corporation. All rights reserved. Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. Copyright (c) 2012 Sandia National Laboratories. All rights reserved. Copyright (c) 2012 University of Houston. All rights reserved. -Copyright (c) 2013 NVIDIA Corporation. All rights reserved. +Copyright (c) 2013-2021 NVIDIA Corporation. All rights reserved. Copyright (c) 2013-2018 Intel, Inc. All rights reserved. Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. All Rights reserved. @@ -75,13 +75,406 @@ Master (not on release branches yet) * launcher. 
********************************************************************** -- Fix rank-by algorithms to properly rank by object and span -- Do not build Open SHMEM layer when there are no SPMLs available. - Currently, this means the Open SHMEM layer will only build if - a MXM or UCX library is found. -- Remove all vestiges of the C/R support +v5.0.0rc2 -- October, 2021 +-------------------- + +- ORTE, the underlying OMPI launcher has been removed, and replaced + with PRRTE. +- Reworked how Open MPI integrates with 3rd party packages. + The decision was made to stop building 3rd-party packages + such as Libevent, HWLOC, PMIx, and PRRTE as MCA components + and instead 1) start relying on external libraries whenever + possible and 2) Open MPI builds the 3rd party libraries (if needed) + as independent libraries, rather than linked into libopen-pal. +- Update to use PMIx v4.1.1rc2 +- Update to use PRRTE v2.0.1rc2 - Change the default component build behavior to prefer building components as part of libmpi.so instead of individual DSOs. +- Remove pml/yalla, mxm, mtl/psm, and ikrit components. +- Remove all vestiges of the C/R support. +- Various ROMIO v3.4.1 updates. +- Use Pandoc to generate manpages +- 32 bit atomics are now only supported via C11 compliant compilers. +- Explicitly disable support for GNU gcc < v4.8.1 (note: the default + gcc compiler that is included in RHEL 7 is v4.8.5). +- Do not build Open SHMEM layer when there are no SPMLs available. + Currently, this means the Open SHMEM layer will only build if + the UCX library is found. +- Fix rank-by algorithms to properly rank by object and span. +- Updated the "-mca pml" option to only accept one pml, not a list. +- vprotocol/pessimist: Updated to support MPI_THREAD_MULTIPLE. +- btl/tcp: Updated to use reachability and graph solving for global + interface matching. This has been shown to improve MPI_Init() + performance under btl/tcp. 
+- fs/ime: Fixed compilation errors due to missing header inclusion + Thanks to Sylvain Didelot for finding + and fixing this issue. +- Fixed bug where MPI_Init_thread can give wrong error messages by + delaying error reporting until all infrastructure is running. +- Atomics support removed: S390/s390x, Sparc v9, ARMv4 and ARMv5 CMA + support. +- autogen.pl now supports a "-j" option to run multi-threaded. + Users can also use environment variable "AUTOMAKE_JOBS". +- PMI support has been removed for Open MPI apps. +- Legacy btl/sm has been removed, and replaced with btl/vader, which + was renamed to "btl/sm". +- Update btl/sm to not use CMA in user namespaces. +- C++ bindings have been removed. +- The "--am" and "--amca" options have been deprecated. +- opal/mca/threads framework added. Currently supports + argobots, qthreads, and pthreads. See the --with-threads=x option + in configure. +- Various README.md fixes - thanks to: + Yixin Zhang , + Samuel Cho , + rlangefe , + Alex Ross , + Sophia Fang , + mitchelltopaloglu , + Evstrife , and + Hao Tong for their + contributions. +- osc/pt2pt: Removed. Users can use osc/rdma + btl/tcp + for OSC support using TCP, or other providers. +- Open MPI now links -levent_core instead of -levent. +- MPI-4: Added ERRORS_ABORT infrastructure. +- common/cuda docs: Various fixes. Thanks to + Simon Byrne for finding and fixing. +- osc/ucx: Add support for acc_single_intrinsic. +- Fixed buildrpm.sh "-r" option used for RPM options specification. + Thanks to John K. McIver III for + reporting and fixing. +- configure: Added support for setting the wrapper C compiler. + Adds new option "--with-wrapper-cc=" . +- mpi_f08: Fixed Fortran-8-byte-INTEGER vs. C-4-byte-int issue. + Thanks to @ahaichen for reporting the bug. +- MPI-4: Added support for 'initial error handler'. +- opal/thread/tsd: Added thread-specific-data (tsd) api. +- MPI-4: Added error handling for 'unbound' errors to MPI_COMM_SELF. 
+- Add missing MPI_Status conversion subroutines: + MPI_Status_c2f08(), MPI_Status_f082c(), MPI_Status_f082f(), + MPI_Status_f2f08() and the PMPI_* related subroutines. +- patcher: Removed the Linux component. +- opal/util: Fixed typo in error string. Thanks to + NARIBAYASHI Akira for finding + and fixing the bug. +- fortran/use-mpi-f08: Generate PMPI bindings from the MPI bindings. +- Converted man pages to markdown. + Thanks to Fangcong Yin for their contribution + to this effort. +- Fixed ompi_proc_world error string and some comments in pml/ob1. + Thanks to Julien EMMANUEL for + finding and fixing these issues. +- oshmem/tools/oshmem_info: Fixed Fortran keyword issue when + compiling param.c. Thanks to Pak Lui for + finding and fixing the bug. +- autogen.pl: Patched libtool.m4 for OSX Big Sur. Thanks to + @fxcoudert for reporting the issue. +- Upgraded to HWLOC v2.4.0. +- Removed config/opal_check_pmi.m4. + Thanks to Zach Osman for the contribution. +- opal/atomics: Added load-linked, store-conditional atomics for + AArch64. +- Fixed envvar names to OMPI_MCA_orte_precondition_transports. + Thanks to Marisa Roman + for the contribution. +- fcoll/two_phase: Removed the component. All scenarios it was + used for have been replaced. +- btl/uct: Bumped UCX allowed version to v1.9.x. +- ULFM Fault Tolerance has been added. See README.FT.ULFM.md. +- Fixed a crash during CUDA initialization. + Thanks to Yaz Saito for finding + and fixing the bug. +- Added CUDA support to the OFI MTL. +- ompio: Added atomicity support. +- Singleton comm spawn support has been fixed. +- Autoconf v2.7 support has been updated. +- fortran: Added check for ISO_FORTRAN_ENV:REAL16. Thanks to + Jeff Hammond for reporting this issue. +- Changed the MCA component build style default to static. +- PowerPC atomics: Force usage of opal/ppc assembly. +- Removed C++ compiler requirement to build Open MPI. +- Fixed .la files leaking into wrapper compilers. 
+- Fixed bug where the cache line size was not set soon enough in + MPI_Init(). +- coll/ucc and scoll/ucc components were added. +- coll/ucc: Added support for allgather and reduce collective + operations. +- autogen.pl: Fixed bug where it would not ignore all + excluded components. +- Various datatype bugfixes and performance improvements +- Various pack/unpack bugfixes and performance improvements +- Fix mmap infinite recurse in memory patcher +- Fix C to Fortran error code conversions. +- osc/ucx: Fix data corruption with non-contiguous accumulates +- Update coll/tuned selection rules +- Fix non-blocking collective ops +- btl/portals4: Fix flow control +- Various oshmem:ucx bugfixes and performance improvements +- common/ofi: Disable new monitor API until libfabric 1.14.0 +- Fix AVX detection with icc +- mpirun option "--mca ompi_display_comm mpi_init/mpi_finalize" + has been added. Enables a communication protocol report: + when MPI_Init is invoked (using the 'mpi_init' value) and/or + when MPI_Finalize is invoked (using the 'mpi_finalize' value). +- New algorithm for Allgather and Allgatherv added, based on the + paper "Sparbit: a new logarithmic-cost and data locality-aware MPI + Allgather algorithm". Default algorithm selection rules are + un-changed, to use these algorithms add: + "--mca coll_tuned_allgather_algorithm sparbit" and/or + "--mca coll_tuned_allgatherv_algorithm sparbit" + Thanks to: Wilton Jaciel Loch , + and Guilherme Koslovski for their contribution. +- MPI-4: Persistent collectives have been moved to the MPI + namespace from MPIX. +- OFI: Delay patcher initialization until needed. It will now + be initialized only after the component is officially selected. +- MPI-4: Make MPI_Comm_get_info, MPI_File_get_info, and + MPI_Win_get_info compliant to the standard. +- Portable_platform file has been updated from GASNet. +- GCC versions < 4.8.1 are no longer supported. 
+- coll: Fix a bug with the libnbc MPI_AllReduce ring algorithm + when using MPI_IN_PLACE. +- Updated the usage of .gitmodules to use relative paths from + absolute paths. This allows the submodule cloning to use the same + protocol as OMPI cloning. Thanks to Felix Uhl + for the contribution. +- osc/rdma: Add local leader pid in shm file name to make it unique. +- ofi: Fix memory handler unregistration. This change fixes a + segfault during shutdown if the common/ofi component was built + as a dynamic object. +- osc/rdma: Add support for MPI minimum alignment key. +- memory_patcher: Add ability to detect patched memory. Thanks + to Rich Welch for the contribution. +- build: Improve handling of compiler version string. This + fixes a compiler error with clang and armclang. +- Fix bug where the relocation of OMPI packages caused + the launch to fail. +- Various improvements to MPI_AlltoAll algorithms for both + performance and memory usage. +- coll/basic: Fix segmentation fault in MPI_Alltoallw with + MPI_IN_PLACE. + NOTE: This patch either caused or exposed a regression + in MPI_AlltoAllv() using MPI_IN_PLACE. See github issue #9501. + This will be fixed prior to v5.0.0 release. + +4.1.2 -- November, 2021 +----------------------- + +- ROMIO portability fix for OpenBSD +- Fix handling of MPI_IN_PLACE with MPI_ALLTOALLW and improve performance + of MPI_ALLTOALL and MPI_ALLTOALLV for MPI_IN_PLACE. +- Fix one-sided issue with empty groups in Post-Start-Wait-Complete + synchronization mode. +- Fix Fortran status returns in certain use cases involving + Generalized Requests +- Romio datatype bug fixes. +- Fix oshmem_shmem_finalize() when main() returns non-zero value. +- Fix wrong affinity under LSF with the membind option. +- Fix count==0 cases in MPI_REDUCE and MPI_IREDUCE. +- Fix ssh launching on Bourne-flavored shells when the user has "set + -u" set in their shell startup files. +- Correctly process 0 slots with the mpirun --host option. 
+- Ensure to unlink and rebind socket when the Open MPI session + directory already exists. +- Fix a segv in mpirun --disable-dissable-map. +- Fix a potential hang in the memory hook handling. +- Slight performance improvement in MPI_WAITALL when running in + MPI_THREAD_MULTIPLE. +- Fix hcoll datatype mapping and rooted operation behavior. +- Correct some operations modifying MPI_Status.MPI_ERROR when it is + disallowed by the MPI standard. +- UCX updates: + - Fix datatype reference count issues. + - Detach dynamic window memory when freeing a window. + - Fix memory leak in datatype handling. +- Fix various atomic operations issues. +- mpirun: try to set the curses winsize to the pty of the spawned + task. Thanks to Stack Overflow user @Seriously for reporting the + issue. +- PMIx updates: + - Fix compatibility with external PMIx v4.x installations. + - Fix handling of PMIx v3.x compiler/linker flags. Thanks to Erik + Schnetter for reporting the issue. + - Skip SLURM-provided PMIx detection when appropriate. Thanks to + Alexander Grund for reporting the issue. +- Fix handling by C++ compilers when they #include the STL "" + header file, which ends up including Open MPI's text VERSION file + (which is not C code). Thanks to @srpgilles for reporting the + issue. +- Fix MPI_Op support for MPI_LONG. +- Make the MPI C++ bindings library (libmpi_cxx) explicitly depend on + the OPAL internal library (libopen-pal). Thanks to Ye Luo for + reporting the issue. +- Fix configure handling of "--with-libevent=/usr". +- Fix memory leak when opening Lustre files. Thanks to Bert Wesarg + for submitting the fix. +- Fix MPI_SENDRECV_REPLACE to correctly process datatype errors. + Thanks to Lisandro Dalcin for reporting the issue. +- Fix MPI_SENDRECV_REPLACE to correctly handle large data. Thanks + Jakub Benda for reporting this issue and suggesting a fix. +- Add workaround for TCP "dropped connection" errors to drastically + reduce the possibility of this happening. 
+- OMPIO updates: + - Fix handling when AMODE is not set. Thanks to Rainer Keller for + reporting the issue and supplying the fix. + - Fix FBTL "posix" component linking issue. Thanks to Honggang Li + for reporting the issue. + - Fixed segv with MPI_FILE_GET_BYTE_OFFSET on 0-sized file view. + - Thanks to GitHub user @shanedsnyder for submitting the issue. +- OFI updates: + - Multi-plane / Multi-Nic nic selection cleanups + - Add support for exporting Open MPI memory monitors into + Libfabric. + - Ensure that Cisco usNIC devices are never selected by the OFI + MTL. + - Fix buffer overflow in OFI networking setup. Thanks to Alexander + Grund for reporting the issue and supplying the fix. +- Fix SSEND on tag matching networks. +- Fix error handling in several MPI collectives. +- Fix the ordering of MPI_COMM_SPLIT_TYPE. Thanks to Wolfgang + Bangerth for raising the issue. +- No longer install the orted-mpir library (it's an internal / Libtool + convenience library). Thanks to Andrew Hesford for the fix. +- PSM2 updates: + - Allow advanced users to disable PSM2 version checking. + - Fix to allow non-default installation locations of psm2.h. + +4.1.1 -- April, 2021 +-------------------- + +- Fix a number of datatype issues, including an issue with + improper handling of partial datatypes that could lead to + an unexpected application failure. +- Change UCX PML to not warn about MPI_Request leaks during + MPI_FINALIZE by default. The old behavior can be restored with + the mca_pml_ucx_request_leak_check MCA parameter. +- Reverted temporary solution that worked around launch issues in + SLURM v20.11.{0,1,2}. SchedMD encourages users to avoid these + versions and to upgrade to v20.11.3 or newer. +- Updated PMIx to v3.2.2. +- Fixed configuration issue on Apple Silicon observed with + Homebrew. Thanks to François-Xavier Coudert for reporting the issue. +- Disabled gcc built-in atomics by default on aarch64 platforms. +- Disabled UCX PML when UCX v1.8.0 is detected. 
UCX version 1.8.0 has a bug that + may cause data corruption when its TCP transport is used in conjunction with + the shared memory transport. UCX versions prior to v1.8.0 are not affected by + this issue. Thanks to @ksiazekm for reporting the issue. +- Fixed detection of available UCX transports/devices to better inform PML + prioritization. +- Fixed SLURM support to mark ORTE daemons as non-MPI tasks. +- Improved AVX detection to more accurately detect supported + platforms. Also improved the generated AVX code, and switched to + using word-based MCA params for the op/avx component (vs. numeric + big flags). +- Improved OFI compatibility support and fixed memory leaks in error + handling paths. +- Improved HAN collectives with support for Barrier and Scatter. Thanks + to @EmmanuelBRELLE for these changes and the relevant bug fixes. +- Fixed MPI debugger support (i.e., the MPIR_Breakpoint() symbol). + Thanks to @louisespellacy-arm for reporting the issue. +- Fixed ORTE bug that prevented debuggers from reading MPIR_Proctable. +- Removed PML uniformity check from the UCX PML to address performance + regression. +- Fixed MPI_Init_thread(3) statement about C++ binding and update + references about MPI_THREAD_MULTIPLE. Thanks to Andreas Lösel for + bringing the outdated docs to our attention. +- Added fence_nb to Flux PMIx support to address segmentation faults. +- Ensured progress of AIO requests in the POSIX FBTL component to + prevent exceeding maximum number of pending requests on MacOS. +- Used OPAL's multi-thread support in the orted to leverage atomic + operations for object refcounting. +- Fixed segv when launching with static TCP ports. +- Fixed --debug-daemons mpirun CLI option. +- Fixed bug where mpirun did not honor --host in a managed job + allocation. +- Made a managed allocation filter a hostfile/hostlist. +- Fixed bug to mark a generalized request as pending once initiated. +- Fixed external PMIx v4.x check.
+- Fixed OSHMEM build with `--enable-mem-debug`. +- Fixed a performance regression observed with older versions of GCC when + __ATOMIC_SEQ_CST is used. Thanks to @BiplabRaut for reporting the issue. +- Fixed buffer allocation bug in the binomial tree scatter algorithm when + non-contiguous datatypes are used. Thanks to @sadcat11 for reporting the issue. +- Fixed bugs related to the accumulate and atomics functionality in the + osc/rdma component. +- Fixed race condition in MPI group operations observed with + MPI_THREAD_MULTIPLE threading level. +- Fixed a deadlock in the TCP BTL's connection matching logic. +- Fixed pml/ob1 compilation error when CUDA support is enabled. +- Fixed a build issue with Lustre caused by unnecessary header includes. +- Fixed a build issue with IMB LSF workload manager. +- Fixed linker error with UCX SPML. + +4.1.0 -- December, 2020 +----------------------- + +- collectives: Add HAN and ADAPT adaptive collectives components. + Both components are off by default and can be enabled by specifying + "mpirun --mca coll_adapt_priority 100 --mca coll_han_priority 100 ...". + We intend to enable both by default in Open MPI 5.0. +- OMPIO is now the default for MPI-IO on all filesystems, including + Lustre (prior to this, ROMIO was the default for Lustre). Many + thanks to Mark Dixon for identifying MPI I/O issues and providing + access to Lustre systems for testing. +- Updates for macOS Big Sur. Thanks to FX Coudert for reporting this + issue and pointing to a solution. +- Minor MPI one-sided RDMA performance improvements. +- Fix hcoll MPI_SCATTERV with MPI_IN_PLACE. +- Add AVX support for MPI collectives. +- Updates to mpirun(1) about "slots" and PE=x values. +- Fix buffer allocation for large environment variables. Thanks to + @zrss for reporting the issue. +- Upgrade the embedded OpenPMIx to v3.2.2. +- Take more steps towards creating fully Reproducible builds (see + https://reproducible-builds.org/). Thanks Bernhard M. 
Wiedemann for + bringing this to our attention. +- Fix issue with extra-long values in MCA files. Thanks to GitHub + user @zrss for bringing the issue to our attention. +- UCX: Fix zero-sized datatype transfers. +- Fix --cpu-list for non-uniform modes. +- Fix issue in PMIx callback caused by missing memory barrier on Arm platforms. +- OFI MTL: Various bug fixes. +- Fixed issue where MPI_TYPE_CREATE_RESIZED would create a datatype + with unexpected extent on oddly-aligned datatypes. +- collectives: Adjust default tuning thresholds for many collective + algorithms +- runtime: fix situation where rank-by argument does not work +- Portals4: Clean up error handling corner cases +- runtime: Remove --enable-install-libpmix option, which has not + worked since it was added +- opal: Disable memory patcher component on MacOS +- UCX: Allow UCX 1.8 to be used with the btl uct +- UCX: Replace usage of the deprecated NB API of UCX with NBX +- OMPIO: Add support for the IME file system +- OFI/libfabric: Added support for multiple NICs +- OFI/libfabric: Added support for Scalable Endpoints +- OFI/libfabric: Added btl for one-sided support +- OFI/libfabric: Multiple small bugfixes +- libnbc: Adding numerous performance-improving algorithms + +4.0.6 -- March, 2021 +----------------------- +- Update embedded PMIx to 3.2.2. This update addresses several + MPI_COMM_SPAWN problems. +- Fix a problem when using Flux PMI and UCX. Thanks to Sami Ilvonen + for reporting and supplying a fix. +- Fix a problem with MPIR breakpoint being compiled out using PGI + compilers. Thanks to @louisespellacy-arm for reporting. +- Fix some ROMIO issues when using Lustre. Thanks to Mark Dixon for + reporting. +- Fix a problem using an external PMIx 4 to build Open MPI 4.0.x. +- Fix a compile problem when using the enable-timing configure option + and UCX. Thanks to Jan Bierbaum for reporting. +- Fix a symbol name collision when using the Cray compiler to build + Open SHMEM. 
Thanks to Pak Lui for reporting and fixing. +- Correct an issue encountered when building Open MPI under OSX Big Sur. + Thanks to FX Coudert for reporting. +- Various fixes to the OFI MTL. +- Fix an issue with allocation of sufficient memory for parsing long + environment variable values. Thanks to @zrss for reporting. +- Improve reproducibility of builds to assist Open MPI packages. + Thanks to Bernhard Wiedemann for bringing this to our attention. 4.0.5 -- August, 2020 --------------------- diff --git a/README.md b/README.md index 67f7c277ebd..01c26399159 100644 --- a/README.md +++ b/README.md @@ -94,8 +94,6 @@ The rest of this file contains: frameworks](#the-modular-component-architecture-mca) * [MPI layer frameworks](#mpi-layer-frameworks) * [OpenSHMEM component frameworks](#openshmem-component-frameworks) - * [Run-time environment - frameworks](#back-end-run-time-environment-rte-component-frameworks) * [Miscellaneous frameworks](#miscellaneous-frameworks) * [Other notes about frameworks](#framework-notes) * [How to get more help](#questions--problems) @@ -203,7 +201,7 @@ base as of this writing (April 2020): * On platforms other than x86-64, AArch64 (64-bit ARM), and PPC64, Open MPI requires a compiler that either supports C11 atomics or - the GCC `__atomic` atomics (e.g., GCC >= v4.7.2). + the GCC `__atomic` atomics (e.g., GCC >= v4.8.1). * 32-bit platforms are only supported with a recent compiler that supports C11 atomics. This includes gcc 4.9.x+ (gcc 6.x or newer @@ -352,6 +350,22 @@ base as of this writing (April 2020): version of the Intel 12.1 Linux compiler suite, the problem will go away.
+* [Users have reported](https://github.com/open-mpi/ompi/issues/7615) + that the Intel Fortran compiler will fail to link Fortran-based MPI + applications on macOS with linker errors similar to this: + ``` + Undefined symbols for architecture x86_64: + "_ompi_buffer_detach_f08", referenced from: + import-atom in libmpi_usempif08.dylib + ld: symbol(s) not found for architecture x86_64 + ``` + It appears that setting the environment variable + `lt_cv_ld_force_load=no` before invoking Open MPI's `configure` + script works around the issue. For example: + ``` + shell$ lt_cv_ld_force_load=no ./configure ... + ``` + * The Portland Group compilers prior to version 7.0 require the `-Msignextend` compiler flag to extend the sign bit when converting from a shorter to longer integer. This is is different than other @@ -490,7 +504,10 @@ base as of this writing (April 2020): ``` shell$ ompi_info | grep -i fort ``` - +* As of October 2021, the NVIDIA 'nvfortran' compiler version 21.1 + fails to link `libmpi_usempif08.la` (see + https://github.com/open-mpi/ompi/issues/8919). One can work around + this issue by explicitly setting `FCFLAGS=-fPIC` during configure. ### General Run-Time Support Notes @@ -1229,9 +1246,6 @@ Additionally, if a search directory is specified in the form so that executables such as `mpicc` and `mpirun` can be found without needing to type long path names. -* `--enable-orte-static-ports`: - Enable ORTE static ports for TCP OOB (default: enabled). - * `--with-alps`: Force the building of for the Cray Alps run-time environment. If Alps support cannot be found, configure will abort.
@@ -2040,16 +2054,17 @@ Open MPI: * `bml`: BTL management layer * `coll`: MPI collective algorithms -* `fbtl`: file byte transfer layer: abstraction for individual - read: collective read and write operations for MPI I/O -* `fs`: file system functions for MPI I/O +* `fbtl`: file byte transfer layer: abstraction for individual blocking and non-blocking read and write operations +* `fcoll`: Collective read and write operations for MPI I/O. +* `fs`: File system functions for MPI I/O. +* `hook`: Make calls at various points of MPI process life-cycle. * `io`: MPI I/O * `mtl`: Matching transport layer, used for MPI point-to-point messages on some types of networks * `op`: Back end computations for intrinsic MPI_Op operators * `osc`: MPI one-sided communications * `pml`: MPI point-to-point management layer -* `rte`: Run-time environment operations +* `part`: MPI Partitioned communication. * `sharedfp`: shared file pointer operations for MPI I/O * `topo`: MPI topology routines * `vprotocol`: Protocols for the "v" PML @@ -2064,33 +2079,12 @@ Open MPI: point-to-point operations * `sshmem`: OpenSHMEM shared memory backing facility -### Back-end run-time environment (RTE) component frameworks: - -* `dfs`: Distributed file system -* `errmgr`: RTE error manager -* `ess`: RTE environment-specific services -* `filem`: Remote file management -* `grpcomm`: RTE group communications -* `iof`: I/O forwarding -* `notifier`: System-level notification support -* `odls`: OpenRTE daemon local launch subsystem -* `oob`: Out of band messaging -* `plm`: Process lifecycle management -* `ras`: Resource allocation system -* `rmaps`: Resource mapping system -* `rml`: RTE message layer -* `routed`: Routing table for the RML -* `rtc`: Run-time control framework -* `schizo`: OpenRTE personality framework -* `state`: RTE state machine - ### Miscellaneous frameworks: * `allocator`: Memory allocator * `backtrace`: Debugging call stack backtrace support * `btl`: Point-to-point Byte Transfer Layer * `dl`: 
Dynamic loading library interface -* `event`: Event library (libevent) versioning support * `hwloc`: Hardware locality (hwloc) versioning support * `if`: OS IP interface support * `installdirs`: Installation directory relocation services @@ -2100,12 +2094,17 @@ Open MPI: * `mpool`: Memory pooling * `patcher`: Symbol patcher hooks * `pmix`: Process management interface (exascale) -* `pstat`: Process status * `rcache`: Memory registration cache -* `sec`: Security framework +* `reachable`: Reachability matrix between endpoints of a given pair of hosts * `shmem`: Shared memory support (NOT related to OpenSHMEM) +* `smsc`: Shared Memory Single Copy +* `threads`: Thread management and support. * `timer`: High-resolution timers +### Back-end run-time environment (PRTE): + +See: https://github.com/openpmix/prrte + ### Framework notes Each framework typically has one or more components that are used at diff --git a/autogen.pl b/autogen.pl index 67361f0ec81..92329ff3def 100755 --- a/autogen.pl +++ b/autogen.pl @@ -5,7 +5,7 @@ # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. # Copyright (c) 2013-2020 Intel, Inc. All rights reserved. -# Copyright (c) 2015-2020 Research Organization for Information Science +# Copyright (c) 2015-2021 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2015-2021 IBM Corporation. All rights reserved. # Copyright (c) 2020 Amazon.com, Inc. or its affiliates. 
@@ -77,7 +77,7 @@ # version of packages we ship as tarballs my $libevent_version="2.1.12-stable"; -my $hwloc_version="2.4.0"; +my $hwloc_version="2.7.0"; # One-time setup my $username; @@ -134,6 +134,18 @@ sub debug_dump { ############################################################################## +sub list_contains { + my $searched_string = shift; + foreach my $str (@_) { + if ($searched_string eq $str) { + return 1; + } + } + return 0; +} + +############################################################################## + sub read_config_params { my ($filename, $dir_prefix) = @_; @@ -348,10 +360,18 @@ sub mca_process_framework { verbose "--- Found $pname / $framework / $d component\n"; # Skip if specifically excluded - if (exists($exclude_list->{$framework}) && - $exclude_list->{$framework}[0] eq $d) { - verbose " => Excluded\n"; - next; + if (exists($exclude_list->{$framework})) { + my $tst = 0; + foreach my $ck (@{$exclude_list->{$framework}}) { + if ($ck eq $d) { + verbose " => Excluded\n"; + $tst = 1; + last; + } + } + if ($tst) { + next; + } } # Skip if the framework is on the include list, but @@ -1045,10 +1065,28 @@ sub patch_autotools_output { whole_archive_flag_spec${tag}= tmp_sharedflag='--shared' ;; nagfor*) # NAGFOR 5.3 - tmp_sharedflag='-Wl,-shared';; + tmp_sharedflag='-Wl,-shared' ;; + xl"; + + push(@verbose_out, $indent_str . "Patching configure for NAG compiler #1 ($tag)\n"); + $c =~ s/$search_string/$replace_string/; + + # Newer versions of Libtool have the previous patch already. Therefore, + # we add the support for convenience libraries separetly + my $search_string = "whole_archive_flag_spec${tag}=" . '\n\s+' . + "tmp_sharedflag='--shared' ;;" . '\n\s+' . + 'nagfor\052.*# NAGFOR 5.3\n\s+' . + "tmp_sharedflag='-Wl,-shared' ;;" . '\n\s+' . 
+ 'xl'; + my $replace_string = "whole_archive_flag_spec${tag}= + tmp_sharedflag='--shared' ;; + nagfor*) # NAGFOR 5.3 + whole_archive_flag_spec${tag}='\$wl--whole-archive`for conv in \$convenience\\\"\\\"; do test -n \\\"\$conv\\\" && new_convenience=\\\"\$new_convenience,\$conv\\\"; done; func_echo_all \\\"\$new_convenience\\\"` \$wl--no-whole-archive' + compiler_needs_object=yes + tmp_sharedflag='-Wl,-shared' ;; xl"; - push(@verbose_out, $indent_str . "Patching configure for NAG compiler ($tag)\n"); + push(@verbose_out, $indent_str . "Patching configure for NAG compiler #2 ($tag)\n"); $c =~ s/$search_string/$replace_string/; } @@ -1563,7 +1601,8 @@ sub replace_config_sub_guess { dnl 3rd-party package information\n"; # Extract the OMPI options to exclude them when processing PMIx and PRRTE -if ( ! ("pmix" ~~ @disabled_3rdparty_packages && "prrte" ~~ @disabled_3rdparty_packages) ) { +if ( ! (list_contains("pmix", @disabled_3rdparty_packages) && + list_contains("prrte", @disabled_3rdparty_packages))) { safe_system("./config/extract-3rd-party-configure.pl -p . -n \"OMPI\" -l > config/auto-generated-ompi-exclude.ini"); } @@ -1571,7 +1610,7 @@ sub replace_config_sub_guess { # generic. Sorry :). verbose "=== Libevent\n"; -if ("libevent" ~~ @disabled_3rdparty_packages) { +if (list_contains("libevent", @disabled_3rdparty_packages)) { verbose "--- Libevent disabled\n"; } else { my $libevent_directory = "libevent-" . $libevent_version; @@ -1586,7 +1625,7 @@ sub replace_config_sub_guess { } verbose "=== hwloc\n"; -if ("hwloc" ~~ @disabled_3rdparty_packages) { +if (list_contains("hwloc", @disabled_3rdparty_packages)) { verbose "--- hwloc disabled\n"; } else { my $hwloc_directory = "hwloc-" . $hwloc_version; @@ -1597,11 +1636,11 @@ sub replace_config_sub_guess { $m4 .= "m4_define([package_hwloc], [1])\n"; $m4 .= "m4_define([hwloc_tarball], [" . $hwloc_tarball . "])\n"; $m4 .= "m4_define([hwloc_directory], [" . $hwloc_directory . 
"])\n"; - verbose "--- hwloc enabled\n"; + verbose "--- hwloc enabled (" . $hwloc_version . ")\n"; } verbose "=== PMIx\n"; -if ("pmix" ~~ @disabled_3rdparty_packages) { +if (list_contains("pmix", @disabled_3rdparty_packages)) { verbose "--- PMIx disabled\n"; } else { # sanity check pmix files exist @@ -1620,7 +1659,7 @@ sub replace_config_sub_guess { } verbose "=== PRRTE\n"; -if ("prrte" ~~ @disabled_3rdparty_packages) { +if (list_contains("prrte", @disabled_3rdparty_packages)) { verbose "--- PRRTE disabled\n"; } else { # sanity check prrte files exist diff --git a/config/c_get_alignment.m4 b/config/c_get_alignment.m4 index a300cc3d1a3..339018712d1 100644 --- a/config/c_get_alignment.m4 +++ b/config/c_get_alignment.m4 @@ -14,6 +14,7 @@ dnl Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2021 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -21,10 +22,10 @@ dnl dnl $HEADER$ dnl -# OPAL_C_GET_ALIGN(type, config_var) +# OPAL_C_GET_ALIGNMENT(c_type, c_macro_name) # ---------------------------------- # Determine datatype alignment. -# First arg is type, 2nd arg is config var to define. +# First arg is type, 2nd arg is macro name to define. # Now that we require C99 compilers, we include stdbool.h # in the alignment test so that we can find the definition # of "bool" when we test for its alignment. We might be able @@ -33,41 +34,26 @@ dnl # what we use. 
Yes, they should be the same - but "should" and # "are" frequently differ AC_DEFUN([OPAL_C_GET_ALIGNMENT],[ - AC_CACHE_CHECK([alignment of $1], - [AS_TR_SH([opal_cv_c_align_$1])], - [AC_RUN_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT - #include ], -[[ - struct foo { char c; $1 x; }; - struct foo *p = (struct foo *) malloc(sizeof(struct foo)); - int diff; - FILE *f=fopen("conftestval", "w"); - if (!f) exit(1); - diff = ((char *)&p->x) - ((char *)&p->c); - free(p); - fprintf(f, "%d\n", (diff >= 0) ? diff : -diff); -]])], [AS_TR_SH([opal_cv_c_align_$1])=`cat conftestval`], - [AC_MSG_WARN([*** Problem running configure test!]) - AC_MSG_WARN([*** See config.log for details.]) - AC_MSG_ERROR([*** Cannot continue.])], - [ # cross compile - do a non-executable test. Trick - # taken from the Autoconf 2.59c. Switch to using - # AC_CHECK_ALIGNOF when we can require Autoconf 2.60. - _AC_COMPUTE_INT([(long int) offsetof (opal__type_alignof_, y)], - [AS_TR_SH([opal_cv_c_align_$1])], - [AC_INCLUDES_DEFAULT -#include + OPAL_VAR_SCOPE_PUSH([opal_align_value]) + AC_LANG_PUSH([C]) -#ifndef offsetof -# define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0) -#endif -typedef struct { char x; $1 y; } opal__type_alignof_; -], - [AC_MSG_WARN([*** Problem running configure test!]) - AC_MSG_WARN([*** See config.log for details.]) - AC_MSG_ERROR([*** Cannot continue.])])])]) + AC_CHECK_ALIGNOF([$1], [AC_INCLUDES_DEFAULT + #include + ]) -AC_DEFINE_UNQUOTED([$2], [$AS_TR_SH([opal_cv_c_align_$1])], [Alignment of type $1]) -eval "$2=$AS_TR_SH([opal_cv_c_align_$1])" + # Put the value determined from AC CHECK_ALIGNOF into an + # easy-to-access shell variable. + AS_VAR_COPY([opal_align_value], + [ac_cv_alignof_]AS_TR_SH([$1])) -rm -rf conftest* ]) dnl + # This $opal_cv_c_align_* shell variable is used elsewhere in + # configure.ac + AS_VAR_COPY([opal_cv_c_align_]AS_TR_SH([$1]), + [opal_align_value]) + + # This #define is used in C code. 
+ AC_DEFINE_UNQUOTED([$2], [$opal_align_value], [Alignment of $1]) + + AC_LANG_POP([C]) + OPAL_VAR_SCOPE_POP +]) diff --git a/config/extract-3rd-party-configure.pl b/config/extract-3rd-party-configure.pl index 15ca89323d0..1c8be0e4b58 100755 --- a/config/extract-3rd-party-configure.pl +++ b/config/extract-3rd-party-configure.pl @@ -1,6 +1,7 @@ #!/usr/bin/env perl # # Copyright (c) 2021 IBM Corporation. All rights reserved. +# Copyright (c) 2021 Cisco Systems. All rights reserved. # # $COPYRIGHT$ # @@ -216,7 +217,7 @@ ($) # Argument separator # We only care about counting arguments for the outermost # function (e.g., AC_ARG_ENABLE), not the innermost - # function (e.g., AC_HELP_STRING) - which we take all of. + # function (e.g., AS_HELP_STRING) - which we take all of. # We know we are in the outermost because the '(' will be # only thing on the stack. elsif( scalar(@the_stack) == 1 && $char eq "," ) { diff --git a/config/ltmain_nag_pthread.diff b/config/ltmain_nag_pthread.diff index 927b671f9ae..c1fbd0582df 100644 --- a/config/ltmain_nag_pthread.diff +++ b/config/ltmain_nag_pthread.diff @@ -1,18 +1,40 @@ --- config/ltmain.sh +++ config/ltmain.sh -@@ -6417,8 +6417,14 @@ - func_source "$lib" - +@@ -7862,6 +7862,13 @@ func_mode_link () # Convert "-framework foo" to "foo.ltframework" -+ # and "-pthread" to "-Wl,-pthread" if NAG compiler if test -n "$inherited_linker_flags"; then -- tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` -+ case "$CC" in -+ *nagfor*) -+ tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g' | $SED 's/-pthread/-Wl,-pthread/g'`;; -+ *) -+ tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'`;; -+ esac + tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` ++ ++ # Additionally convert " -pthread" to " -Wl,-pthread" for 
nagfor ++ func_cc_basename $CC ++ case $func_cc_basename_result in ++ nagfor*) tmp_inherited_linker_flags=`$ECHO "$tmp_inherited_linker_flags" | $SED 's/ -pthread/ -Wl,-pthread/g'` ;; ++ esac ++ for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do case " $new_inherited_linker_flags " in *" $tmp_inherited_linker_flag "*) ;; +@@ -8881,7 +8888,8 @@ func_mode_link () + xlcverstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + # On Darwin other compilers +- case $CC in ++ func_cc_basename $CC ++ case $func_cc_basename_result in + nagfor*) + verstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" + ;; +@@ -9493,6 +9501,13 @@ EOF + ;; + esac + ++ # Time to revert the changes made for nagfor. ++ func_cc_basename $CC ++ case $func_cc_basename_result in ++ nagfor*) ++ new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% -Wl,-pthread% -pthread%g'` ;; ++ esac ++ + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= diff --git a/config/ompi_check_ime.m4 b/config/ompi_check_ime.m4 index 870bba8a143..adbfc42663b 100644 --- a/config/ompi_check_ime.m4 +++ b/config/ompi_check_ime.m4 @@ -1,6 +1,8 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2018 DataDirect Networks. All rights reserved. +dnl Copyright (c) 2021 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -15,13 +17,7 @@ dnl # support, otherwise executes action-if-not-found AC_DEFUN([OMPI_CHECK_IME],[ - check_ime_CPPFLAGS= - check_ime_LDFLAGS= - check_ime_LIBS= - - check_ime_configuration="none" - ompi_check_ime_happy="yes" - + OPAL_VAR_SCOPE_PUSH([ompi_check_ime_happy ompi_check_ime_dir]) # Get some configuration information AC_ARG_WITH([ime], @@ -30,33 +26,21 @@ AC_DEFUN([OMPI_CHECK_IME],[ OPAL_CHECK_WITHDIR([ime], [$with_ime], [include/ime_native.h]) AS_IF([test "$with_ime" = "no"], - [ompi_check_ime_happy="no"], - [AS_IF([test -z "$with_ime"], - [ompi_check_ime_dir="/usr/local"], - [ompi_check_ime_dir=$with_ime]) - - if test -e "$ompi_check_ime_dir/lib64" ; then - ompi_check_ime_libdir="$ompi_check_ime_dir/lib64" - else - ompi_check_ime_libdir="$ompi_check_ime_dir/lib" - fi - - # Add correct -I and -L flags - OPAL_CHECK_PACKAGE([$1], [ime_native.h], [im_client], [ime_client_native2_init], [], - [$ompi_check_ime_dir], [$ompi_check_ime_libdir], - [ompi_check_ime_happy="yes"], - [OPAL_CHECK_PACKAGE([$1], [ime_native.h], [im_client], [ime_native_init], [], - [$ompi_check_ime_dir], [$ompi_check_ime_libdir], - [ompi_check_ime_happy="yes"], - [ompi_check_ime_happy="no"]) - ]) - ]) + [ompi_check_ime_happy="no"], + [AS_IF([test -n "$with_ime" && test "$with_ime" != "yes"], + [ompi_check_ime_dir=$with_ime]) + + OPAL_CHECK_PACKAGE([$1], [ime_native.h], [im_client], [ime_client_native2_init], + [], [$ompi_check_ime_dir], [], + [ompi_check_ime_happy="yes"], + [ompi_check_ime_happy="no"])]) AS_IF([test "$ompi_check_ime_happy" = "yes"], - [$2], - [AS_IF([test ! -z "$with_ime" && test "$with_ime" != "no"], - [echo IME support not found]) - $3]) - - ]) + [$2], + [AS_IF([test ! -z "$with_ime" && test "$with_ime" != "no"], + [AC_MSG_ERROR([IME support requested but not found. 
Aborting])]) + $3]) + + OPAL_VAR_SCOPE_POP +]) diff --git a/config/ompi_check_lustre.m4 b/config/ompi_check_lustre.m4 index 7421ba01c67..b6e34400250 100644 --- a/config/ompi_check_lustre.m4 +++ b/config/ompi_check_lustre.m4 @@ -12,7 +12,7 @@ dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved dnl Copyright (c) 2008-2018 University of Houston. All rights reserved. -dnl Copyright (c) 2015-2018 Research Organization for Information Science +dnl Copyright (c) 2015-2021 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl Copyright (c) 2020 Triad National Security, LLC. All rights dnl reserved. @@ -30,15 +30,7 @@ dnl # support, otherwise executes action-if-not-found AC_DEFUN([OMPI_CHECK_LUSTRE],[ - check_lustre_CPPFLAGS= - check_lustre_LDFLAGS= - check_lustre_LIBS= - - check_lustre_save_LIBS="$LIBS" - check_lustre_save_LDFLAGS="$LDFLAGS" - check_lustre_save_CPPFLAGS="$CPPFLAGS" - - ompi_check_lustre_happy="yes" + OPAL_VAR_SCOPE_PUSH([ompi_check_lustre_happy ompi_check_lustre_dir]) # Get some configuration information AC_ARG_WITH([lustre], @@ -47,25 +39,15 @@ AC_DEFUN([OMPI_CHECK_LUSTRE],[ OPAL_CHECK_WITHDIR([lustre], [$with_lustre], [include/lustre/lustreapi.h]) AS_IF([test "$with_lustre" = "no"], - [ompi_check_lustre_happy=no]) + [ompi_check_lustre_happy="no"], + [AS_IF([test -n "$with_lustre" && test "$with_lustre" != "yes"], + [ompi_check_lustre_dir=$with_lustre]) - AS_IF([test "$ompi_check_lustre_happy" != "no" ], - [AC_MSG_CHECKING([looking for lustre libraries and header files in]) - AS_IF([test "$with_lustre" != "yes"], - [ompi_check_lustre_dir=$with_lustre - AC_MSG_RESULT([($ompi_check_lustre_dir)])], - [AC_MSG_RESULT([(default search paths)])]) - AS_IF([test -n "$with_lustre_libdir" && \ - test "$with_lustre_libdir" != "yes"], - [ompi_check_lustre_libdir=$with_lustre_libdir]) - ]) + 
OPAL_CHECK_PACKAGE([$1], [lustre/lustreapi.h], [lustreapi], [llapi_file_create], + [], [$ompi_check_lustre_dir], [], + [ompi_check_lustre_happy="yes"], + [ompi_check_lustre_happy="no"])]) - AS_IF([test "$ompi_check_lustre_happy" != "no" ], - [OPAL_CHECK_PACKAGE([$1], [lustre/lustreapi.h], [lustreapi], [llapi_file_create], - [], [$ompi_check_lustre_dir], [$ompi_check_lustre_libdir], - [ompi_check_lustre_happy="yes"], - [ompi_check_lustre_happy="no"])]) - AS_IF([test "$ompi_check_lustre_happy" = "yes"], [AC_MSG_CHECKING([for required lustre data structures]) cat > conftest.c <]) AC_CHECK_DECLS([ucp_ep_flush_nb, ucp_worker_flush_nb, ucp_request_check_status, ucp_put_nb, ucp_get_nb, - ucp_put_nbx, ucp_get_nbx, ucp_atomic_op_nbx], + ucp_put_nbx, ucp_get_nbx, ucp_atomic_op_nbx, + ucp_ep_flush_nbx], [], [], [#include ]) AC_CHECK_DECLS([ucm_test_events, @@ -137,7 +138,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[ UCP_ATOMIC_FETCH_OP_FOR, UCP_ATOMIC_FETCH_OP_FXOR, UCP_PARAM_FIELD_ESTIMATED_NUM_PPN, - UCP_WORKER_FLAG_IGNORE_REQUEST_LEAK], + UCP_WORKER_FLAG_IGNORE_REQUEST_LEAK, + UCP_OP_ATTR_FLAG_MULTI_SEND], [], [], [#include ]) AC_CHECK_DECLS([UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS], diff --git a/config/ompi_config_files.m4 b/config/ompi_config_files.m4 index 14e3077b03d..18f0e7c184c 100644 --- a/config/ompi_config_files.m4 +++ b/config/ompi_config_files.m4 @@ -6,6 +6,8 @@ # Copyright (c) 2018 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. +# Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights +# reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -25,7 +27,6 @@ AC_DEFUN([OMPI_CONFIG_FILES],[ ompi/debuggers/Makefile ompi/mpi/c/Makefile - ompi/mpi/c/profile/Makefile ompi/mpi/fortran/base/Makefile ompi/mpi/fortran/mpif-h/Makefile ompi/mpi/fortran/mpif-h/profile/Makefile @@ -47,7 +48,6 @@ AC_DEFUN([OMPI_CONFIG_FILES],[ ompi/mpi/fortran/mpiext-use-mpi/Makefile ompi/mpi/fortran/mpiext-use-mpi-f08/Makefile ompi/mpi/tool/Makefile - ompi/mpi/tool/profile/Makefile ompi/mpi/man/man3/Makefile ompi/mpi/man/man5/Makefile diff --git a/config/ompi_configure_options.m4 b/config/ompi_configure_options.m4 index c7780baf020..311301d3fec 100644 --- a/config/ompi_configure_options.m4 +++ b/config/ompi_configure_options.m4 @@ -231,9 +231,18 @@ AC_DEFINE_UNQUOTED(MPI_PARAM_CHECK, $mpi_param_check, AC_DEFINE_UNQUOTED(OMPI_PARAM_CHECK, $ompi_param_check, [Whether we want to check MPI parameters never or possible (an integer constant)]) +AC_MSG_CHECKING([if want ompio support]) AC_ARG_ENABLE([io-ompio], [AS_HELP_STRING([--disable-io-ompio], [Disable the ompio MPI-IO component])]) +if test "$enable_io_ompio" = "no" ; then + AC_MSG_RESULT([no]) + ompi_want_ompio=0 +else + AC_MSG_RESULT([yes]) + ompi_want_ompio=1 +fi +AM_CONDITIONAL(OMPI_OMPIO_SUPPORT, test "$ompi_want_ompio" = "1") ])dnl diff --git a/config/ompi_deleted_options.m4 b/config/ompi_deleted_options.m4 index d068c7c0e57..b3e53bb42d6 100644 --- a/config/ompi_deleted_options.m4 +++ b/config/ompi_deleted_options.m4 @@ -1,7 +1,7 @@ # -*- shell-script -*- # # Copyright (c) 2020 Intel, Inc. All rights reserved. -# Copyright (c) 2020 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2020-2021 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -10,7 +10,7 @@ # AC_DEFUN([OMPI_CHECK_DELETED_OPTIONS],[ - OPAL_VAR_SCOPE_PUSH([with_pmi_given with_pmi_libdir_given cxx]) + OPAL_VAR_SCOPE_PUSH([with_pmi_given with_pmi_libdir_given ompi_cxx_warn ompi_cxx_error]) # --with-pmi options were removed in v5.0 AC_ARG_WITH([pmi], @@ -41,38 +41,55 @@ AC_DEFUN([OMPI_CHECK_DELETED_OPTIONS],[ AC_MSG_ERROR([Build cannot continue.]) fi - # Open MPI C++ bindings were removed in v5.0 - cxx=0 - cxxseek=0 - cxxex=0 + # Note that we always *warn* if someone used a CLI option for a + # feature that has been deleted. If, however, they are disabling + # the deleted feature (e.g., --disable-mpi-cxx), then emitting a + # warning is good enough -- allow configure to continue. If, + # however, the user asked to enable a deleted feature, then + # configure needs to error out. + ompi_cxx_warn=0 + ompi_cxx_error=0 AC_ARG_ENABLE([mpi-cxx], [AS_HELP_STRING([--enable-mpi-cxx], [*DELETED* Build the MPI C++ bindings])], - [cxx=1]) + [ompi_cxx_warn=1 + AS_IF([test "$enable_mpi_cxx" != "no"], + [ompi_cxx_error=1]) + ]) AC_ARG_ENABLE([mpi-cxx-seek], [AS_HELP_STRING([--enable-mpi-cxx-seek], [*DELETED* Build support for MPI::SEEK])], - [cxxseek=1]) + [ompi_cxx_warn=1 + AS_IF([test "$enable_mpi_cxx_seek" != "no"], + [ompi_cxx_error=1]) + ]) AC_ARG_ENABLE([cxx-exceptions], [AS_HELP_STRING([--enable-cxx-exceptions], [*DELETED* Build support for C++ exceptions in the MPI C++ bindings])], - [cxxex=1]) + [ompi_cxx_warn=1 + AS_IF([test "$enable_cxx_exceptions" != "no"], + [ompi_cxx_error=1]) + ]) - AS_IF([test "$enable_mpi_cxx" = "no" ], - [cxx=0]) - - AS_IF([test "$enable_mpi_cxx_seek" = "no" ], - [cxxseek=0]) - - AS_IF([test "$enable_cxx_exceptions" = "no" ], - [cxxex=0]) - - AS_IF([test $cxx -eq 1 || test $cxxseek -eq 1 || test $cxxex -eq 1], - [AC_MSG_WARN([The MPI C++ bindings have been removed from Open MPI.]) + AS_IF([test $ompi_cxx_warn -eq 1], + [AC_MSG_WARN([An MPI C++ bindings-related command 
line option]) + AC_MSG_WARN([was given to "configure".]) + AC_MSG_WARN([ ]) + AC_MSG_WARN([This command line option will be removed in a future]) + AC_MSG_WARN([version of Open MPI; you should discontinue using it.]) + AC_MSG_WARN([You have been warned!]) + AC_MSG_WARN([ ]) + AC_MSG_WARN([The MPI C++ bindings were deprecated in the MPI-2.2]) + AC_MSG_WARN([standard in 2009, and removed from the MPI-3.0]) + AC_MSG_WARN([standard in 2012. The MPI C++ bindings were then]) + AC_MSG_WARN([removed from Open MPI v5.0.0 in 2022.]) + AC_MSG_WARN([ ]) AC_MSG_WARN([If you need support for the MPI C++ bindings, you]) AC_MSG_WARN([will need to use an older version of Open MPI.]) - AC_MSG_ERROR([Build cannot continue.]) ]) + AS_IF([test $ompi_cxx_error -eq 1], + [AC_MSG_ERROR([Build cannot continue.])]) + OPAL_VAR_SCOPE_POP ]) diff --git a/config/ompi_ext.m4 b/config/ompi_ext.m4 index 6f96ef2fdbe..5120b3603e7 100644 --- a/config/ompi_ext.m4 +++ b/config/ompi_ext.m4 @@ -34,6 +34,9 @@ AC_DEFUN([OMPI_EXT],[ dnl for OPAL_CONFIGURE_USER env variable AC_REQUIRE([OPAL_CONFIGURE_SETUP]) + m4_ifdef([ompi_mpiext_list], [], + [m4_fatal([Could not find MPI Extensions list. Aborting.])]) + # Note that we do not build DSO's here -- we *only* build convenience # libraries that get slurped into higher-level libraries # @@ -42,7 +45,7 @@ AC_DEFUN([OMPI_EXT],[ # AC_ARG_ENABLE([mpi-ext], [AS_HELP_STRING([--enable-mpi-ext[=LIST]], - [Comma-separated list of extensions that should be built. Possible values: ompi_mpiext_list. Example: "--enable-mpi-ext=foo,bar" will enable building the MPI extensions "foo" and "bar". If LIST is empty or the special value "all", then all available MPI extensions will be built (default: all).])]) + [Comma-separated list of extensions that should be built. Possible values: ]m4_quote(ompi_mpiext_list)[. Example: "--enable-mpi-ext=foo,bar" will enable building the MPI extensions "foo" and "bar". 
If LIST is empty or the special value "all", then all available MPI extensions will be built (default: all).])]) # print some nice messages about what we're about to do... AC_MSG_CHECKING([for available MPI Extensions]) @@ -74,9 +77,6 @@ AC_DEFUN([OMPI_EXT],[ AC_MSG_RESULT([$msg]) unset msg - m4_ifdef([ompi_mpiext_list], [], - [m4_fatal([Could not find MPI Extensions list. Aborting.])]) - EXT_CONFIGURE ]) @@ -390,6 +390,9 @@ AC_DEFUN([EXT_CONFIGURE_M4_CONFIG_COMPONENT],[ AS_IF([test $should_build -eq 1], [EXT_PROCESS_COMPONENT([$1], [$2], [$3], [$4], [$5], [$6], [$7])], [EXT_PROCESS_DEAD_COMPONENT([$1], [$2])]) + + m4_ifdef([OMPI_MPIEXT_$1_POST_CONFIG], + [OMPI_MPIEXT_$1_POST_CONFIG($should_build)]) ]) ###################################################################### diff --git a/config/ompi_interix.m4 b/config/ompi_interix.m4 index 7e4339a497b..2348128e769 100644 --- a/config/ompi_interix.m4 +++ b/config/ompi_interix.m4 @@ -3,6 +3,7 @@ dnl dnl Copyright (c) 2008 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. +dnl Copyright (c) 2021 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -24,10 +25,10 @@ dnl AC_DEFUN([OMPI_INTERIX],[ AC_MSG_CHECKING(for Interix environment) - AC_TRY_COMPILE([], + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [#if !defined(__INTERIX) #error Normal Unix environment - #endif], + #endif])], is_interix=yes, is_interix=no) AC_MSG_RESULT([$is_interix]) diff --git a/config/ompi_microsoft.m4 b/config/ompi_microsoft.m4 index b50db392c89..ea187d86a78 100644 --- a/config/ompi_microsoft.m4 +++ b/config/ompi_microsoft.m4 @@ -3,6 +3,7 @@ dnl dnl Copyright (c) 2004-2007 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. +dnl Copyright (c) 2021 Cisco Systems, Inc. All rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -39,17 +40,17 @@ AC_DEFUN([OMPI_MICROSOFT_COMPILER],[ # The atomic functions are defined in a very unuasual manner. # Some of them are intrinsic defined in windows.h others are - # exported by kernel32.dll. If we force the usage of AC_TRY_RUN + # exported by kernel32.dll. If we force the usage of AC RUN_IFELSE # here we will check for both in same time: compilation and run. AC_MSG_CHECKING(for working InterlockedCompareExchange) - AC_TRY_RUN( [#include + AC_RUN_IFELSE([AC_LANG_PROGRAM([#include int main() { LONG dest = 1, exchange = 0, comperand = 1; SetErrorMode(SEM_FAILCRITICALERRORS); InterlockedCompareExchange( &dest, exchange, comperand ); return (int)dest; - }], + }])], [AC_MSG_RESULT(yes) ompi_windows_have_support_for_32_bits_atomic=1], [AC_MSG_RESULT(no) @@ -59,13 +60,13 @@ AC_DEFUN([OMPI_MICROSOFT_COMPILER],[ [Whether we support 32 bits atomic operations on Windows]) AC_MSG_CHECKING(for working InterlockedCompareExchangeAcquire) - AC_TRY_RUN( [#include + AC_RUN_IFELSE([AC_LANG_PROGRAM([#include int main() { LONG dest = 1, exchange = 0, comperand = 1; SetErrorMode(SEM_FAILCRITICALERRORS); InterlockedCompareExchangeAcquire( &dest, exchange, comperand ); return (int)dest; - }], + }])], [AC_MSG_RESULT(yes) ompi_windows_have_support_for_32_bits_atomic=1], [AC_MSG_RESULT(no) @@ -75,13 +76,13 @@ AC_DEFUN([OMPI_MICROSOFT_COMPILER],[ [Whether we support 32 bits atomic operations on Windows]) AC_MSG_CHECKING(for working InterlockedCompareExchangeRelease) - AC_TRY_RUN( [#include + AC_RUN_IFELSE([AC_LANG_PROGRAM([#include int main() { LONG dest = 1, exchange = 0, comperand = 1; SetErrorMode(SEM_FAILCRITICALERRORS); InterlockedCompareExchangeRelease( &dest, exchange, comperand ); return (int)dest; - }], + }])], [AC_MSG_RESULT(yes) ompi_windows_have_support_for_32_bits_atomic=1], [AC_MSG_RESULT(no) @@ -91,13 +92,13 @@ AC_DEFUN([OMPI_MICROSOFT_COMPILER],[ [Whether we support 32 bits atomic operations on 
Windows]) AC_MSG_CHECKING(for working InterlockedCompareExchange64) - AC_TRY_RUN( [#include + AC_RUN_IFELSE([AC_LANG_PROGRAM([#include int main() { LONGLONG dest = 1, exchange = 0, comperand = 1; SetErrorMode(SEM_FAILCRITICALERRORS); InterlockedCompareExchange64( &dest, exchange, comperand ); return (int)dest; - }], + }])], [AC_MSG_RESULT(yes) ompi_windows_have_support_for_64_bits_atomic=1], [AC_MSG_RESULT(no) diff --git a/config/ompi_setup_fc.m4 b/config/ompi_setup_fc.m4 index 9fcf482d58c..d18d6bf3837 100644 --- a/config/ompi_setup_fc.m4 +++ b/config/ompi_setup_fc.m4 @@ -72,12 +72,6 @@ AC_DEFUN([OMPI_SETUP_FC],[ ompi_fc_happy=0], [ompi_fc_happy=1]) - AS_IF([test $ompi_fc_happy -eq 1 && test "$WANT_DEBUG" = "1" && test "$enable_debug_symbols" != "no"], - [FCFLAGS="$FCFLAGS -g" - OPAL_FLAGS_UNIQ(FCFLAGS) - AC_MSG_WARN([-g has been added to FCFLAGS (--enable-debug)]) - ]) - # Make sure the compiler actually works, if not cross-compiling. # Don't just use the AC macro so that we can have a pretty # message. diff --git a/config/ompi_setup_prrte.m4 b/config/ompi_setup_prrte.m4 index fe6eb17308a..271516298ce 100644 --- a/config/ompi_setup_prrte.m4 +++ b/config/ompi_setup_prrte.m4 @@ -16,7 +16,7 @@ dnl Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. dnl Copyright (c) 2019-2020 Intel, Inc. All rights reserved. -dnl Copyright (c) 2020 Amazon.com, Inc. or its affiliates. +dnl Copyright (c) 2020-2021 Amazon.com, Inc. or its affiliates. dnl All Rights reserved. dnl Copyright (c) 2021 Nanook Consulting. All rights reserved. dnl Copyright (c) 2021 IBM Corporation. All rights reserved. @@ -43,46 +43,28 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[ opal_show_subtitle "Configuring PRRTE" - # Don't use OPAL_3RDPARTY_WITH because it will not allow packages - # to be disabled - m4_ifdef([package_prrte], - [AC_ARG_WITH([prrte], - [AS_HELP_STRING([--with-prrte(=DIR)], - [Build PRTE support. 
DIR can take one of four values: "internal", "external", "no", or a valid directory name. "internal" forces Open MPI to use its internal copy of PRRTE. "external" forces Open MPI to use an external installation of PRRTE. Supplying a valid directory name also forces Open MPI to use an external installation of PRRTE, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. If no argument is specified, Open MPI will search default locations for PRRTE and fall back to an internal version if one is not found.])])], - [AC_ARG_WITH([prrte], - [AS_HELP_STRING([--with-prrte(=DIR)], - [Build PRRTE support. DIR can take one of three values: "external", "no", or a valid directory name. "external" forces Open MPI to use an external installation of PRRTE. Supplying a valid directory name also forces Open MPI to use an external installation of PRRTE, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. If no argument is specified, Open MPI will search default locations for PRRTE and disable creating mpirun symlinks if one is not found.])])]) - - m4_ifdef([package_prrte], - [OMPI_PRRTE_ADD_ARGS]) - - # clean up $with_prrte so that it contains only a path or empty - # string. To determine internal or external preferences, use - # $opal_prrte_mode. 
- AS_IF([test "$with_prrte" = "yes"], [with_prrte=]) - AS_CASE([$with_prrte], - ["internal"], [with_prrte="" - opal_prrte_mode="internal"], - ["external"], [with_prrte="" - opal_prrte_mode="external"], - [""], [opal_prrte_mode="unspecified"], - ["no"], [opal_prrte_mode="disabled"], - [opal_prrte_mode="external"]) - - echo "with_prrte: $with_prrte" - echo "opal_prrte_mode: $opal_prrte_mode" - - m4_ifdef([package_prrte], [], - [AS_IF([test "$opal_prrte_mode" = "internal"], - [AC_MSG_WARN([Invalid argument to --with-prrte: internal.]) - AC_MSG_ERROR([Cannot continue])])]) + OPAL_3RDPARTY_WITH([prrte], [prrte], [package_prrte], [1]) prrte_setup_internal_happy=0 - m4_ifdef([package_prrte], [ - # always configure the internal prrte, so that - # make dist always works. - AS_IF([test "$opal_prrte_mode" = "disabled"], [prrte_setup_success_var=0], [prrte_setup_success_var=1]) - _OMPI_SETUP_PRRTE_INTERNAL([prrte_setup_internal_happy=$prrte_setup_success_var])]) + m4_ifdef([package_prrte], + [OMPI_PRRTE_ADD_ARGS + AS_IF([test "$opal_prrte_mode" = "unspecified" -o "$opal_prrte_mode" = "internal"], + [# Run PRRTE's configure script unless the user + # explicitly asked us to use an external PMIX, so that + # "make dist" includes PRRTE in the dist tarball. This + # does mean that "make dist" will not work if Open MPI + # was configured to use an external PRRTE library, but + # we decided this was a reasonable tradeoff for not + # having to deal with PRRTE (or PMIx) potentially + # failing to configure in a situation where it isn't + # desired. + _OMPI_SETUP_PRRTE_INTERNAL([prrte_setup_internal_happy=1], + [prrte_setup_internal_happy=0])]) + + # if we have a pmix package and configure did not complete + # successfullly (or wasn't started), then disable make dist. + AS_IF([test $prrte_setup_internal_happy != 1], + [OPAL_MAKEDIST_DISABLE="$OPAL_MAKEDIST_DISABLE PRRTE"])]) # unless internal specifically requested by the user, try to find # an external that works. 
@@ -98,7 +80,9 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[ # so try the internal version. AS_IF([test "$prrte_setup_external_happy" = "0" -a "$prrte_setup_internal_happy" = "1"], [opal_prrte_mode="internal" - _OMPI_SETUP_PRRTE_INTERNAL_POST()]) + OMPI_USING_INTERNAL_PRRTE=1 + _OMPI_SETUP_PRRTE_INTERNAL_POST()], + [OMPI_USING_INTERNAL_PRRTE=0]) AS_IF([test "$opal_prrte_mode" != "disabled"], [AS_IF([test "$prrte_setup_external_happy" = "0" -a "$prrte_setup_internal_happy" = "0"], @@ -115,6 +99,10 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[ [$OMPI_HAVE_PRRTE], [Whether or not PRRTE is available]) + AC_DEFINE_UNQUOTED([OMPI_USING_INTERNAL_PRRTE], + [$OMPI_USING_INTERNAL_PRRTE], + [Whether or not we are using the internal PRRTE]) + OPAL_SUMMARY_ADD([[Miscellaneous]], [[prrte]], [prrte], [$opal_prrte_mode]) OPAL_VAR_SCOPE_POP @@ -159,7 +147,6 @@ AC_DEFUN([_OMPI_SETUP_PRRTE_INTERNAL], [ internal_prrte_CPPFLAGS= internal_prrte_args="--with-proxy-version-string=$OPAL_VERSION --with-proxy-package-name=\"Open MPI\" --with-proxy-bugreport=\"https://www.open-mpi.org/community/help/\"" - internal_prrte_libs= # Set --enable-prte-prefix-by-default to the deprecated options, # if they were specified. 
Otherwise, set it to enabled if the @@ -171,19 +158,19 @@ AC_DEFUN([_OMPI_SETUP_PRRTE_INTERNAL], [ [internal_prrte_args="$internal_prrte_args --enable-prte-prefix-by-default"]) AS_IF([test "$opal_libevent_mode" = "internal"], - [internal_prrte_args="$internal_prrte_args --with-libevent-header=$opal_libevent_header" - internal_prrte_CPPFLAGS="$internal_prrte_CPPFLAGS $opal_libevent_CPPFLAGS" - internal_prrte_libs="$internal_prrte_libs $opal_libevent_LIBS"]) + [internal_prrte_args="$internal_prrte_args --with-libevent --disable-libevent-lib-checks" + internal_prrte_args="$internal_prrte_args --with-libevent-extra-libs=\"$opal_libevent_LIBS\"" + internal_prrte_CPPFLAGS="$internal_prrte_CPPFLAGS $opal_libevent_CPPFLAGS"]) AS_IF([test "$opal_hwloc_mode" = "internal"], - [internal_prrte_args="$internal_prrte_args --with-hwloc-header=$opal_hwloc_header" - internal_prrte_CPPFLAGS="$internal_prrte_CPPFLAGS $opal_hwloc_CPPFLAGS" - internal_prrte_libs="$internal_prrte_libs $opal_hwloc_LIBS"]) + [internal_prrte_args="$internal_prrte_args --disable-hwloc-lib-checks" + internal_prrte_args="$internal_prrte_args --with-hwloc-extra-libs=\"$opal_hwloc_LIBS\"" + internal_prrte_CPPFLAGS="$internal_prrte_CPPFLAGS $opal_hwloc_CPPFLAGS"]) AS_IF([test "$opal_pmix_mode" = "internal"], - [internal_prrte_args="$internal_prrte_args --with-pmix-header=$opal_pmix_header" - internal_prrte_CPPFLAGS="$internal_prrte_CPPFLAGS $opal_pmix_CPPFLAGS" - internal_prrte_libs="$internal_prrte_libs $opal_pmix_LIBS"]) + [internal_prrte_args="$internal_prrte_args --disable-pmix-lib-checks" + internal_prrte_args="$internal_prrte_args --with-pmix-extra-libs=\"$opal_pmix_LIBS\"" + internal_prrte_CPPFLAGS="$internal_prrte_CPPFLAGS $opal_pmix_CPPFLAGS"]) AC_MSG_CHECKING([if PMIx version is 4.0.0 or greater]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], @@ -202,8 +189,6 @@ AC_DEFUN([_OMPI_SETUP_PRRTE_INTERNAL], [ AC_MSG_WARN([--without-prrte option.]) AC_MSG_ERROR([Cannot continue])]) - # add the extra libs - 
internal_prrte_args="$internal_prrte_args --with-prte-extra-lib=\"$internal_prrte_libs\" --with-prte-extra-ltlib=\"$internal_prrte_libs\"" AS_IF([test "$with_ft" != "no"], [internal_prrte_args="--enable-prte-ft $internal_prrte_args"], []) diff --git a/config/opal_check_attributes.m4 b/config/opal_check_attributes.m4 index b4da2ca54cd..9b2d5488f38 100644 --- a/config/opal_check_attributes.m4 +++ b/config/opal_check_attributes.m4 @@ -11,7 +11,7 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. -dnl Copyright (c) 2010-2018 Cisco Systems, Inc. All rights reserved +dnl Copyright (c) 2010-2021 Cisco Systems, Inc. All rights reserved dnl Copyright (c) 2013 Mellanox Technologies, Inc. dnl All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science @@ -64,7 +64,7 @@ AC_DEFUN([_OPAL_CHECK_SPECIFIC_ATTRIBUTE], [ # # Try to compile using the C compiler, then C++ # - AC_TRY_COMPILE([$2],[], + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([$2],[])], [ # # In case we did succeed: Fine, but was this due to the @@ -82,10 +82,10 @@ AC_DEFUN([_OPAL_CHECK_SPECIFIC_ATTRIBUTE], [ m4_ifdef([project_ompi], [if test "$opal_cv___attribute__[$1]" = "1" ; then AC_LANG_PUSH(C++) - AC_TRY_COMPILE([ + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ extern "C" { $2 - }],[], + }],[])], [ opal_cv___attribute__[$1]=1 _OPAL_ATTRIBUTE_FAIL_SEARCH([$1]) @@ -103,11 +103,11 @@ AC_DEFUN([_OPAL_CHECK_SPECIFIC_ATTRIBUTE], [ CFLAGS_safe=$CFLAGS CFLAGS="$CFLAGS [$4]" - AC_TRY_COMPILE([$3], + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([$3], [ int i=4711; i=usage(&i); - ], + ])], [opal_cv___attribute__[$1]=0], [ # @@ -161,7 +161,7 @@ AC_DEFUN([OPAL_CHECK_ATTRIBUTES], [ AC_MSG_CHECKING(for __attribute__) AC_CACHE_VAL(opal_cv___attribute__, [ - AC_TRY_COMPILE( + AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [#include /* Check for the longest available 
__attribute__ (since gcc-2.3) */ struct foo { @@ -169,13 +169,13 @@ AC_DEFUN([OPAL_CHECK_ATTRIBUTES], [ int x[2] __attribute__ ((__packed__)); }; ], - [], + [])], [opal_cv___attribute__=1], [opal_cv___attribute__=0], ) if test "$opal_cv___attribute__" = "1" ; then - AC_TRY_COMPILE( + AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [#include /* Check for the longest available __attribute__ (since gcc-2.3) */ struct foo { @@ -183,7 +183,7 @@ AC_DEFUN([OPAL_CHECK_ATTRIBUTES], [ int x[2] __attribute__ ((__packed__)); }; ], - [], + [])], [opal_cv___attribute__=1], [opal_cv___attribute__=0], ) diff --git a/config/opal_check_cflags.m4 b/config/opal_check_cflags.m4 index 59fc0ce1ace..8bd27eea325 100644 --- a/config/opal_check_cflags.m4 +++ b/config/opal_check_cflags.m4 @@ -1,6 +1,7 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2021 IBM Corporation. All rights reserved. +dnl Copyright (c) 2021 Cisco Systems, Inc. All rights reserved. dnl dnl $COPYRIGHT$ dnl @@ -24,14 +25,15 @@ AC_MSG_CHECKING(if $CC supports ([$1])) CFLAGS_orig=$CFLAGS CFLAGS="$CFLAGS $1" AC_CACHE_VAL(opal_cv_cc_[$2], [ - AC_TRY_COMPILE([], [$3], + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [$3])], [ opal_cv_cc_[$2]=1 - _OPAL_CFLAGS_FAIL_SEARCH("ignored\|not recognized\|not supported\|not compatible\|unrecognized\|unknown", [$2]) + _OPAL_CFLAGS_FAIL_SEARCH(["ignored\|not recognized\|not supported\|not compatible\|unrecognized\|unknown"], [$2]) ], + [ opal_cv_cc_[$2]=1 - _OPAL_CFLAGS_FAIL_SEARCH("ignored\|not recognized\|not supported\|not compatible\|unrecognized\|unknown\|error", [$2]) - )]) + _OPAL_CFLAGS_FAIL_SEARCH(["ignored\|not recognized\|not supported\|not compatible\|unrecognized\|unknown\|error"], [$2]) + ])]) if test "$opal_cv_cc_[$2]" = "0" ; then CFLAGS="$CFLAGS_orig" AC_MSG_RESULT([no]) @@ -55,14 +57,15 @@ AC_MSG_CHECKING(if $CXX supports ([$1])) CXXFLAGS_orig=$CXXFLAGS CXXFLAGS="$CXXFLAGS $1" AC_CACHE_VAL(opal_cv_cxx_[$2], [ - AC_TRY_COMPILE([], [$3], + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [$3])], [ 
opal_cv_cxx_[$2]=1 - _OPAL_CXXFLAGS_FAIL_SEARCH("ignored\|not recognized\|not supported\|not compatible\|unrecognized\|unknown", [$2]) + _OPAL_CXXFLAGS_FAIL_SEARCH(["ignored\|not recognized\|not supported\|not compatible\|unrecognized\|unknown"], [$2]) ], + [ opal_cv_cxx_[$2]=1 - _OPAL_CXXFLAGS_FAIL_SEARCH("ignored\|not recognized\|not supported\|not compatible\|unrecognized\|unknown\|error", [$2]) - )]) + _OPAL_CXXFLAGS_FAIL_SEARCH(["ignored\|not recognized\|not supported\|not compatible\|unrecognized\|unknown\|error"], [$2]) + ])]) if test "$opal_cv_cxx_[$2]" = "0" ; then CXXFLAGS="$CXXFLAGS_orig" AC_MSG_RESULT([no]) diff --git a/config/opal_check_compiler_version.m4 b/config/opal_check_compiler_version.m4 index da8321cdf1e..e3462f7c10e 100644 --- a/config/opal_check_compiler_version.m4 +++ b/config/opal_check_compiler_version.m4 @@ -1,6 +1,9 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. +dnl Copyright (c) 2021 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights +dnl reserved. 
dnl dnl $COPYRIGHT$ dnl @@ -19,105 +22,36 @@ dnl AC_DEFUN([OPAL_CHECK_COMPILER_VERSION_ID], [ OPAL_CHECK_COMPILER(FAMILYID) - OPAL_CHECK_COMPILER_STRINGIFY(FAMILYNAME) OPAL_CHECK_COMPILER(VERSION) - OPAL_CHECK_COMPILER_STRING(VERSION_STR) ])dnl AC_DEFUN([OPAL_CHECK_COMPILER], [ - lower=m4_tolower($1) - AC_CACHE_CHECK([for compiler $lower], opal_cv_compiler_[$1], + AS_LITERAL_IF([$1], [], + [m4_fatal([OPAL_CHECK_COMPILER argument must be a literal])]) + lower=m4_tolower([$1]) + AC_CACHE_CHECK([for compiler $lower], [opal_cv_compiler_$1], [ CPPFLAGS_orig=$CPPFLAGS - CPPFLAGS="-I${OPAL_TOP_SRCDIR}/opal/include/opal $CPPFLAGS" - AC_TRY_RUN([ + CPPFLAGS="-I${OPAL_TOP_SRCDIR}/opal/include $CPPFLAGS" + AC_RUN_IFELSE([AC_LANG_PROGRAM([[ #include #include -#include "opal_portable_platform.h" - -int main (int argc, char * argv[]) -{ +#include "opal/opal_portable_platform.h" +]],[[ FILE * f; f=fopen("conftestval", "w"); if (!f) exit(1); fprintf (f, "%d", PLATFORM_COMPILER_$1); - return 0; -} - ], [ - eval opal_cv_compiler_$1=`cat conftestval`; - ], [ - eval opal_cv_compiler_$1=0 - ], [ - eval opal_cv_compiler_$1=0 - ]) - CPPFLAGS=$CPPFLAGS_orig - ]) - AC_DEFINE_UNQUOTED([OPAL_BUILD_PLATFORM_COMPILER_$1], $opal_cv_compiler_[$1], - [The compiler $lower which OMPI was built with]) -])dnl - -AC_DEFUN([OPAL_CHECK_COMPILER_STRING], [ - lower=m4_tolower($1) - AC_CACHE_CHECK([for compiler $lower], opal_cv_compiler_[$1], - [ - CPPFLAGS_orig=$CPPFLAGS - CPPFLAGS="-I${OPAL_TOP_SRCDIR}/opal/include/opal $CPPFLAGS" - AC_TRY_RUN([ -#include -#include -#include "opal_portable_platform.h" - -int main (int argc, char * argv[]) -{ - FILE * f; - f=fopen("conftestval", "w"); - if (!f) exit(1); - fprintf (f, "%s", PLATFORM_COMPILER_$1); - return 0; -} - ], [ - eval opal_cv_compiler_$1=`cat conftestval`; - ], [ - eval opal_cv_compiler_$1=UNKNOWN - ], [ - eval opal_cv_compiler_$1=UNKNOWN - ]) - CPPFLAGS=$CPPFLAGS_orig - ]) - AC_DEFINE_UNQUOTED([OPAL_BUILD_PLATFORM_COMPILER_$1], 
$opal_cv_compiler_[$1], - [The compiler $lower which OMPI was built with]) -])dnl - - -AC_DEFUN([OPAL_CHECK_COMPILER_STRINGIFY], [ - lower=m4_tolower($1) - AC_CACHE_CHECK([for compiler $lower], opal_cv_compiler_[$1], - [ - CPPFLAGS_orig=$CPPFLAGS - CPPFLAGS="-I${OPAL_TOP_SRCDIR}/opal/include/opal $CPPFLAGS" - AC_TRY_RUN([ -#include -#include -#include "opal_portable_platform.h" - -int main (int argc, char * argv[]) -{ - FILE * f; - f=fopen("conftestval", "w"); - if (!f) exit(1); - fprintf (f, "%s", _STRINGIFY(PLATFORM_COMPILER_$1)); - return 0; -} - ], [ - eval opal_cv_compiler_$1=`cat conftestval`; + ]])], [ + opal_cv_compiler_$1=`cat conftestval` ], [ - eval opal_cv_compiler_$1=UNKNOWN + opal_cv_compiler_$1=0 ], [ - eval opal_cv_compiler_$1=UNKNOWN + opal_cv_compiler_$1=0 ]) CPPFLAGS=$CPPFLAGS_orig ]) - AC_DEFINE_UNQUOTED([OPAL_BUILD_PLATFORM_COMPILER_$1], $opal_cv_compiler_[$1], + AC_DEFINE_UNQUOTED([OPAL_BUILD_PLATFORM_COMPILER_$1], [$opal_cv_compiler_$1], [The compiler $lower which OMPI was built with]) ])dnl diff --git a/config/opal_check_icc.m4 b/config/opal_check_icc.m4 deleted file mode 100644 index 9c60fa29701..00000000000 --- a/config/opal_check_icc.m4 +++ /dev/null @@ -1,60 +0,0 @@ -dnl -*- shell-script -*- -dnl -dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -dnl University Research and Technology -dnl Corporation. All rights reserved. -dnl Copyright (c) 2004-2005 The University of Tennessee and The University -dnl of Tennessee Research Foundation. All rights -dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -dnl University of Stuttgart. All rights reserved. -dnl Copyright (c) 2004-2005 The Regents of the University of California. -dnl All rights reserved. -dnl Copyright (c) 2014 Intel, Inc. All rights reserved. 
-dnl $COPYRIGHT$ -dnl -dnl Additional copyrights may follow -dnl -dnl $HEADER$ -dnl - -AC_DEFUN([OPAL_CHECK_ICC_VARARGS],[ -dnl -dnl On EM64T, icc-8.1 before version 8.1.027 segfaulted, since -dnl va_start was miscompiled... -dnl -AC_MSG_CHECKING([whether icc-8.1 for EM64T works with variable arguments]) -AC_TRY_RUN([ -#include -#include -#include - -void func (int c, char * f, ...) -{ - va_list arglist; - va_start (arglist, f); - /* vprintf (f, arglist); */ - va_end (arglist); -} - -int main () -{ - FILE *f; - func (4711, "Help %d [%s]\n", 10, "ten"); - f=fopen ("conftestval", "w"); - if (!f) exit (1); - return 0; -} - -],[opal_ac_icc_varargs=`test -f conftestval`],[opal_ac_icc_varargs=1],[opal_ac_icc_varargs=1]) - -if test "$opal_ac_icc_varargs" = "1"; then - AC_MSG_WARN([*** Problem running configure test!]) - AC_MSG_WARN([*** Your icc-8.1 compiler seems to miscompile va_start!]) - AC_MSG_WARN([*** Please upgrade compiler to at least version 8.1.027]) - AC_MSG_ERROR([*** Cannot continue.]) -fi - -AC_MSG_RESULT([yes]) - -rm -rf conftest*])dnl diff --git a/config/opal_check_ident.m4 b/config/opal_check_ident.m4 deleted file mode 100644 index 7f79b68648e..00000000000 --- a/config/opal_check_ident.m4 +++ /dev/null @@ -1,101 +0,0 @@ -dnl -*- shell-script -*- -dnl -dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. -dnl Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2015 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. 
-dnl $COPYRIGHT$ -dnl -dnl Additional copyrights may follow -dnl -dnl $HEADER$ -dnl -dnl defines: -dnl OPAL_$1_USE_PRAGMA_IDENT -dnl OPAL_$1_USE_IDENT -dnl OPAL_$1_USE_CONST_CHAR_IDENT -dnl - -# OPAL_CHECK_IDENT(compiler-env, compiler-flags, -# file-suffix, lang) Try to compile a source file containing -# a #pragma ident, and determine whether the ident was -# inserted into the resulting object file -# ----------------------------------------------------------- -AC_DEFUN([OPAL_CHECK_IDENT], [ - AC_MSG_CHECKING([for $4 ident string support]) - - opal_pragma_ident_happy=0 - opal_ident_happy=0 - opal_static_const_char_happy=0 - _OPAL_CHECK_IDENT( - [$1], [$2], [$3], - [[#]pragma ident], [], - [opal_pragma_ident_happy=1 - opal_message="[#]pragma ident"], - _OPAL_CHECK_IDENT( - [$1], [$2], [$3], - [[#]ident], [], - [opal_ident_happy=1 - opal_message="[#]ident"], - _OPAL_CHECK_IDENT( - [$1], [$2], [$3], - [[#]pragma comment(exestr, ], [)], - [opal_pragma_comment_happy=1 - opal_message="[#]pragma comment"], - [opal_static_const_char_happy=1 - opal_message="static const char[[]]"]))) - - AC_DEFINE_UNQUOTED([OPAL_$1_USE_PRAGMA_IDENT], - [$opal_pragma_ident_happy], [Use #pragma ident strings for $4 files]) - AC_DEFINE_UNQUOTED([OPAL_$1_USE_IDENT], - [$opal_ident_happy], [Use #ident strings for $4 files]) - AC_DEFINE_UNQUOTED([OPAL_$1_USE_PRAGMA_COMMENT], - [$opal_pragma_comment_happy], [Use #pragma comment for $4 files]) - AC_DEFINE_UNQUOTED([OPAL_$1_USE_CONST_CHAR_IDENT], - [$opal_static_const_char_happy], [Use static const char[] strings for $4 files]) - - AC_MSG_RESULT([$opal_message]) - - unset opal_pragma_ident_happy opal_ident_happy opal_static_const_char_happy opal_message -]) - -# _OPAL_CHECK_IDENT(compiler-env, compiler-flags, -# file-suffix, header_prefix, header_suffix, action-if-success, action-if-fail) -# Try to compile a source file containing a #-style ident, -# and determine whether the ident was inserted into the -# resulting object file -# 
----------------------------------------------------------- -AC_DEFUN([_OPAL_CHECK_IDENT], [ - eval opal_compiler="\$$1" - eval opal_flags="\$$2" - - opal_ident="string_not_coincidentally_inserted_by_the_compiler" - cat > conftest.$3 <&1 1>/dev/null - opal_status=$? - AS_IF([test "$opal_output" != "" || test "$opal_status" = "0"], - [$6], - [$7])], - [OPAL_LOG_MSG([the failed program was:]) - OPAL_LOG_FILE([conftest.$3]) - $7] - [$7])]) - - unset opal_compiler opal_flags opal_output opal_status - rm -rf conftest.* conftest${EXEEXT} -])dnl diff --git a/config/opal_check_ofi.m4 b/config/opal_check_ofi.m4 index 072d6ab1e7c..6149be42273 100644 --- a/config/opal_check_ofi.m4 +++ b/config/opal_check_ofi.m4 @@ -3,6 +3,8 @@ dnl dnl Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights dnl reserved. +dnl Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights +dnl reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -124,19 +126,30 @@ AC_DEFUN([_OPAL_CHECK_OFI],[ CPPFLAGS="$CPPFLAGS $opal_ofi_CPPFLAGS" AS_IF([test $opal_ofi_happy = yes], - [AC_CHECK_MEMBER([struct fi_info.nic], + [AC_CHECK_HEADERS([rdma/fi_ext.h]) + + AC_CHECK_MEMBER([struct fi_info.nic], [opal_check_fi_info_pci=1], [opal_check_fi_info_pci=0], - [[#include ]])]) + [[#include ]]) + + AC_DEFINE_UNQUOTED([OPAL_OFI_PCI_DATA_AVAILABLE], + [$opal_check_fi_info_pci], + [check if pci data is available in ofi]) + + AC_CHECK_DECLS([PMIX_PACKAGE_RANK], + [], + [], + [#include ]) - AC_DEFINE_UNQUOTED([OPAL_OFI_PCI_DATA_AVAILABLE], - [$opal_check_fi_info_pci], - [check if pci data is available in ofi]) + AC_CHECK_DECLS([FI_OPT_FI_HMEM_P2P], + [], [], + [#include ]) - AC_CHECK_DECLS([PMIX_PACKAGE_RANK], - [], - [], - [#include ]) + AC_CHECK_TYPES([struct fi_ops_mem_monitor], [], [], + [#ifdef HAVE_RDMA_FI_EXT_H +#include +#endif])]) CPPFLAGS=$opal_check_ofi_save_CPPFLAGS LDFLAGS=$opal_check_ofi_save_LDFLAGS 
diff --git a/config/opal_check_package.m4 b/config/opal_check_package.m4 index dcf4c45bffc..d74f8e1fd66 100644 --- a/config/opal_check_package.m4 +++ b/config/opal_check_package.m4 @@ -13,8 +13,8 @@ dnl All rights reserved. dnl Copyright (c) 2012-2017 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. -dnl Copyright (c) 2015-2016 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2015-2021 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -185,6 +185,7 @@ dnl * header_filename: the foo.h file to check for dnl * library_name / function_name: check for function function_name in dnl -llibrary_name. Specifically, for library_name, use the "foo" form, dnl as opposed to "libfoo". +dnl * function: the function to check for existence dnl * extra_libraries: if the library_name you are checking for requires dnl additional -l arguments to link successfully, list them here. dnl * dir_prefix: if the header/library is located in a non-standard diff --git a/config/opal_check_pc.m4 b/config/opal_check_pc.m4 new file mode 100644 index 00000000000..7da069a84d3 --- /dev/null +++ b/config/opal_check_pc.m4 @@ -0,0 +1,72 @@ +dnl -*- autoconf -*- +dnl +dnl Copyright (c) 2021 IBM Corporation. All rights reserved. +dnl Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights +dnl reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + + +dnl OPAL_CHECK_PKG_CONFIG() +dnl +dnl Check for availability of pkg-config and store the result. +dnl If it is not available, store any passed in libs from the +dnl --with-extra-libs configure option, or the known defaults. 
+dnl +dnl If it is available, allow configury to check for .pc files +dnl and append to OPAL_WRAPPER_EXTRA_LIBS. +AC_DEFUN([OPAL_CHECK_PKG_CONFIG], [ + AC_CHECK_PROG([PKG_CONFIG], [pkg-config], [pkg-config]) +]) + +dnl OPAL_GET_LDFLAGS_FROM_PC(library name, variable-to-set-if-found, +dnl action-if-not-found) +AC_DEFUN([OPAL_GET_LDFLAGS_FROM_PC], [ + OPAL_VAR_SCOPE_PUSH([pkg_config_results happy]) + + AC_REQUIRE([OPAL_CHECK_PKG_CONFIG]) + + AC_MSG_CHECKING([for ldflags from pkg-config file $1]) + + happy=1 + AS_IF([test "$PKG_CONFIG" = ""], + [happy=0], + [OPAL_LOG_COMMAND([pkg_config_results=`$PKG_CONFIG --static --libs-only-L --libs-only-other $1`], + [AS_VAR_COPY([$2], [pkg_config_results])], + [happy=0])]) + AS_IF([test $happy -eq 0], + [pkg_config_results="none" + $3]) + + AC_MSG_RESULT([$pkg_config_results]) + + OPAL_VAR_SCOPE_POP +]) + +dnl OPAL_GET_LIBS_FROM_PC(library name, variable-to-set-if-found, +dnl action-if-not-found) +AC_DEFUN([OPAL_GET_LIBS_FROM_PC], [ + OPAL_VAR_SCOPE_PUSH([pkg_config_results happy]) + + AC_REQUIRE([OPAL_CHECK_PKG_CONFIG]) + + AC_MSG_CHECKING([for libs from pkg-config file $1]) + + happy=1 + AS_IF([test "$PKG_CONFIG" = ""], + [happy=0], + [OPAL_LOG_COMMAND([pkg_config_results=`$PKG_CONFIG --libs-only-l $1`], + [AS_VAR_COPY([$2], [pkg_config_results])], + [happy=0])]) + AS_IF([test $happy -eq 0], + [pkg_config_results="none" + $3]) + + AC_MSG_RESULT([$pkg_config_results]) + + OPAL_VAR_SCOPE_POP +]) diff --git a/config/opal_config_3rdparty.m4 b/config/opal_config_3rdparty.m4 index c80f20e12d7..980ea165266 100644 --- a/config/opal_config_3rdparty.m4 +++ b/config/opal_config_3rdparty.m4 @@ -4,7 +4,7 @@ dnl Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved dnl Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. dnl Copyright (c) 2015-2018 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. -dnl Copyright (c) 2020 Amazon.com, Inc. or its affiliates. 
All Rights +dnl Copyright (c) 2020-2021 Amazon.com, Inc. or its affiliates. All Rights dnl reserved. dnl $COPYRIGHT$ dnl @@ -13,44 +13,58 @@ dnl dnl $HEADER$ dnl -dnl OPAL_3RDPARTY_WITH(short package name, long package name, internal supported) +dnl OPAL_3RDPARTY_WITH(short package name, long package name, +dnl internal supported, disabled ok) dnl dnl Basic --with-pkg/--with-pkg-libdir handling for 3rd party dnl packages, with the big long description of internal/external/path dnl handling. dnl dnl At the end of this macro, with_pkg will contain an empty string or -dnl a path (implying external). Further, the shell variable opal_pkg_mode -dnl will be set to "internal", "external", or "unspecified". If a path is -dnl given to --with-pkg, then opal_pkg_mode will be set to external. +dnl a path (the latter implying external). Further, the shell variable +dnl opal_pkg_mode will be set to "internal", "external", +dnl "unspecified", or "disabled". If a path is given to --with-pkg, then +dnl opal_pkg_mode will be set to external. If "internal supported" is +dnl not defined, then opal_pkg_mode will not be internal. If +dnl "disabled ok" is not defined, then opal_pkg_mode will not be +dnl "disabled". dnl dnl If m4_ifdef(internal support) does not evaluate to true (ie, at dnl autogen time), the references to internal in the help strings will dnl be removed and internal will not be a supported option. dnl +dnl If m4_ifval(disabled ok) does not evaluate to true (ie, at autogen +dnl time), then --without-pkg will not be a valid configure option and +dnl will raise an error. dnl dnl $1: short package name dnl $2: long pacakage name AC_DEFUN([OPAL_3RDPARTY_WITH], [ - m4_ifdef([$3], - [AC_ARG_WITH([$1], - [AS_HELP_STRING([--with-$1(=DIR)], - [Build $2 support. DIR can take one of three values: "internal", "external", or a valid directory name. "internal" forces Open MPI to use its internal copy of $2. "external" forces Open MPI to use an external installation of $2.
Supplying a valid directory name also forces Open MPI to use an external installation of $2, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. Note that Open MPI no longer supports --without-$1. If no argument is specified, Open MPI will search default locations for $2 and fall back to an internal version if one is not found.])]) - - AC_ARG_WITH([$1-libdir], - [AS_HELP_STRING([--with-$1-libdir=DIR], - [Search for $2 libraries in DIR. Should only be used if an external copy of $2 is being used.])])], - [AC_ARG_WITH([$1], - [AS_HELP_STRING([--with-$1(=DIR)], - [Build $2 support. DIR can take one of two values: "external" or a valid directory name. "external" forces Open MPI to use an external installation of $2. Supplying a valid directory name also forces Open MPI to use an external installation of $2, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. Note that Open MPI no longer supports --without-$1. If no argument is specified, Open MPI will search default locations for $2 and error if one is not found.])]) + m4_ifval([$4], + [m4_ifdef([$3], + [AC_ARG_WITH([$1], + [AS_HELP_STRING([--with-$1(=DIR)], + [Build $2 support. DIR can take one of four values: "internal", "external", "no", or a valid directory name. "internal" forces Open MPI to use its internal copy of $2. "external" forces Open MPI to use an external installation of $2. Supplying a valid directory name also forces Open MPI to use an external installation of $2, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. "no" means that Open MPI will not build components that require this package. If no argument is specified, Open MPI will search default locations for $2 and fall back to an internal version if one is not found.])])], + [AC_ARG_WITH([$1], + [AS_HELP_STRING([--with-$1(=DIR)], + [Build $2 support. DIR can take one of three values: "external", "no", or a valid directory name. 
"external" forces Open MPI to use an external installation of $2. Supplying a valid directory name also forces Open MPI to use an external installation of $2, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. "no" means that Open MPI will not build components that require this package. If no argument is specified, Open MPI will search default locations for $2 and error if one is not found.])])])], + [m4_ifdef([$3], + [AC_ARG_WITH([$1], + [AS_HELP_STRING([--with-$1(=DIR)], + [Build $2 support. DIR can take one of three values: "internal", "external", or a valid directory name. "internal" forces Open MPI to use its internal copy of $2. "external" forces Open MPI to use an external installation of $2. Supplying a valid directory name also forces Open MPI to use an external installation of $2, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. Note that Open MPI no longer supports --without-$1. If no argument is specified, Open MPI will search default locations for $2 and fall back to an internal version if one is not found.])])], + [AC_ARG_WITH([$1], + [AS_HELP_STRING([--with-$1(=DIR)], + [Build $2 support. DIR can take one of two values: "external" or a valid directory name. "external" forces Open MPI to use an external installation of $2. Supplying a valid directory name also forces Open MPI to use an external installation of $2, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. Note that Open MPI no longer supports --without-$1. If no argument is specified, Open MPI will search default locations for $2 and error if one is not found.])])])]) - AC_ARG_WITH([$1-libdir], - [AS_HELP_STRING([--with-$1-libdir=DIR], - [Search for $2 libraries in DIR. Should only be used if an external copy of $2 is being used.])])]) + AC_ARG_WITH([$1-libdir], + [AS_HELP_STRING([--with-$1-libdir=DIR], + [Search for $2 libraries in DIR. 
Should only be used if an external copy of $2 is being used.])]) # Bozo check - AS_IF([test "$with_$1" = "no"], - [AC_MSG_WARN([It is not possible to configure Open MPI --without-$1]) - AC_MSG_ERROR([Cannot continue])]) + m4_ifval([$4], [], + [AS_IF([test "$with_$1" = "no"], + [AC_MSG_WARN([It is not possible to configure Open MPI --without-$1]) + AC_MSG_ERROR([Cannot continue])])]) AS_IF([test "$with_$1_libdir" = "no" -o "$with_$1_libdir" = "yes"], [AC_MSG_WARN([yes/no are invalid responses for --with-$1-libdir. Please specify a path.]) @@ -73,6 +87,8 @@ AC_DEFUN([OPAL_3RDPARTY_WITH], [ opal_$1_mode="internal"], ["external"], [with_$1="" opal_$1_mode="external"], + ["no"], [with_$1="" + opal_$1_mode="disabled"], [""], [opal_$1_mode="unspecified"], [opal_$1_mode="external"]) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index 882d575f2e4..b176c8c6ca2 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -9,7 +9,7 @@ dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2008-2021 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. dnl Copyright (c) 2015-2018 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. @@ -20,6 +20,8 @@ dnl reserved. dnl Copyright (c) 2020 Google, LLC. All rights reserved. dnl Copyright (c) 2020 Intel, Inc. All rights reserved. dnl Copyright (c) 2021 IBM Corporation. All rights reserved. +dnl Copyright (c) 2022 Amazon.com, Inc. or its affiliates. +dnl All Rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -334,7 +336,7 @@ AC_DEFUN([OPAL_CHECK_GCC_ATOMIC_BUILTINS], [ if test -z "$opal_cv_have___atomic" ; then AC_MSG_CHECKING([for 32-bit GCC built-in atomics]) - AC_TRY_LINK([ + AC_LINK_IFELSE([AC_LANG_PROGRAM([ #include uint32_t tmp, old = 0; uint64_t tmp64, old64 = 0;], [ @@ -342,35 +344,35 @@ __atomic_thread_fence(__ATOMIC_SEQ_CST); __atomic_compare_exchange_n(&tmp, &old, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); __atomic_add_fetch(&tmp, 1, __ATOMIC_RELAXED); __atomic_compare_exchange_n(&tmp64, &old64, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); -__atomic_add_fetch(&tmp64, 1, __ATOMIC_RELAXED);], - [opal_cv_have___atomic=yes], - [opal_cv_have___atomic=no]) +__atomic_add_fetch(&tmp64, 1, __ATOMIC_RELAXED);])], + [opal_cv_have___atomic=yes], + [opal_cv_have___atomic=no]) AC_MSG_RESULT([$opal_cv_have___atomic]) if test $opal_cv_have___atomic = "yes" ; then - AC_MSG_CHECKING([for 64-bit GCC built-in atomics]) + AC_MSG_CHECKING([for 64-bit GCC built-in atomics]) - AC_TRY_LINK([ + AC_LINK_IFELSE([AC_LANG_PROGRAM([ #include uint64_t tmp64, old64 = 0;], [ __atomic_compare_exchange_n(&tmp64, &old64, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); -__atomic_add_fetch(&tmp64, 1, __ATOMIC_RELAXED);], - [opal_cv_have___atomic_64=yes], - [opal_cv_have___atomic_64=no]) - - AC_MSG_RESULT([$opal_cv_have___atomic_64]) - - if test $opal_cv_have___atomic_64 = "yes" ; then - AC_MSG_CHECKING([if 64-bit GCC built-in atomics are lock-free]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([], [if (!__atomic_is_lock_free (8, 0)) { return 1; }])], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - opal_cv_have___atomic_64=no], - [AC_MSG_RESULT([cannot test -- assume yes (cross compiling)])]) - fi +__atomic_add_fetch(&tmp64, 1, __ATOMIC_RELAXED);])], + [opal_cv_have___atomic_64=yes], + [opal_cv_have___atomic_64=no]) + + AC_MSG_RESULT([$opal_cv_have___atomic_64]) + + if test $opal_cv_have___atomic_64 = "yes" ; then + AC_MSG_CHECKING([if 64-bit GCC 
built-in atomics are lock-free]) + AC_RUN_IFELSE([AC_LANG_PROGRAM([], [if (!__atomic_is_lock_free (8, 0)) { return 1; }])], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + opal_cv_have___atomic_64=no], + [AC_MSG_RESULT([cannot test -- assume yes (cross compiling)])]) + fi else - opal_cv_have___atomic_64=no + opal_cv_have___atomic_64=no fi # Check for 128-bit support @@ -378,6 +380,7 @@ __atomic_add_fetch(&tmp64, 1, __ATOMIC_RELAXED);], fi ]) + AC_DEFUN([OPAL_CHECK_C11_CSWAP_INT128], [ OPAL_VAR_SCOPE_PUSH([atomic_compare_exchange_result atomic_compare_exchange_CFLAGS_save atomic_compare_exchange_LIBS_save]) @@ -422,463 +425,6 @@ AC_DEFUN([OPAL_CHECK_C11_CSWAP_INT128], [ OPAL_VAR_SCOPE_POP ]) -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_TEXT -dnl -dnl Determine how to set current mode as text. -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_TEXT],[ - AC_MSG_CHECKING([directive for setting text section]) - opal_cv_asm_text="" - if test "$opal_cv_c_compiler_vendor" = "microsoft" ; then - # text section will be brought in with the rest of - # header for MS - leave blank for now - opal_cv_asm_text="" - else - case $host in - *-aix*) - opal_cv_asm_text=[".csect .text[PR]"] - ;; - *) - opal_cv_asm_text=".text" - ;; - esac - fi - AC_MSG_RESULT([$opal_cv_asm_text]) - AC_DEFINE_UNQUOTED([OPAL_ASM_TEXT], ["$opal_cv_asm_text"], - [Assembly directive for setting text section]) - OPAL_ASM_TEXT="$opal_cv_asm_text" - AC_SUBST(OPAL_ASM_TEXT) -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_GLOBAL -dnl -dnl Sets OPAL_ASM_GLOBAL to the value to prefix global values -dnl -dnl I'm sure if I don't have a test for this, there will be some -dnl dumb platform that uses something else -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_GLOBAL],[ - AC_MSG_CHECKING([directive for 
exporting symbols]) - opal_cv_asm_global="" - if test "$opal_cv_c_compiler_vendor" = "microsoft" ; then - opal_cv_asm_global="PUBLIC" - else - case $host in - *) - opal_cv_asm_global=".globl" - ;; - esac - fi - AC_MSG_RESULT([$opal_cv_asm_global]) - AC_DEFINE_UNQUOTED([OPAL_ASM_GLOBAL], ["$opal_cv_asm_global"], - [Assembly directive for exporting symbols]) - OPAL_ASM_GLOBAL="$opal_cv_asm_global" - AC_SUBST(OPAL_AS_GLOBAL) -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_LSYM -dnl -dnl Sets OPAL_ASM_LSYM to the prefix value on a symbol to make it -dnl an internal label (jump target and whatnot) -dnl -dnl We look for L .L $ L$ (in that order) for something that both -dnl assembles and does not leave a label in the output of nm. Fall -dnl back to L if nothing else seems to work :/ -dnl -dnl ################################################################# - -# _OPAL_CHECK_ASM_LSYM([variable-to-set]) -# --------------------------------------- -AC_DEFUN([_OPAL_CHECK_ASM_LSYM],[ - AC_REQUIRE([AC_PROG_GREP]) - - $1="L" - - for sym in L .L $ L$ ; do - asm_result=0 - echo "configure: trying $sym" >&AS_MESSAGE_LOG_FD - OPAL_TRY_ASSEMBLE([foobar$opal_cv_asm_label_suffix -${sym}mytestlabel$opal_cv_asm_label_suffix], - [# ok, we succeeded at assembling. see if we can nm, - # throwing the results in a file - if $NM conftest.$OBJEXT > conftest.out 2>&AS_MESSAGE_LOG_FD ; then - if test "`$GREP mytestlabel conftest.out`" = "" ; then - # there was no symbol... looks promising to me - $1="$sym" - asm_result=1 - elif test ["`$GREP ' [Nt] .*mytestlabel' conftest.out`"] = "" ; then - # see if we have a non-global-ish symbol - # but we should see if we can do better. - $1="$sym" - fi - else - # not so much on the NM goodness :/ - echo "$NM failed. 
Output from NM was:" >&AS_MESSAGE_LOG_FD - cat conftest.out >&AS_MESSAGE_LOG_FD - AC_MSG_WARN([$NM could not read object file]) - fi - ]) - if test "$asm_result" = "1" ; then - break - fi - done - rm -f conftest.out - unset asm_result sym -]) - -# OPAL_CHECK_ASM_LSYM() -# --------------------- -AC_DEFUN([OPAL_CHECK_ASM_LSYM],[ - AC_REQUIRE([LT_PATH_NM]) - - AC_CACHE_CHECK([prefix for lsym labels], - [opal_cv_asm_lsym], - [_OPAL_CHECK_ASM_LSYM([opal_cv_asm_lsym])]) - AC_DEFINE_UNQUOTED([OPAL_ASM_LSYM], ["$opal_cv_asm_lsym"], - [Assembly prefix for lsym labels]) - OPAL_ASM_LSYM="$opal_cv_asm_lsym" - AC_SUBST(OPAL_ASM_LSYM) -])dnl - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_PROC -dnl -dnl Sets a cv-flag, if the compiler needs a proc/endp-definition to -dnl link with C. -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_PROC],[ - AC_CACHE_CHECK([if .proc/endp is needed], - [opal_cv_asm_need_proc], - [opal_cv_asm_need_proc="no" - OPAL_TRY_ASSEMBLE([ - .proc mysym -mysym: - .endp mysym], - [opal_cv_asm_need_proc="yes"]) - rm -f conftest.out]) - - if test "$opal_cv_asm_need_proc" = "yes" ; then - opal_cv_asm_proc=".proc" - opal_cv_asm_endproc=".endp" - else - opal_cv_asm_proc="#" - opal_cv_asm_endproc="#" - fi -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_GSYM -dnl -dnl Sets OPAL_ASM_GSYM to the prefix value on a symbol to make it -dnl a global linkable from C. Basically, an _ or not. 
-dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_GSYM],[ - AC_CACHE_CHECK([prefix for global symbol labels], - [opal_cv_asm_gsym], - [_OPAL_CHECK_ASM_GSYM]) - - if test "$opal_cv_asm_gsym" = "none" ; then - AC_MSG_ERROR([Could not determine global symbol label prefix]) - fi - - AC_DEFINE_UNQUOTED([OPAL_ASM_GSYM], ["$opal_cv_asm_gsym"], - [Assembly prefix for gsym labels]) - OPAL_ASM_GSYM="$opal_cv_asm_gsym" - AC_SUBST(OPAL_ASM_GSYM) - -]) - -AC_DEFUN([_OPAL_CHECK_ASM_GSYM],[ - opal_cv_asm_gsym="none" - - for sym in "_" "" "." ; do - asm_result=0 - echo "configure: trying $sym" >&AS_MESSAGE_LOG_FD -cat > conftest_c.c <&AS_MESSAGE_LOG_FD - opal_link="$CC $CFLAGS conftest_c.$OBJEXT conftest.$OBJEXT -o conftest $LDFLAGS $LIBS > conftest.link 2>&1" - if AC_TRY_EVAL(opal_link) ; then - # save the warnings - cat conftest.link >&AS_MESSAGE_LOG_FD - asm_result=1 - else - cat conftest.link >&AS_MESSAGE_LOG_FD - echo "configure: failed C program was: " >&AS_MESSAGE_LOG_FD - cat conftest_c.c >&AS_MESSAGE_LOG_FD - echo "configure: failed ASM program was: " >&AS_MESSAGE_LOG_FD - cat conftest.s >&AS_MESSAGE_LOG_FD - asm_result=0 - fi - else - # save output and failed program - cat conftest.cmpl >&AS_MESSAGE_LOG_FD - echo "configure: failed C program was: " >&AS_MESSAGE_LOG_FD - cat conftest.c >&AS_MESSAGE_LOG_FD - asm_result=0 - fi], - [asm_result=0]) - if test "$asm_result" = "1" ; then - opal_cv_asm_gsym="$sym" - break - fi - done - rm -rf conftest.* -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_LABEL_SUFFIX -dnl -dnl Sets OPAL_ASM_LABEL_SUFFIX to the value to suffix for labels -dnl -dnl I'm sure if I don't have a test for this, there will be some -dnl dumb platform that uses something else -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_LABEL_SUFFIX],[ - AC_MSG_CHECKING([suffix for labels]) - 
opal_cv_asm_label_suffix="" - case $host in - *) - opal_cv_asm_label_suffix=":" - ;; - esac - AC_MSG_RESULT([$opal_cv_asm_label_suffix]) - AC_DEFINE_UNQUOTED([OPAL_ASM_LABEL_SUFFIX], ["$opal_cv_asm_label_suffix"], - [Assembly suffix for labels]) - OPAL_ASM_LABEL_SUFFIX="$opal_cv_asm_label_suffix" - AC_SUBST(OPAL_AS_LABEL_SUFFIX) -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_ALIGN_LOG -dnl -dnl Sets OPAL_ASM_ALIGN_LOG to 1 if align is specified -dnl logarithmically, 0 otherwise -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_ALIGN_LOG],[ - AC_REQUIRE([LT_PATH_NM]) - AC_REQUIRE([AC_PROG_GREP]) - - AC_CACHE_CHECK([if .align directive takes logarithmic value], - [opal_cv_asm_align_log], - [ OPAL_TRY_ASSEMBLE([ $opal_cv_asm_text - .align 4 - $opal_cv_asm_global foo - .byte 1 - .align 4 -foo$opal_cv_asm_label_suffix - .byte 2], - [opal_asm_addr=[`$NM conftest.$OBJEXT | $GREP foo | sed -e 's/.*\([0-9a-fA-F][0-9a-fA-F]\).*foo.*/\1/'`]], - [opal_asm_addr=""]) - # test for both 16 and 10 (decimal and hex notations) - echo "configure: .align test address offset is $opal_asm_addr" >&AS_MESSAGE_LOG_FD - if test "$opal_asm_addr" = "16" || test "$opal_asm_addr" = "10" ; then - opal_cv_asm_align_log="yes" - else - opal_cv_asm_align_log="no" - fi]) - - if test "$opal_cv_asm_align_log" = "yes" || test "$opal_cv_asm_align_log" = "1" ; then - opal_asm_align_log_result=1 - else - opal_asm_align_log_result=0 - fi - - AC_DEFINE_UNQUOTED([OPAL_ASM_ALIGN_LOG], - [$asm_align_log_result], - [Assembly align directive expects logarithmic value]) - - unset omp_asm_addr asm_result -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_TYPE -dnl -dnl Sets OPAL_ASM_TYPE to the prefix for the function type to -dnl set a symbol's type as function (needed on ELF for shared -dnl libraries). 
If no .type directive is needed, sets OPAL_ASM_TYPE -dnl to an empty string -dnl -dnl We look for @ \# % -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_TYPE],[ - AC_CACHE_CHECK([prefix for function in .type], - [opal_cv_asm_type], - [_OPAL_CHECK_ASM_TYPE]) - - AC_DEFINE_UNQUOTED([OPAL_ASM_TYPE], ["$opal_cv_asm_type"], - [How to set function type in .type directive]) - OPAL_ASM_TYPE="$opal_cv_asm_type" - AC_SUBST(OPAL_ASM_TYPE) -]) - -AC_DEFUN([_OPAL_CHECK_ASM_TYPE],[ - opal_cv_asm_type="" - - case "${host}" in - *-sun-solaris*) - # GCC on solaris seems to accept just about anything, not - # that what it defines actually works... So just hardwire - # to the right answer - opal_cv_asm_type="#" - ;; - *) - for type in @ \# % ; do - asm_result=0 - echo "configure: trying $type" >&AS_MESSAGE_LOG_FD - OPAL_TRY_ASSEMBLE([ .type mysym, ${type}function -mysym:], - [opal_cv_asm_type="${type}" - asm_result=1]) - if test "$asm_result" = "1" ; then - break - fi - done - ;; - esac - rm -f conftest.out - - unset asm_result type -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_SIZE -dnl -dnl Sets OPAL_ASM_SIZE to 1 if we should set .size directives for -dnl each function, 0 otherwise. 
-dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_SIZE],[ - AC_CACHE_CHECK([if .size is needed], - [opal_cv_asm_need_size], - [opal_cv_asm_need_size="no" - OPAL_TRY_ASSEMBLE([ .size mysym, 1], - [opal_cv_asm_need_size="yes"]) - rm -f conftest.out]) - - if test "$opal_cv_asm_need_size" = "yes" ; then - opal_asm_size=1 - else - opal_asm_size=0 - fi - - AC_DEFINE_UNQUOTED([OPAL_ASM_SIZE], ["$opal_asm_size"], - [Do we need to give a .size directive]) - OPAL_ASM_SIZE="$opal_asm_size" - AC_SUBST(OPAL_ASM_TYPE) - unset asm_result -])dnl - - -# OPAL_CHECK_ASM_GNU_STACKEXEC(var) -# ---------------------------------- -# sets shell variable var to the things necessary to -# disable execable stacks with GAS -AC_DEFUN([OPAL_CHECK_ASM_GNU_STACKEXEC], [ - AC_REQUIRE([AC_PROG_GREP]) - - AC_CHECK_PROG([OBJDUMP], [objdump], [objdump]) - AC_CACHE_CHECK([if .note.GNU-stack is needed], - [opal_cv_asm_gnu_stack_result], - [AS_IF([test "$OBJDUMP" != ""], - [ # first, see if a simple C program has it set - cat >conftest.c <&1 | $GREP '\.note\.GNU-stack' &> /dev/null && opal_cv_asm_gnu_stack_result=yes], - [OPAL_LOG_MSG([the failed program was:], 1) - OPAL_LOG_FILE([conftest.c]) - opal_cv_asm_gnu_stack_result=no]) - if test "$opal_cv_asm_gnu_stack_result" != "yes" ; then - opal_cv_asm_gnu_stack_result="no" - fi - rm -rf conftest.*], - [opal_cv_asm_gnu_stack_result="no"])]) - if test "$opal_cv_asm_gnu_stack_result" = "yes" ; then - opal_cv_asm_gnu_stack=1 - else - opal_cv_asm_gnu_stack=0 - fi -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_POWERPC_REG -dnl -dnl See if the notation for specifying registers is X (most everyone) -dnl or rX (OS X) -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_POWERPC_REG],[ - AC_MSG_CHECKING([if PowerPC registers have r prefix]) - OPAL_TRY_ASSEMBLE([$opal_cv_asm_text - addi 1,1,0], - 
[opal_cv_asm_powerpc_r_reg=0], - [OPAL_TRY_ASSEMBLE([$opal_cv_asm_text - addi r1,r1,0], - [opal_cv_asm_powerpc_r_reg=1], - [AC_MSG_ERROR([Can not determine how to use PPC registers])])]) - if test "$opal_cv_asm_powerpc_r_reg" = "1" ; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi - - AC_DEFINE_UNQUOTED([OPAL_POWERPC_R_REGISTERS], - [$opal_cv_asm_powerpc_r_reg], - [Whether r notation is used for ppc registers]) -])dnl dnl ################################################################# dnl @@ -955,20 +501,16 @@ AC_DEFUN([OPAL_CHECK_CMPXCHG16B],[ OPAL_VAR_SCOPE_POP ])dnl + dnl ################################################################# dnl -dnl OPAL_CHECK_INLINE_GCC +dnl OPAL_CHECK_INLINE_GCC([action-if-found], [action-if-not-found]) dnl dnl Check if the compiler is capable of doing GCC-style inline dnl assembly. Some compilers emit a warning and ignore the inline dnl assembly (xlc on OS X) and compile without error. Therefore, dnl the test attempts to run the emitted code to check that the -dnl assembly is actually run. To run this test, one argument to -dnl the macro must be an assembly instruction in gcc format to move -dnl the value 0 into the register containing the variable ret. -dnl For PowerPC, this would be: -dnl -dnl "li %0,0" : "=&r"(ret) +dnl assembly is actually run. dnl dnl For testing ia32 assembly, the assembly instruction xaddl is dnl tested. The xaddl instruction is used by some of the atomic @@ -977,272 +519,191 @@ dnl some compilers (i.e. earlier versions of Sun Studio 12) do not dnl necessarily handle xaddl properly, so that needs to be detected dnl during configure time. 
dnl -dnl DEFINE OPAL_GCC_INLINE_ASSEMBLY to 0 or 1 depending on GCC +dnl DEFINE OPAL_C_GCC_INLINE_ASSEMBLY to 0 or 1 depending on GCC dnl support dnl dnl ################################################################# AC_DEFUN([OPAL_CHECK_INLINE_C_GCC],[ - assembly="$1" - asm_result="unknown" + AC_CACHE_CHECK([if $CC supports GCC inline assembly], + [opal_cv_asm_gcc_inline_assembly], + [OPAL_VAR_SCOPE_PUSH([asm_result opal_gcc_inline_assign OPAL_C_GCC_INLINE_ASSEMBLY]) - AC_MSG_CHECKING([if $CC supports GCC inline assembly]) + asm_result="unknown" + + opal_gcc_inline_assign="" + case "${host}" in + x86_64-*x32|i?86-*|x86_64*|amd64*) + opal_gcc_inline_assign='"xaddl %1,%0" : "=m"(ret), "+r"(negone) : "m"(ret)' + ;; + aarch64*) + opal_gcc_inline_assign='"mov %0, #0" : "=&r"(ret)' + ;; + powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*) + opal_gcc_inline_assign='"1: li %0,0" : "=&r"(ret)' + ;; + esac - if test ! "$assembly" = "" ; then - AC_RUN_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[ + AS_IF([test "$opal_gcc_inline_assign" != ""], + [AC_RUN_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[ int ret = 1; int negone = -1; -__asm__ __volatile__ ($assembly); +__asm__ __volatile__ ($opal_gcc_inline_assign); return ret; - ]])], - [asm_result="yes"], [asm_result="no"], - [asm_result="unknown"]) - else - assembly="test skipped - assuming no" - fi - - # if we're cross compiling, just try to compile and figure good enough - if test "$asm_result" = "unknown" ; then - AC_LINK_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[ + ]])], + [asm_result="yes"], [asm_result="no"], + [asm_result="unknown"])], + [asm_result="no - architecture not supported"]) + + # if we're cross compiling, just try to compile and figure good enough + AS_IF([test "$asm_result" = "unknown"], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[ int ret = 1; int negone = -1; -__asm__ __volatile__ ($assembly); +__asm__ __volatile__ ($opal_gcc_inline_assign); return ret; - ]])], - 
[asm_result="yes"], [asm_result="no"]) - fi + ]])], + [asm_result="yes"], [asm_result="no"])]) + opal_cv_asm_gcc_inline_assembly="$asm_result" + OPAL_VAR_SCOPE_POP]) - AC_MSG_RESULT([$asm_result]) - - if test "$asm_result" = "yes" ; then - OPAL_C_GCC_INLINE_ASSEMBLY=1 - opal_cv_asm_inline_supported="yes" - else - OPAL_C_GCC_INLINE_ASSEMBLY=0 - fi + AS_IF([test "$opal_cv_asm_gcc_inline_assembly" = "yes"], + [OPAL_C_GCC_INLINE_ASSEMBLY=1 + $1], + [OPAL_C_GCC_INLINE_ASSEMBLY=0 + $2]) AC_DEFINE_UNQUOTED([OPAL_C_GCC_INLINE_ASSEMBLY], [$OPAL_C_GCC_INLINE_ASSEMBLY], [Whether C compiler supports GCC style inline assembly]) - - unset OPAL_C_GCC_INLINE_ASSEMBLY assembly asm_result ])dnl + dnl ################################################################# dnl dnl OPAL_CONFIG_ASM dnl -dnl DEFINE OPAL_ASSEMBLY_ARCH to something in sys/architecture.h -dnl DEFINE OPAL_ASSEMBLY_FORMAT to string containing correct -dnl format for assembly (not user friendly) -dnl SUBST OPAL_ASSEMBLY_FORMAT to string containing correct -dnl format for assembly (not user friendly) +dnl Configure assembly support. 
AC_DEFINES the following: +dnl - OPAL_C_GCC_INLINE_ASSEMBLY - 1 if C compiler supports +dnl GCC-style inline assembly +dnl - OPAL_USE_C11_ATOMICS - 1 if atomics implementation should +dnl use C11-style atomics +dnl - OPAL_USE_GCC_BUILTIN_ATOMICS - 1 if atomics implementation +dnl should use GCC built-in style atomics +dnl - OPAL_USE_ASM_ATOMICS - 1 if atomics implementation should +dnl use inline assembly (using GCC-style inline assembly) +dnl for atomics implementation dnl dnl ################################################################# AC_DEFUN([OPAL_CONFIG_ASM],[ AC_REQUIRE([OPAL_SETUP_CC]) - AC_REQUIRE([AM_PROG_AS]) - AC_ARG_ENABLE([c11-atomics],[AS_HELP_STRING([--enable-c11-atomics], - [Enable use of C11 atomics if available (default: enabled)])]) + OPAL_VAR_SCOPE_PUSH([atomics_found want_c11_atomics want_gcc_builtin_atomics want_asm_atomics]) + + # only assembly style we support today is gcc-style inline + # assembly, find out if it works. We need this even for C11/GCC + # builtin atomics cases, because we use inline assembly for + # timers, LLSC, and 16 byte compare and swap routines.
+ OPAL_CHECK_INLINE_C_GCC([gcc_inline=1], [gcc_inline=0]) + + atomics_found=no + want_c11_atomics=0 + want_gcc_builtin_atomics=0 + want_asm_atomics=0 + + AC_ARG_ENABLE([c11-atomics], + [AS_HELP_STRING([--enable-c11-atomics], + [Enable use of C11 atomics if available (default: use if available, disabled by default on 64-bit PowerPC)])]) AC_ARG_ENABLE([builtin-atomics], - [AS_HELP_STRING([--enable-builtin-atomics], - [Enable use of GCC built-in atomics (default: autodetect)])]) - - OPAL_CHECK_C11_CSWAP_INT128 - opal_cv_asm_builtin="BUILTIN_NO" - OPAL_CHECK_GCC_ATOMIC_BUILTINS - - if test "x$enable_c11_atomics" != "xno" && test "$opal_cv_c11_supported" = "yes" ; then - opal_cv_asm_builtin="BUILTIN_C11" - OPAL_CHECK_C11_CSWAP_INT128 - elif test "x$enable_c11_atomics" = "xyes"; then - AC_MSG_WARN([C11 atomics were requested but are not supported]) - AC_MSG_ERROR([Cannot continue]) - elif test "$enable_builtin_atomics" = "yes" ; then - if test $opal_cv_have___atomic = "yes" ; then - opal_cv_asm_builtin="BUILTIN_GCC" - else - AC_MSG_WARN([GCC built-in atomics requested but not found.]) - AC_MSG_ERROR([Cannot continue]) - fi - fi + [AS_HELP_STRING([--enable-builtin-atomics], + [Enable use of GCC built-in atomics. Note that C11 atomics are preferred over built-in atomics. (default: use if available, disabled by default on 64-bit PowerPC)])]) - OPAL_CHECK_ASM_PROC - OPAL_CHECK_ASM_TEXT - OPAL_CHECK_ASM_GLOBAL - OPAL_CHECK_ASM_GNU_STACKEXEC - OPAL_CHECK_ASM_LABEL_SUFFIX - OPAL_CHECK_ASM_GSYM - OPAL_CHECK_ASM_LSYM - OPAL_CHECK_ASM_TYPE - OPAL_CHECK_ASM_SIZE - OPAL_CHECK_ASM_ALIGN_LOG - - # find our architecture for purposes of assembly stuff - opal_cv_asm_arch="UNSUPPORTED" - OPAL_GCC_INLINE_ASSIGN="" + AC_ARG_ENABLE([builtin-atomics-for-ppc], + [AS_HELP_STRING([--enable-builtin-atomics-for-ppc], + [For performance reasons, 64-bit POWER architectures will not use C11 or GCC built-in atomics, even if --enable-c11-atomics is passed to configure. 
Enabling this option will re-enable support for both C11 and GCC built-in atomics.])]) - case "${host}" in - x86_64-*x32|i?86-*|x86_64*|amd64*) - if test "$ac_cv_sizeof_long" = "4" ; then - if test $opal_cv_asm_builtin = BUILTIN_NO ; then - AC_MSG_ERROR([IA32 atomics are no longer supported. Use a C11 compiler]) - fi - opal_cv_asm_arch="IA32" - else - opal_cv_asm_arch="X86_64" - OPAL_CHECK_CMPXCHG16B - fi - OPAL_GCC_INLINE_ASSIGN='"xaddl %1,%0" : "=m"(ret), "+r"(negone) : "m"(ret)' - ;; + # See the following github PR and some performance numbers/discussion: + # https://github.com/open-mpi/ompi/pull/8649 + # + # This logic is a bit convoluted, but matches existing logic in v4.x. + case "${host}" in + powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*) + AS_IF([test "$ac_cv_sizeof_long" = "8" -a "$enable_builtin_atomics_for_ppc" != "yes"], + [AS_IF([test "$enable_c11_atomics" != "no" -a "$enable_builtin_atomics" != "no"], + [AC_MSG_NOTICE([Disabling built-in and C11 atomics due to known performance issues on Powerpc])]) + AS_IF([test "$enable_c11_atomics" = "yes" -o "$enable_builtin_atomics" = "yes"], + [AC_MSG_WARN([Ignoring --enable-c11-atomics and --enable-builtin-atomics options on POWER. Set +--enable-builtin-atomics-for-ppc to re-enable.])]) + enable_c11_atomics="no" + enable_builtin_atomics="no"]) + ;; + esac - aarch64*) - opal_cv_asm_arch="ARM64" - OPAL_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)' + # Option 1 for atomics: C11 + # + # We currently always disable C11 atomics with the Intel compilers. + # We know builds older than 20200310 are broken with respect to + # C11 atomics, but have not apparently found a build we are happy + # with. In the future, this should be changed to a check for a + # particular Intel version. 
+ AS_IF([test "$enable_c11_atomics" != "no" -a "$opal_cv_c11_supported" = "yes" -a "$opal_cv_c_compiler_vendor" != "intel"], + [AC_MSG_NOTICE([Using C11 atomics]) + OPAL_CHECK_C11_CSWAP_INT128 + want_c11_atomics=1 + atomics_found="C11 atomics"], + [test "$enable_c11_atomics" = "yes"], + [AC_MSG_WARN([C11 atomics were requested but are not supported]) + AC_MSG_ERROR([Cannot continue])]) + + # Option 2 for atomics: GCC-style Builtin + AS_IF([test "$atomics_found" = "no" -a "$enable_builtin_atomics" != "no"], + [OPAL_CHECK_GCC_ATOMIC_BUILTINS + AS_IF([test $opal_cv_have___atomic = "yes"], + [AC_MSG_NOTICE([Using GCC built-in style atomics]) + atomics_found="GCC built-in style atomics" + want_gcc_builtin_atomics=1], + [test "$enable_builtin_atomics" = "yes"], + [AC_MSG_WARN([GCC built-in atomics requested but not found.]) + AC_MSG_ERROR([Cannot continue])])]) + + # Option 3 for atomics: inline assembly + AS_IF([test "$atomics_found" = "no" -a "$gcc_inline" = "1"], + [case "${host}" in + x86_64-*x32|i?86-*|x86_64*|amd64*) + AS_IF([test "$ac_cv_sizeof_long" = "8"], + [OPAL_CHECK_CMPXCHG16B + atomics_found="x86_64 assembly"]) ;; - armv7*|arm-*-linux-gnueabihf|armv6*) - if test $opal_cv_asm_builtin = BUILTIN_NO ; then - AC_MSG_ERROR([32-bit ARM atomics are no longer supported. Use a C11 compiler]) - fi + aarch64*) + atomics_found="aarch64 assembly" + ;; - opal_cv_asm_arch="ARM" - OPAL_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)' + powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*) + AS_IF([test "$ac_cv_sizeof_long" = "8"], + [atomics_found="PowerPC assembly"]) ;; + esac - powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*) - OPAL_CHECK_POWERPC_REG - if test "$ac_cv_sizeof_long" = "4" ; then - if test $opal_cv_asm_builtin = BUILTIN_NO ; then - AC_MSG_ERROR([PowerPC 32-bit atomics are no longer supported.
Use a C11 compiler]) - fi - opal_cv_asm_arch="POWERPC32" - elif test "$ac_cv_sizeof_long" = "8" ; then - opal_cv_asm_arch="POWERPC64" - else - AC_MSG_ERROR([Could not determine PowerPC word size: $ac_cv_sizeof_long]) - fi - OPAL_GCC_INLINE_ASSIGN='"1: li %0,0" : "=&r"(ret)' - - # See the following github PR and some performance numbers/discussion: - # https://github.com/open-mpi/ompi/pull/8649 - AC_MSG_CHECKING([$opal_cv_asm_arch: Checking if force gcc atomics requested]) - if test $force_gcc_atomics_ppc = 0 ; then - AC_MSG_RESULT([no]) - opal_cv_asm_builtin="BUILTIN_NO" - else - AC_MSG_RESULT([Yes]) - AC_MSG_WARN([$opal_cv_asm_arch: gcc atomics have been known to perform poorly on powerpc.]) - fi + AS_IF([test "$atomics_found" != "no"], + [want_asm_atomics=1]) + AC_MSG_CHECKING([for inline assembly atomics]) + AC_MSG_RESULT([$atomics_found])]) - ;; - *) - if test $opal_cv_have___atomic = "yes" ; then - opal_cv_asm_builtin="BUILTIN_GCC" - else - AC_MSG_ERROR([No atomic primitives available for $host]) - fi - ;; - esac + AS_IF([test "$atomics_found" = "no"], + [AC_MSG_ERROR([No usable atomics implementation found.
Cannot continue.])]) - if test "$opal_cv_asm_builtin" = "BUILTIN_GCC" ; then - AC_DEFINE([OPAL_C_GCC_INLINE_ASSEMBLY], [1], - [Whether C compiler supports GCC style inline assembly]) - else - opal_cv_asm_inline_supported="no" - # now that we know our architecture, try to inline assemble - OPAL_CHECK_INLINE_C_GCC([$OPAL_GCC_INLINE_ASSIGN]) - - # format: - # config_file-text-global-label_suffix-gsym-lsym-type-size-align_log-ppc_r_reg-64_bit-gnu_stack - asm_format="default" - asm_format="${asm_format}-${opal_cv_asm_text}-${opal_cv_asm_global}" - asm_format="${asm_format}-${opal_cv_asm_label_suffix}-${opal_cv_asm_gsym}" - asm_format="${asm_format}-${opal_cv_asm_lsym}" - asm_format="${asm_format}-${opal_cv_asm_type}-${opal_asm_size}" - asm_format="${asm_format}-${opal_asm_align_log_result}" - if test "$opal_cv_asm_arch" = "POWERPC64" ; then - asm_format="${asm_format}-${opal_cv_asm_powerpc_r_reg}" - else - asm_format="${asm_format}-1" - fi - asm_format="${asm_format}-1" - opal_cv_asm_format="${asm_format}-${opal_cv_asm_gnu_stack}" - # For the Makefile, need to escape the $ as $$. Don't display - # this version, but make sure the Makefile gives the right thing - # when regenerating the files because the base has been touched. 
- OPAL_ASSEMBLY_FORMAT=`echo "$opal_cv_asm_format" | sed -e 's/\\\$/\\\$\\\$/'` - - AC_MSG_CHECKING([for assembly format]) - AC_MSG_RESULT([$opal_cv_asm_format]) - AC_DEFINE_UNQUOTED([OPAL_ASSEMBLY_FORMAT], ["$OPAL_ASSEMBLY_FORMAT"], - [Format of assembly file]) - AC_SUBST([OPAL_ASSEMBLY_FORMAT]) - fi # if opal_cv_asm_builtin = BUILTIN_GCC - - result="OPAL_$opal_cv_asm_arch" - OPAL_ASSEMBLY_ARCH="$opal_cv_asm_arch" - AC_MSG_CHECKING([for assembly architecture]) - AC_MSG_RESULT([$opal_cv_asm_arch]) - AC_DEFINE_UNQUOTED([OPAL_ASSEMBLY_ARCH], [$result], - [Architecture type of assembly to use for atomic operations and CMA]) - AC_SUBST([OPAL_ASSEMBLY_ARCH]) - - # Check for RDTSCP support - result=0 - AS_IF([test "$opal_cv_asm_arch" = "X86_64" || test "$opal_cv_asm_arch" = "IA32"], - [AC_MSG_CHECKING([for RDTSCP assembly support]) - AC_LANG_PUSH([C]) - AC_TRY_RUN([[ -int main(int argc, char* argv[]) -{ - unsigned int rax, rdx; - __asm__ __volatile__ ("rdtscp\n": "=a" (rax), "=d" (rdx):: "%rax", "%rdx"); - return 0; -} - ]], - [result=1 - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])], - [#cross compile not supported - AC_MSG_RESULT(["no (cross compiling)"])]) - AC_LANG_POP([C])]) - AC_DEFINE_UNQUOTED([OPAL_ASSEMBLY_SUPPORTS_RDTSCP], [$result], - [Whether we have support for RDTSCP instruction]) - - result="OPAL_$opal_cv_asm_builtin" - OPAL_ASSEMBLY_BUILTIN="$opal_cv_asm_builtin" - AC_MSG_CHECKING([for builtin atomics]) - AC_MSG_RESULT([$opal_cv_asm_builtin]) - AC_DEFINE_UNQUOTED([OPAL_ASSEMBLY_BUILTIN], [$result], - [Whether to use builtin atomics]) - AC_SUBST([OPAL_ASSEMBLY_BUILTIN]) - - OPAL_SUMMARY_ADD([[Atomics]],[[OMPI]],[],[$opal_cv_asm_builtin]) - - OPAL_ASM_FIND_FILE - - unset result asm_format -])dnl + AC_DEFINE_UNQUOTED([OPAL_USE_C11_ATOMICS], + [$want_c11_atomics], + [Whether to use C11 atomics for atomics implementation]) + AC_DEFINE_UNQUOTED([OPAL_USE_GCC_BUILTIN_ATOMICS], + [$want_gcc_builtin_atomics], + [Whether to use GCC-style built-in atomics for 
atomics implementation]) + AC_DEFINE_UNQUOTED([OPAL_USE_ASM_ATOMICS], + [$want_asm_atomics], + [Whether to use assembly-coded atomics for atomics implementation]) + OPAL_SUMMARY_ADD([[Miscellaneous]],[[Atomics]],[],[$atomics_found]) -dnl ################################################################# -dnl -dnl OPAL_ASM_FIND_FILE -dnl -dnl -dnl do all the evil mojo to provide a working assembly file -dnl -dnl ################################################################# -AC_DEFUN([OPAL_ASM_FIND_FILE], [ - AC_REQUIRE([AC_PROG_GREP]) - AC_REQUIRE([AC_PROG_FGREP]) - -if test "$opal_cv_asm_arch" != "WINDOWS" && test "$opal_cv_asm_builtin" != "BUILTIN_GCC" && test "$opal_cv_asm_builtin" != "BUILTIN_OSX" && test "$opal_cv_asm_inline_arch" = "no" ; then - AC_MSG_ERROR([no atomic support available. exiting]) -else - # On windows with VC++, atomics are done with compiler primitives - opal_cv_asm_file="" -fi + OPAL_VAR_SCOPE_POP ])dnl diff --git a/config/opal_config_files.m4 b/config/opal_config_files.m4 index 090b476d35a..9018661e0ff 100644 --- a/config/opal_config_files.m4 +++ b/config/opal_config_files.m4 @@ -24,7 +24,6 @@ AC_DEFUN([OPAL_CONFIG_FILES],[ opal/mca/base/Makefile opal/tools/wrappers/Makefile opal/tools/wrappers/opalcc-wrapper-data.txt - opal/tools/wrappers/opalc++-wrapper-data.txt opal/tools/wrappers/opal.pc ]) ]) diff --git a/config/opal_config_hwloc.m4 b/config/opal_config_hwloc.m4 index 6f006f4044b..e7597dfad47 100644 --- a/config/opal_config_hwloc.m4 +++ b/config/opal_config_hwloc.m4 @@ -3,7 +3,7 @@ dnl dnl Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved dnl Copyright (c) 2014-2018 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. -dnl Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights +dnl Copyright (c) 2020-2021 Amazon.com, Inc. or its affiliates. All Rights dnl reserved. dnl Copyright (c) 2020 Intel, Inc. All rights reserved. 
dnl $COPYRIGHT$ @@ -39,10 +39,16 @@ dnl uses Hwloc. Cannot be added to LIBS yet, because then dnl other execution tests later in configure (there are sadly dnl some) would fail if the path in LDFLAGS was not added to dnl LD_LIBRARY_PATH. +dnl * opal_hwloc_WRAPPER_LDFLAGS - the linker flags necessary to +dnl add to the wrapper compilers in order to link an opal +dnl application when opal is built as a static library. +dnl * opal_hwloc_WRAPPER_LIBS - the linker flags necessary to +dnl add to the wrapper compilers in order to link an opal +dnl application when opal is built as a static library. dnl * CPPFLAGS, LDFLAGS - Updated opal_hwloc_CPPFLAGS and dnl opal_hwloc_LDFLAGS. AC_DEFUN([OPAL_CONFIG_HWLOC], [ - OPAL_VAR_SCOPE_PUSH([external_hwloc_happy internal_hwloc_happy]) + OPAL_VAR_SCOPE_PUSH([external_hwloc_happy internal_hwloc_happy pkg_config_file pkg_config_happy pkg_config_ldflags pkg_config_libs]) opal_show_subtitle "Configuring hwloc" @@ -70,6 +76,30 @@ AC_DEFUN([OPAL_CONFIG_HWLOC], [ AS_IF([test "$external_hwloc_happy" = "0" -a "$internal_hwloc_happy" = "0"], [AC_MSG_ERROR([Could not find viable hwloc build.])]) + AS_IF([test "$opal_hwloc_mode" = "internal"], + [pkg_config_file="${OMPI_TOP_BUILDDIR}/3rd-party/hwloc_directory/hwloc.pc" + PKG_CONFIG_PATH="${OMPI_TOP_BUILDDIR}/3rd-party/hwloc_directory:${PKG_CONFIG_PATH}"], + [test -n "$with_hwloc"], + [pkg_config_file="${with_hwloc}/lib/pkgconfig/hwloc.pc" + PKG_CONFIG_PATH="${with_hwloc}/lib/pkgconfig:${PKG_CONFIG_PATH}"], + [pkg_config_file="hwloc"]) + + pkg_config_happy=1 + OPAL_GET_LDFLAGS_FROM_PC([$pkg_config_file], [pkg_config_ldflags], [pkg_config_happy=0]) + OPAL_GET_LIBS_FROM_PC([$pkg_config_file], [pkg_config_libs], [pkg_config_happy=0]) + + AS_IF([test $pkg_config_happy -ne 0], + [opal_hwloc_WRAPPER_LDFLAGS="$pkg_config_ldflags" + opal_hwloc_WRAPPER_LIBS="$pkg_config_libs"], + [# guess that what we have from compiling OMPI is good enough + AS_IF([test -z "$opal_hwloc_WRAPPER_LDFLAGS"], + 
[opal_hwloc_WRAPPER_LDFLAGS="$opal_hwloc_LDFLAGS"]) + AS_IF([test -z "$opal_hwloc_WRAPPER_LIBS"], + [opal_hwloc_WRAPPER_LIBS="$opal_hwloc_LIBS"])]) + + OPAL_WRAPPER_FLAGS_ADD([LDFLAGS], [$opal_hwloc_WRAPPER_LDFLAGS]) + OPAL_WRAPPER_FLAGS_ADD([LIBS], [$opal_hwloc_WRAPPER_LIBS]) + # this will work even if there is no hwloc package included, # because hwloc_tarball and hwloc_directory will evaluate to an # empty string. These are relative to the 3rd-party/ directory. @@ -114,16 +144,16 @@ AC_DEFUN([_OPAL_CONFIG_HWLOC_EXTERNAL], [ LIBS="$opal_hwloc_LIBS_save $opal_hwloc_LIBS" AS_IF([test "$opal_hwloc_external_support" = "yes"], - [AC_MSG_CHECKING([if external hwloc version is 1.6 or greater]) + [AC_MSG_CHECKING([if external hwloc version is 1.11.0 or greater]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <hwloc.h>]], [[ -#if HWLOC_API_VERSION < 0x00010500 -#error "hwloc API version is less than 0x00010500" +#if HWLOC_API_VERSION < 0x00010b00 +#error "hwloc API version is less than 0x00010b00" #endif ]])], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) - AC_MSG_WARN([external hwloc version is too old (1.5 or later required)]) + AC_MSG_WARN([external hwloc version is too old (1.11.0 or later required)]) opal_hwloc_external_support="no"])]) AS_IF([test "$opal_hwloc_external_support" = "yes"], @@ -148,15 +178,39 @@ dnl external hwloc is not going to be used. Assumes that if dnl this function is called, that success means the internal package dnl will be used. AC_DEFUN([_OPAL_CONFIG_HWLOC_INTERNAL], [ - OPAL_VAR_SCOPE_PUSH(subconfig_happy subconfig_prefix internal_hwloc_location) - - AS_IF([test ! -z $prefix], [subconfig_prefix="--prefix=$prefix"]) + OPAL_VAR_SCOPE_PUSH([subconfig_happy internal_hwloc_location extra_configure_args found_enable_plugins hwloc_config_arg]) + + extra_configure_args= + + # look for a --{enable/disable}-plugins option in the top level + # configure arguments, so that we can add --enable-plugins if + # appropriate.
+ found_enable_plugins=0 + eval "set x $ac_configure_args" + shift + for hwloc_config_arg + do + case $hwloc_config_arg in + --enable-plugins|--enable-plugins=*|--disable-plugins) + found_enable_plugins=1 + ;; + esac + done + + # while the plugins in hwloc are not explicitly using Open MPI's dlopen + # interface, it seems rude to enable plugins in hwloc if the builder asked + # us not to use plugins in Open MPI. So only enable plugins in hwloc if there's + # a chance we're going to do so. We enable plugins by default so that libhwloc + # does not end up with a dependency on libcuda, which would mean everything else + # would end up with a dependency on libcuda (and similar). + AS_IF([test $found_enable_plugins -eq 0 -a "$enable_dlopen" != "no"], + [extra_configure_args="--enable-plugins"]) # Note: To update the version of hwloc shipped, update the # constant in autogen.pl. OPAL_EXPAND_TARBALL([3rd-party/hwloc_tarball], [3rd-party/hwloc_directory], [configure]) OPAL_SUBDIR_ENV_CLEAN([opal_hwloc_configure]) - PAC_CONFIG_SUBDIR_ARGS([3rd-party/hwloc_directory], [], [[--enable-debug]], + PAC_CONFIG_SUBDIR_ARGS([3rd-party/hwloc_directory], [$extra_configure_args], [[--enable-debug]], [subconfig_happy=1], [subconfig_happy=0]) OPAL_SUBDIR_ENV_RESTORE([opal_hwloc_configure]) @@ -171,6 +225,7 @@ AC_DEFUN([_OPAL_CONFIG_HWLOC_INTERNAL], [ # our tree and in the mean time are referenced by their .la # files. opal_hwloc_LIBS="$OMPI_TOP_BUILDDIR/$internal_hwloc_location/hwloc/libhwloc.la" + opal_hwloc_WRAPPER_LIBS="-lhwloc" opal_hwloc_header="$OMPI_TOP_BUILDDIR/$internal_hwloc_location/include/hwloc.h" diff --git a/config/opal_config_libevent.m4 b/config/opal_config_libevent.m4 index 948ee12c871..634048de5fb 100644 --- a/config/opal_config_libevent.m4 +++ b/config/opal_config_libevent.m4 @@ -4,7 +4,7 @@ dnl Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved dnl Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. 
dnl Copyright (c) 2015-2018 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. -dnl Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights +dnl Copyright (c) 2020-2021 Amazon.com, Inc. or its affiliates. All Rights dnl reserved. dnl $COPYRIGHT$ dnl @@ -39,10 +39,16 @@ dnl uses Libevent. Cannot be added to LIBS yet, because then dnl other execution tests later in configure (there are sadly dnl some) would fail if the path in LDFLAGS was not added to dnl LD_LIBRARY_PATH. +dnl * opal_libevent_WRAPPER_LDFLAGS - the linker flags necessary to +dnl add to the wrapper compilers in order to link an opal +dnl application when opal is built as a static library. +dnl * opal_libevent_WRAPPER_LIBS - the linker flags necessary to +dnl add to the wrapper compilers in order to link an opal +dnl application when opal is built as a static library. dnl * CPPFLAGS, LDFLAGS - Updated opal_libevent_CPPFLAGS and dnl opal_libevent_LDFLAGS. AC_DEFUN([OPAL_CONFIG_LIBEVENT], [ - OPAL_VAR_SCOPE_PUSH([internal_libevent_happy external_libevent_happy]) + OPAL_VAR_SCOPE_PUSH([internal_libevent_happy external_libevent_happy pkg_config_core pkg_config_pthreads pkg_config_core_ldflags pkg_config_pthreads_ldflags pkg_config_core_libs pkg_config_pthreads_libs pkg_config_happy]) opal_show_subtitle "Configuring Libevent" @@ -70,6 +76,41 @@ AC_DEFUN([OPAL_CONFIG_LIBEVENT], [ AS_IF([test "$external_libevent_happy" = "0" -a "$internal_libevent_happy" = "0"], [AC_MSG_ERROR([Could not find viable libevent build.])]) + AS_IF([test "$opal_libevent_mode" = "internal"], + [pkg_config_core="${OMPI_TOP_BUILDDIR}/3rd-party/libevent_directory/libevent_core.pc" + pkg_config_pthreads="${OMPI_TOP_BUILDDIR}/3rd-party/libevent_directory/libevent_pthreads.pc" + PKG_CONFIG_PATH="${OMPI_TOP_BUILDDIR}/3rd-party/libevent_directory:${PKG_CONFIG_PATH}"], + [test -n "$with_libevent"], + [pkg_config_core="${with_libevent}/lib/pkgconfig/libevent_core.pc" + 
pkg_config_pthreads="${with_libevent}/lib/pkgconfig/libevent_pthreads.pc" + PKG_CONFIG_PATH="${with_libevent}/lib/pkgconfig:${PKG_CONFIG_PATH}"], + [pkg_config_core="libevent_core" + pkg_config_pthreads="libevent_pthreads"]) + + pkg_config_happy=1 + OPAL_GET_LDFLAGS_FROM_PC([$pkg_config_core], [pkg_config_core_ldflags], [pkg_config_happy=0]) + OPAL_GET_LDFLAGS_FROM_PC([$pkg_config_pthreads], [pkg_config_pthreads_ldflags], [pkg_config_happy=0]) + OPAL_GET_LIBS_FROM_PC([$pkg_config_core], [pkg_config_core_libs], [pkg_config_happy=0]) + OPAL_GET_LIBS_FROM_PC([$pkg_config_pthreads], [pkg_config_pthreads_libs], [pkg_config_happy=0]) + + AS_IF([test $pkg_config_happy -ne 0], + [# Strip -levent from pkg_config_pthreads_libs, since we + # only want to link against libevent_core. We'll pick up + # the core library from pkg_config_core_libs. + pkg_config_pthreads_libs=`echo $pkg_config_pthreads_libs | sed "s/\\-levent\b//g"` + opal_libevent_WRAPPER_LDFLAGS="$pkg_config_core_ldflags" + OPAL_FLAGS_APPEND_UNIQ([opal_libevent_WRAPPER_LDFLAGS], [$pkg_config_pthreads_ldflags]) + opal_libevent_WRAPPER_LIBS="$pkg_config_pthreads_libs" + OPAL_FLAGS_APPEND_MOVE([opal_libevent_WRAPPER_LIBS], [$pkg_config_core_libs])], + [# guess that what we have from compiling OMPI is good enough + AS_IF([test -z "$opal_libevent_WRAPPER_LDFLAGS"], + [opal_libevent_WRAPPER_LDFLAGS="$opal_libevent_LDFLAGS"]) + AS_IF([test -z "$opal_libevent_WRAPPER_LIBS"], + [opal_libevent_WRAPPER_LIBS="$opal_libevent_LIBS"])]) + + OPAL_WRAPPER_FLAGS_ADD([LDFLAGS], [$opal_libevent_WRAPPER_LDFLAGS]) + OPAL_WRAPPER_FLAGS_ADD([LIBS], [$opal_libevent_WRAPPER_LIBS]) + # this will work even if there is no libevent package included, # because libevent_tarball and libevent_directory will evaluate to # an empty string. These are relative to the 3rd-party/ @@ -200,6 +241,7 @@ AC_DEFUN([_OPAL_CONFIG_LIBEVENT_INTERNAL], [ # our tree and in the mean time are referenced by their .la # files. 
opal_libevent_LIBS="$OMPI_TOP_BUILDDIR/$internal_libevent_location/libevent_core.la $OMPI_TOP_BUILDDIR/$internal_libevent_location/libevent_pthreads.la" + opal_libevent_WRAPPER_LIBS="-levent_core -levent_pthreads" opal_libevent_header="$OMPI_TOP_BUILDDIR/$internal_libevent_location/event.h" diff --git a/config/opal_config_pmix.m4 b/config/opal_config_pmix.m4 index 5b13e6bd082..a5ad5b8eabe 100644 --- a/config/opal_config_pmix.m4 +++ b/config/opal_config_pmix.m4 @@ -19,7 +19,7 @@ dnl and Technology (RIST). All rights reserved. dnl Copyright (c) 2016-2021 IBM Corporation. All rights reserved. dnl Copyright (c) 2020 Triad National Security, LLC. All rights dnl reserved. -dnl Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights +dnl Copyright (c) 2020-2021 Amazon.com, Inc. or its affiliates. All Rights dnl reserved. dnl Copyright (c) 2021 Nanook Consulting. All rights reserved. dnl $COPYRIGHT$ @@ -55,10 +55,16 @@ dnl uses PMIx. Cannot be added to LIBS yet, because then dnl other execution tests later in configure (there are sadly dnl some) would fail if the path in LDFLAGS was not added to dnl LD_LIBRARY_PATH. +dnl * opal_pmix_WRAPPER_LDFLAGS - the linker flags necessary to +dnl add to the wrapper compilers in order to link an opal +dnl application when opal is built as a static library. +dnl * opal_pmix_WRAPPER_LIBS - the linker flags necessary to +dnl add to the wrapper compilers in order to link an opal +dnl application when opal is built as a static library. dnl * CPPFLAGS, LDFLAGS - Updated opal_pmix_CPPFLAGS and dnl opal_pmix_LDFLAGS. 
AC_DEFUN([OPAL_CONFIG_PMIX], [ - OPAL_VAR_SCOPE_PUSH([external_pmix_happy internal_pmix_happy internal_pmix_args internal_pmix_libs internal_pmix_CPPFLAGS]) + OPAL_VAR_SCOPE_PUSH([external_pmix_happy internal_pmix_happy internal_pmix_args internal_pmix_wrapper_libs internal_pmix_CPPFLAGS]) opal_show_subtitle "Configuring PMIx" @@ -68,45 +74,59 @@ AC_DEFUN([OPAL_CONFIG_PMIX], [ internal_pmix_happy=0 m4_ifdef([package_pmix], - [# always configure the internal pmix, so that - # make dist always works. - internal_pmix_args="--without-tests-examples --disable-pmix-binaries --disable-pmix-backward-compatibility --disable-visibility" - internal_pmix_libs= - internal_pmix_CPPFLAGS= - - OMPI_PMIX_ADD_ARGS - - AS_IF([test "$opal_libevent_mode" = "internal"], - [internal_pmix_args="$internal_pmix_args --with-libevent=cobuild" - internal_pmix_CPPFLAGS="$internal_pmix_CPPFLAGS $opal_libevent_CPPFLAGS" - internal_pmix_libs="$internal_pmix_libs $opal_libevent_LIBS"]) - - AS_IF([test "$opal_hwloc_mode" = "internal"], - [internal_pmix_args="$internal_pmix_args --with-hwloc=cobuild" - internal_pmix_CPPFLAGS="$internal_pmix_CPPFLAGS $opal_hwloc_CPPFLAGS" - internal_pmix_libs="$internal_pmix_libs $opal_hwloc_LIBS"]) - - AS_IF([test ! -z "$internal_pmix_libs"], - [internal_pmix_args="$internal_pmix_args --with-prte-extra-lib=\"$internal_pmix_libs\""]) - - if test "$WANT_DEBUG" = "1"; then - internal_pmix_args="$internal_pmix_args --enable-debug" - fi - - # Pass all our compiler/linker flags to PMIx, so that it - # picks up how to build an internal HWLOC and libevent, plus - # picks up any user-specified compiler flags from the master - # configure run. 
- OPAL_SUBDIR_ENV_CLEAN([opal_pmix_configure]) - AS_IF([test -n "$internal_pmix_CPPFLAGS"], - [OPAL_SUBDIR_ENV_APPEND([CPPFLAGS], [$internal_pmix_CPPFLAGS])]) - PAC_CONFIG_SUBDIR_ARGS([3rd-party/openpmix], [$internal_pmix_args], - [[--with-libevent=internal], [--with-hwloc=internal], - [--with-libevent=external], [--with-hwloc=external], - [--with-pmix=[[^ ]]*], [--with-platform=[[^ ]]*]], - [internal_pmix_happy=1]) - OPAL_SUBDIR_ENV_RESTORE([opal_pmix_configure]) - OPAL_3RDPARTY_DIST_SUBDIRS="$OPAL_3RDPARTY_DIST_SUBDIRS openpmix"]) + [OMPI_PMIX_ADD_ARGS + AS_IF([test "$opal_pmix_mode" = "unspecified" -o "$opal_pmix_mode" = "internal"], + [# Run PMIx's configure script unless the user + # explicitly asked us to use an external PMIX, so that + # "make dist" includes PMIx in the dist tarball. This + # does mean that "make dist" will not work if Open MPI + # was configured to use an external PMIx library, but + # we decided this was a reasonable tradeoff for not + # having to deal with PMIx (or PRRTE) potentially + # failing to configure in a situation where it isn't + # desired. 
+ + internal_pmix_args="--without-tests-examples --enable-pmix-binaries --disable-pmix-backward-compatibility --disable-visibility" + internal_pmix_wrapper_libs= + internal_pmix_CPPFLAGS= + + AS_IF([test "$opal_libevent_mode" = "internal"], + [internal_pmix_args="$internal_pmix_args --with-libevent --disable-libevent-lib-checks" + internal_pmix_args="$internal_pmix_args --with-libevent-extra-libs=\"$opal_libevent_LIBS\"" + internal_pmix_wrapper_libs="$internal_pmix_wrapper_libs $opal_libevent_WRAPPER_LIBS" + internal_pmix_CPPFLAGS="$internal_pmix_CPPFLAGS $opal_libevent_CPPFLAGS"]) + + AS_IF([test "$opal_hwloc_mode" = "internal"], + [internal_pmix_args="$internal_pmix_args --disable-hwloc-lib-checks" + internal_pmix_args="$internal_pmix_args --with-hwloc-extra-libs=\"$opal_hwloc_LIBS\"" + internal_pmix_wrapper_libs="$internal_pmix_wrapper_libs $opal_hwloc_WRAPPER_LIBS" + internal_pmix_CPPFLAGS="$internal_pmix_CPPFLAGS $opal_hwloc_CPPFLAGS"]) + + if test "$WANT_DEBUG" = "1"; then + internal_pmix_args="$internal_pmix_args --enable-debug" + fi + + # Pass all our compiler/linker flags to PMIx, so that it + # picks up how to build an internal HWLOC and libevent, plus + # picks up any user-specified compiler flags from the master + # configure run.
+ OPAL_SUBDIR_ENV_CLEAN([opal_pmix_configure]) + AS_IF([test -n "$internal_pmix_CPPFLAGS"], + [OPAL_SUBDIR_ENV_APPEND([CPPFLAGS], [$internal_pmix_CPPFLAGS])]) + AS_IF([test -n "$internal_pmix_wrapper_libs"], + [internal_pmix_args="$internal_pmix_args --with-wrapper-libs=\"$internal_pmix_wrapper_libs\""]) + PAC_CONFIG_SUBDIR_ARGS([3rd-party/openpmix], [$internal_pmix_args], + [[--with-libevent=internal], [--with-hwloc=internal], + [--with-libevent=external], [--with-hwloc=external], + [--with-pmix=[[^ ]]*], [--with-platform=[[^ ]]*]], + [internal_pmix_happy=1]) + OPAL_SUBDIR_ENV_RESTORE([opal_pmix_configure]) + OPAL_3RDPARTY_DIST_SUBDIRS="$OPAL_3RDPARTY_DIST_SUBDIRS openpmix"]) + + # if we have a pmix package and configure did not complete + # successfully (or wasn't started), then disable make dist. + AS_IF([test $internal_pmix_happy != 1], + [OPAL_MAKEDIST_DISABLE="$OPAL_MAKEDIST_DISABLE PMIX"])]) # unless internal specifically requested by the user, try to find # an external that works. @@ -122,11 +142,41 @@ AC_DEFUN([OPAL_CONFIG_PMIX], [ # so try the internal version.
AS_IF([test "$external_pmix_happy" = "0" -a "$internal_pmix_happy" = "1"], [opal_pmix_mode="internal" - _OPAL_CONFIG_PMIX_INTERNAL_POST()]) + OPAL_USING_INTERNAL_PMIX=1 + _OPAL_CONFIG_PMIX_INTERNAL_POST()], + [OPAL_USING_INTERNAL_PMIX=0]) AS_IF([test "$external_pmix_happy" = "0" -a "$internal_pmix_happy" = "0"], [AC_MSG_ERROR([Could not find viable pmix build.])]) + AS_IF([test "$opal_pmix_mode" = "internal"], + [pkg_config_file="${OMPI_TOP_BUILDDIR}/3rd-party/openpmix/maint/pmix.pc" + PKG_CONFIG_PATH="${OMPI_TOP_BUILDDIR}/3rd-party/openpmix/maint:${PKG_CONFIG_PATH}"], + [test -n "$with_pmix"], + [pkg_config_file="${with_pmix}/lib/pkgconfig/pmix.pc" + PKG_CONFIG_PATH="${with_pmix}/lib/pkgconfig:${PKG_CONFIG_PATH}"], + [pkg_config_file="pmix"]) + + pkg_config_happy=1 + OPAL_GET_LDFLAGS_FROM_PC([$pkg_config_file], [pkg_config_ldflags], [pkg_config_happy=0]) + OPAL_GET_LIBS_FROM_PC([$pkg_config_file], [pkg_config_libs], [pkg_config_happy=0]) + + AS_IF([test $pkg_config_happy -ne 0], + [opal_pmix_WRAPPER_LDFLAGS="$pkg_config_ldflags" + opal_pmix_WRAPPER_LIBS="$pkg_config_libs"], + [# guess that what we have from compiling OMPI is good enough + AS_IF([test -z "$opal_pmix_WRAPPER_LDFLAGS"], + [opal_pmix_WRAPPER_LDFLAGS="$opal_pmix_LDFLAGS"]) + AS_IF([test -z "$opal_pmix_WRAPPER_LIBS"], + [opal_pmix_WRAPPER_LIBS="$opal_pmix_LIBS"])]) + + OPAL_WRAPPER_FLAGS_ADD([LDFLAGS], [$opal_pmix_WRAPPER_LDFLAGS]) + OPAL_WRAPPER_FLAGS_ADD([LIBS], [$opal_pmix_WRAPPER_LIBS]) + + AC_DEFINE_UNQUOTED([OPAL_USING_INTERNAL_PMIX], + [$OPAL_USING_INTERNAL_PMIX], + [Whether or not we are using the internal PMIx]) + AC_SUBST(opal_pmix_CPPFLAGS) AC_SUBST(opal_pmix_LDFLAGS) AC_SUBST(opal_pmix_LIBS) @@ -204,6 +254,7 @@ AC_DEFUN([_OPAL_CONFIG_PMIX_INTERNAL_POST], [ opal_pmix_CPPFLAGS="-I$OMPI_TOP_BUILDDIR/3rd-party/openpmix/include -I$OMPI_TOP_SRCDIR/3rd-party/openpmix/include" opal_pmix_LDFLAGS="" opal_pmix_LIBS="$OMPI_TOP_BUILDDIR/3rd-party/openpmix/src/libpmix.la" + opal_pmix_WRAPPER_LIBS="-lpmix
$opal_hwloc_WRAPPER_LIBS $opal_libevent_WRAPPER_LIBS" CPPFLAGS="$CPPFLAGS $opal_pmix_CPPFLAGS" diff --git a/config/opal_configure_options.m4 b/config/opal_configure_options.m4 index bf32627137e..8a731700599 100644 --- a/config/opal_configure_options.m4 +++ b/config/opal_configure_options.m4 @@ -20,6 +20,9 @@ dnl Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights +dnl Copyright (c) 2019-2021 Triad National Security, LLC. All rights +dnl reserved. dnl dnl $COPYRIGHT$ dnl @@ -84,13 +87,6 @@ else WANT_BRANCH_PROBABILITIES=0 fi -AC_ARG_ENABLE([builtin-atomics-for-ppc],[AS_HELP_STRING([--enable-builtin-atomics-for-ppc], - [POWER architectures only: Force use of builtin atomics if available. This could either be gcc builtins or C11 atomics, depending on what is available on your system. Enabling this is known to cause poor performance in atomic operations on Power machines. (default: disabled)])]) -if test "x$enable_builtin_atomics_for_ppc" = "xyes" ; then -force_gcc_atomics_ppc=1 -else -force_gcc_atomics_ppc=0 -fi # # Memory debugging @@ -185,10 +181,47 @@ AC_DEFINE_UNQUOTED(OPAL_ENABLE_TIMING, $WANT_TIMING, AM_CONDITIONAL([OPAL_COMPILE_TIMING], [test "$WANT_TIMING" = "1"]) AM_CONDITIONAL([OPAL_INSTALL_TIMING_BINARIES], [test "$WANT_TIMING" = "1" && test "$enable_binaries" != "no"]) +# Later calls to AC_PROG_CC/CXX/FC can +# inject things like -O2 into compile flags if they are +# not defined, which we don't want. Make sure these flags +# are at least set to an empty string now. +# +# Complicating matters is that autogen can re-order +# these calls toward the top of configure. This block should +# be at/near the top, so do it now. 
+# if test "$WANT_DEBUG" = "0"; then CFLAGS="-DNDEBUG $CFLAGS" CXXFLAGS="-DNDEBUG $CXXFLAGS" + + # NDEBUG doesn't exist in fortran, so just make sure it's defined. + if [ test -z "$FCFLAGS" ]; then + FCFLAGS="" + fi +else + # Do we want debugging symbols? + if test "$enable_debug_symbols" != "no" ; then + CFLAGS="$CFLAGS -g" + CXXFLAGS="$CXXFLAGS -g" + FCFLAGS="$FCFLAGS -g" + AC_MSG_WARN([-g has been added to compiler (--enable-debug)]) + else + # If not set, define compile flags to an empty string + # to prevent AC_PROG_CC/FC/CXX from modifying compiler flags. + # See: https://www.gnu.org/software/autoconf/manual/autoconf-2.69/html_node/C-Compiler.html + # for more info. + if [ test -z "$CFLAGS" ]; then + CFLAGS="" + fi + if [ test -z "$CXXFLAGS" ]; then + CXXFLAGS="" + fi + if [ test -z "$FCFLAGS" ]; then + FCFLAGS="" + fi + fi fi + AC_DEFINE_UNQUOTED(OPAL_ENABLE_DEBUG, $WANT_DEBUG, [Whether we want developer-level debugging code or not]) @@ -213,7 +246,6 @@ else fi AM_CONDITIONAL(WANT_INSTALL_HEADERS, test "$WANT_INSTALL_HEADERS" = 1) - # # Do we want the pretty-print stack trace feature? 
# @@ -340,29 +372,6 @@ AC_DEFINE_UNQUOTED([OPAL_ENABLE_HETEROGENEOUS_SUPPORT], [Enable features required for heterogeneous support]) -if test "$opal_want_heterogeneous" = 1; then - ompi_cv_c_word_size_align=yes -else - AC_CACHE_CHECK([if word-sized integers must be word-size aligned], - [ompi_cv_c_word_size_align], - [AC_LANG_PUSH(C) - AC_RUN_IFELSE([AC_LANG_PROGRAM([dnl -#include ], [[ long data[2] = {0, 0}; - long *lp; - int *ip; - ip = (int*) data; - ip++; - lp = (long*) ip; - return lp[0]; ]])], - [ompi_cv_c_word_size_align=no], - [ompi_cv_c_word_size_align=yes], - [ompi_cv_c_word_size_align=yes])]) -fi -AS_IF([test $ompi_cv_c_word_size_align = yes], [results=1], [results=0]) -AC_DEFINE_UNQUOTED([OPAL_ALIGN_WORD_SIZE_INTEGERS], [$results], - [set to 1 if word-size integers must be aligned to word-size padding to prevent bus errors]) - - # # Cross-compile data # @@ -405,6 +414,7 @@ AM_CONDITIONAL([OPAL_WANT_SCRIPT_WRAPPER_COMPILERS], [test "$enable_script_wrapp # # Support per-user config files? # +OPAL_VAR_SCOPE_PUSH([result]) AC_ARG_ENABLE([per-user-config-files], [AS_HELP_STRING([--enable-per-user-config-files], [Disable per-user configuration files, to save disk accesses during job start-up. This is likely desirable for large jobs. Note that this can also be achieved by environment variables at run-time. (default: enabled)])]) @@ -415,6 +425,7 @@ else fi AC_DEFINE_UNQUOTED([OPAL_WANT_HOME_CONFIG_FILES], [$result], [Enable per-user config files]) +OPAL_VAR_SCOPE_POP # # Do we want to enable IPv6 support? @@ -517,6 +528,10 @@ OPAL_WITH_OPTION_MIN_MAX_VALUE(port_name, 1024, 255, 2048) # Min length accroding to MPI-2.1, p. 
418 OPAL_WITH_OPTION_MIN_MAX_VALUE(datarep_string, 128, 64, 256) +OPAL_WITH_OPTION_MIN_MAX_VALUE(pset_name_len, 512, 512, 4096) + +OPAL_WITH_OPTION_MIN_MAX_VALUE(stringtag_len, 1024, 256, 2048) + # some systems don't want/like getpwuid AC_MSG_CHECKING([if want getpwuid support]) AC_ARG_ENABLE([getpwuid], diff --git a/config/opal_functions.m4 b/config/opal_functions.m4 index 7ae5ff994cb..df1a0f812a0 100644 --- a/config/opal_functions.m4 +++ b/config/opal_functions.m4 @@ -16,6 +16,8 @@ dnl Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. dnl Copyright (c) 2015-2017 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights +dnl reserved. dnl dnl $COPYRIGHT$ dnl @@ -317,6 +319,18 @@ dnl ####################################################################### dnl ####################################################################### dnl ####################################################################### +# OPAL_APPEND(variable, new_argument) +# ---------------------------------------- +# Append new_argument to variable, assuming a space separated list. +# +AC_DEFUN([OPAL_APPEND], [ + AS_IF([test -z "$$1"], [$1="$2"], [$1="$$1 $2"]) +]) + +dnl ####################################################################### +dnl ####################################################################### +dnl ####################################################################### + # OPAL_APPEND_UNIQ(variable, new_argument) # ---------------------------------------- # Append new_argument to variable if not already in variable. 
This assumes a @@ -333,11 +347,7 @@ for arg in $2; do fi done if test "$opal_found" = "0" ; then - if test -z "$$1"; then - $1="$arg" - else - $1="$$1 $arg" - fi + OPAL_APPEND([$1], [$arg]) fi done unset opal_found @@ -463,7 +473,51 @@ AC_DEFUN([OPAL_FLAGS_APPEND_UNIQ], [ AS_IF([test "x$val" = "x$arg"], [opal_append=0]) done]) AS_IF([test "$opal_append" = "1"], - [AS_IF([test -z "$$1"], [$1=$arg], [$1="$$1 $arg"])]) + [OPAL_APPEND([$1], [$arg])]) done + + OPAL_VAR_SCOPE_POP +]) + +dnl ####################################################################### +dnl ####################################################################### +dnl ####################################################################### + +# OPAL_FLAGS_APPEND_MOVE(variable, new_argument) +# ---------------------------------------------- +# add new_arguments to the end of variable. +# +# If an argument in new_arguments does not begin with -I, -L, or -l OR +# the argument begins with -I, -L, or -l and it is not already in +# variable, it is appended to variable. +# +# If an argument in new_arguments begins with a -l and is already in +# variable, the existing occurrences of the argument are removed from +# variable and the argument is appended to variable. This behavior +# is most useful in LIBS, where ordering matters and being rightmost +# is usually the right behavior. +# +# This macro assumes a space separated list.
+AC_DEFUN([OPAL_FLAGS_APPEND_MOVE], [ + OPAL_VAR_SCOPE_PUSH([opal_tmp_variable opal_tmp opal_append]) + + for arg in $2; do + AS_CASE([$arg], + [-I*|-L*], + [opal_append=1 + for val in ${$1} ; do + AS_IF([test "x$val" = "x$arg"], [opal_append=0]) + done + AS_IF([test $opal_append -eq 1], [OPAL_APPEND([$1], [$arg])])], + [-l*], + [opal_tmp_variable= + for val in ${$1}; do + AS_IF([test "x$val" != "x$arg"], + [OPAL_APPEND([opal_tmp_variable], [$val])]) + done + OPAL_APPEND([opal_tmp_variable], [$arg]) + $1="$opal_tmp_variable"], + [OPAL_APPEND([$1], [$arg])]) done OPAL_VAR_SCOPE_POP @@ -480,24 +534,59 @@ dnl ####################################################################### # of the assignment in foo=`which `). This macro ensures that we # get a sane executable value. AC_DEFUN([OPAL_WHICH],[ -# 1 is the variable name to do "which" on -# 2 is the variable name to assign the return value to - -OPAL_VAR_SCOPE_PUSH([opal_prog opal_file opal_dir opal_sentinel]) - -opal_prog=$1 + # 1 is the variable name to do "which" on + # 2 is the variable name to assign the return value to + + OPAL_VAR_SCOPE_PUSH([opal_prog opal_file opal_dir opal_sentinel]) + + opal_prog=$1 + + # There are 3 cases: + + # 1. opal_prog is an absolute filename. If that absolute filename + # exists and is executable, return $2 with that name. Otherwise, + # $2 is unchanged. + + # 2. opal_prog is a relative filename (i.e., it contains one or + # more /, but does not begin with a /). If that file exists + # relative to where we are right now in the filesystem and is + # executable, return the absolute path of that value in $2. + # Otherwise, $2 is unchanged. + + # 3. opal_prog contains no /. Search the PATH for an excutable + # with the appropriate name. If found, return the absolute path + # in $2. Otherwise, $2 is unchanged. + + # Note that these three cases are exactly what which(1) does. + + # Note the double square brackets around the case expressions for + # m4 escaping. 
+ case $opal_prog in + [[\\/]]* | ?:[[\\/]]* ) + # Case 1: absolute + AS_IF([test -x "$opal_prog"], + [$2=$opal_prog]) + ;; + + *[[\\/]]*) + # Case 2: relative with 1 or more / + AS_IF([test -x "$opal_prog"], + [$2="$cwd/$opal_prog"]) + ;; + + *) + # Case 3: no / at all + IFS_SAVE=$IFS + IFS=$PATH_SEPARATOR + for opal_dir in $PATH; do + AS_IF([test -x "$opal_dir/$opal_prog"], + [$2="$opal_dir/$opal_prog"]) + done + IFS=$IFS_SAVE + ;; + esac -IFS_SAVE=$IFS -IFS="$PATH_SEPARATOR" -for opal_dir in $PATH; do - if test -x "$opal_dir/$opal_prog"; then - $2="$opal_dir/$opal_prog" - break - fi -done -IFS=$IFS_SAVE - -OPAL_VAR_SCOPE_POP + OPAL_VAR_SCOPE_POP ])dnl dnl ####################################################################### diff --git a/config/opal_mca.m4 b/config/opal_mca.m4 index 7712103349c..b72ab922b7f 100644 --- a/config/opal_mca.m4 +++ b/config/opal_mca.m4 @@ -10,7 +10,7 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2010-2021 Cisco Systems, Inc. All rights reserved dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved. dnl Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. dnl All Rights reserved. @@ -43,6 +43,8 @@ AC_DEFUN([OPAL_EVAL_ARG], [$1]) AC_DEFUN([OPAL_MCA],[ dnl for OPAL_CONFIGURE_USER env variable AC_REQUIRE([OPAL_CONFIGURE_SETUP]) + dnl For all the path management + AC_REQUIRE([OPAL_SETUP_WRAPPER_INIT]) # Set a special flag so that we can detect if the user calls # OPAL_WRAPPER_FLAGS_ADD and error. @@ -61,9 +63,9 @@ AC_DEFUN([OPAL_MCA],[ [AS_HELP_STRING([--enable-mca-no-build=LIST], [Comma-separated list of - pairs that will not be built. 
Example: - "--enable-mca-no-build=btl-portals,oob-ud" will - disable building the "portals" btl and the "ud" - oob components.])]) + "--enable-mca-no-build=btl-portals4,topo-treematch" will + disable building the "portals4" btl and the "treematch" + topo components.])]) AC_ARG_ENABLE([mca-dso], [AS_HELP_STRING([--enable-mca-dso=LIST], [Comma-separated list of types and/or @@ -93,7 +95,7 @@ AC_DEFUN([OPAL_MCA],[ if test "$enable_mca_no_build" = "yes"; then AC_MSG_RESULT([yes]) AC_MSG_ERROR([*** The enable-mca-no-build flag requires an explicit list -*** of type-component pairs. For example, --enable-mca-no-build=pml-ob1]) +of type-component pairs. For example, --enable-mca-no-build=pml-ob1]) else ifs_save="$IFS" IFS="${IFS}$PATH_SEPARATOR," @@ -123,11 +125,7 @@ AC_DEFUN([OPAL_MCA],[ # in the form DIRECT_[type]=[component] # AC_MSG_CHECKING([which components should be direct-linked into the library]) - if test "$enable_mca_direct" = "yes" ; then - AC_MSG_RESULT([yes]) - AC_MSG_ERROR([*** The enable-mca-direct flag requires an explicit list of -*** type-component pairs. For example, --enable-mca-direct=pml-ob1,coll-basic]) - elif test ! -z "$enable_mca_direct" && test "$enable_mca_direct" != "" ; then + if test -n "$enable_mca_direct" ; then # # we need to add this into the static list, unless the static list # is everything @@ -145,15 +143,21 @@ AC_DEFUN([OPAL_MCA],[ IFS="${IFS}$PATH_SEPARATOR," msg= for item in $enable_mca_direct; do - type="`echo $item | cut -f1 -d-`" - comp="`echo $item | cut -f2- -d-`" + type="`echo $item | cut -s -f1 -d-`" + comp="`echo $item | cut -s -f2- -d-`" if test -z $type || test -z $comp ; then - AC_MSG_ERROR([*** The enable-mca-direct flag requires a -*** list of type-component pairs. Invalid input detected.]) - else - AS_VAR_SET([AS_TR_SH([DIRECT_$type])], [AS_TR_SH([$comp])]) - msg="$item $msg" + AC_MSG_ERROR([enable-mca-direct requires a list of type-component pairs (ex. 
--enable-mca-direct=pml-ob1,smsc-xpmem)]) fi + + var_name=AS_TR_SH([DIRECT_${type}]) + AS_VAR_COPY([var_value], [$var_name]) + + if test -n "$var_value" ; then + AC_MSG_ERROR([enable-mca-direct can only enable a single component per framwork: specified both ${type}-${var_value} and ${type}-${comp}.]) + fi + + AS_VAR_SET([$var_name], AS_TR_SH([${comp}])) + msg="$item $msg" done IFS="$ifs_save" fi @@ -456,6 +460,18 @@ AC_DEFUN([MCA_CONFIGURE_FRAMEWORK],[ [static_components], [dso_components], [static_ltlibs])])])])]) + AS_VAR_SET_IF([OPAL_EVAL_ARG([DIRECT_$2])], [ + AC_MSG_CHECKING([if direct-selection component exists for $2 framework]) + direct_component_happy=no + for component in $all_components ; do + AS_IF([test $component = "$DIRECT_$2"], [direct_component_happy=yes]) + done + if test $direct_component_happy = no ; then + AC_MSG_ERROR([direct component $DIRECT_$2 requested but not found in $all_components]) + fi + AC_MSG_RESULT([$DIRECT_$2]) + ]) + MCA_$1_$2_ALL_COMPONENTS="$all_components" MCA_$1_$2_STATIC_COMPONENTS="$static_components" MCA_$1_$2_DSO_COMPONENTS="$dso_components" @@ -820,28 +836,29 @@ AC_MSG_ERROR([*** $2 component $3 was supposed to be direct-called, but # provide them for the final link of the application. Components # can explicitly set __WRAPPER_EXTRA_ # for either LDFLAGS or LIBS, for cases where the component wants - # to explicitly manage that behavior. If the full variable is not - # defined, this macro will copy __ - # into the wrapper flags. + # to explicitly manage which flags are passed to the wrapper + # compiler. If the __WRAPPER_EXTRA_ + # variable is not set, then it is assumed that the component + # wishes all LDFLAGS and LIBS to be provided as wrapper flags. 
AS_IF([test "$8" = "static"], - [m4_foreach(flags, [LDFLAGS, LIBS], - [m4_if(flags, [LIBS], - [OPAL_MCA_STRIP_LAFILES([tmp_]flags, [$$2_$3_]flags)], - [tmp_]flags[=$$2_$3_]flags) - AS_VAR_SET_IF([$2_$3_WRAPPER_EXTRA_]flags, - [OPAL_FLAGS_APPEND_UNIQ([mca_wrapper_extra_]m4_tolower(flags), [$$2_$3_WRAPPER_EXTRA_]flags)], - [OPAL_FLAGS_APPEND_UNIQ([mca_wrapper_extra_]m4_tolower(flags), [$tmp_]flags)]) - dnl yes, this is weird indenting, but the - dnl combination of m4_foreach and AS_VAR_SET_IF - dnl will result in the closing of one if and the - dnl start of the next on the same line, resulting - dnl in parse errors, if this is not here. - ])]) - - - # if needed, copy over WRAPPER_EXTRA_CPPFLAGS. Since a configure script - # component can never be used in a STOP_AT_FIRST framework, we - # don't have to implement the else clause in the literal check... + [AS_VAR_SET_IF([$2_$3_WRAPPER_EXTRA_LDFLAGS], + [AS_VAR_COPY([tmp_flags], [$2_$3_WRAPPER_EXTRA_LDFLAGS])], + [AS_VAR_COPY([tmp_flags], [$2_$3_LDFLAGS])]) + OPAL_FLAGS_APPEND_UNIQ([mca_wrapper_extra_ldflags], [$tmp_flags]) + + AS_VAR_SET_IF([$2_$3_WRAPPER_EXTRA_LIBS], + [AS_VAR_COPY([tmp_flags], [$2_$3_WRAPPER_EXTRA_LIBS])], + [AS_VAR_COPY([tmp_all_flags], [$2_$3_LIBS]) + OPAL_MCA_STRIP_LAFILES([tmp_flags], [$tmp_all_flags])]) + OPAL_FLAGS_APPEND_MOVE([mca_wrapper_extra_libs], [$tmp_flags])]) + + # WRAPPER_EXTRA_CPPFLAGS are only needed for STOP_AT_FIRST + # components, as all other components are not allowed to leak + # headers or compile-time flags into the top-level library or + # wrapper compilers. If needed, copy over WRAPPER_EXTRA_CPPFLAGS. + # Since a configure script component can never be used in a + # STOP_AT_FIRST framework, we don't have to implement the else + # clause in the literal check. 
AS_LITERAL_IF([$3], [AS_IF([test "$$2_$3_WRAPPER_EXTRA_CPPFLAGS" != ""], [m4_if(OPAL_EVAL_ARG([MCA_$1_$2_CONFIGURE_MODE]), [STOP_AT_FIRST], [stop_at_first=1], [stop_at_first=0]) diff --git a/config/opal_setup_cc.m4 b/config/opal_setup_cc.m4 index cbcb2f305d4..17b4b23ed09 100644 --- a/config/opal_setup_cc.m4 +++ b/config/opal_setup_cc.m4 @@ -11,7 +11,7 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. -dnl Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2008-2021 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2015-2019 Research Organization for Information Science @@ -211,10 +211,11 @@ AC_DEFUN([OPAL_SETUP_CC],[ AC_DEFINE_UNQUOTED([OPAL_C_HAVE___THREAD], [$opal_prog_cc__thread_available], [Whether C compiler supports __thread]) - # Check for standard headers, needed here because needed before - # the types checks. - AC_HEADER_STDC + # the types checks. This is only necessary for Autoconf < v2.70. + m4_version_prereq([2.70], + [], + [AC_HEADER_STDC]) # GNU C and autotools are inconsistent about whether this is # defined so let's make it true everywhere for now... However, IBM @@ -275,15 +276,6 @@ AC_DEFUN([OPAL_SETUP_CC],[ OPAL_FLAGS_UNIQ(LDFLAGS) WANT_DEBUG=1 fi - - - # Do we want debugging? - if test "$WANT_DEBUG" = "1" && test "$enable_debug_symbols" != "no" ; then - CFLAGS="$CFLAGS -g" - - OPAL_FLAGS_UNIQ(CFLAGS) - AC_MSG_WARN([-g has been added to CFLAGS (--enable-debug)]) - fi # These flags are generally gcc-specific; even the # gcc-impersonating compilers won't accept them. 
@@ -302,6 +294,11 @@ AC_DEFUN([OPAL_SETUP_CC],[ _OPAL_CHECK_SPECIFIC_CFLAGS(-fno-strict-aliasing, fno_strict_aliasing, int main() { long double x; }) _OPAL_CHECK_SPECIFIC_CFLAGS(-pedantic, pedantic) _OPAL_CHECK_SPECIFIC_CFLAGS(-Wall, Wall) + + # There are some warnings that we specifically do not care + # about / do not agree that gcc emits warnings about them. So + # we turn them off. + _OPAL_CHECK_SPECIFIC_CFLAGS(-Wformat-truncation=0, format_truncation) fi # Note: Some versions of clang (at least >= 3.5 -- perhaps @@ -332,9 +329,9 @@ AC_DEFUN([OPAL_SETUP_CC],[ # see if the C compiler supports __builtin_expect AC_CACHE_CHECK([if $CC supports __builtin_expect], [opal_cv_cc_supports___builtin_expect], - [AC_TRY_LINK([], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([], [void *ptr = (void*) 0; - if (__builtin_expect (ptr != (void*) 0, 1)) return 0;], + if (__builtin_expect (ptr != (void*) 0, 1)) return 0;])], [opal_cv_cc_supports___builtin_expect="yes"], [opal_cv_cc_supports___builtin_expect="no"])]) if test "$opal_cv_cc_supports___builtin_expect" = "yes" ; then @@ -348,9 +345,9 @@ AC_DEFUN([OPAL_SETUP_CC],[ # see if the C compiler supports __builtin_prefetch AC_CACHE_CHECK([if $CC supports __builtin_prefetch], [opal_cv_cc_supports___builtin_prefetch], - [AC_TRY_LINK([], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([], [int ptr; - __builtin_prefetch(&ptr,0,0);], + __builtin_prefetch(&ptr,0,0);])], [opal_cv_cc_supports___builtin_prefetch="yes"], [opal_cv_cc_supports___builtin_prefetch="no"])]) if test "$opal_cv_cc_supports___builtin_prefetch" = "yes" ; then @@ -364,9 +361,9 @@ AC_DEFUN([OPAL_SETUP_CC],[ # see if the C compiler supports __builtin_clz AC_CACHE_CHECK([if $CC supports __builtin_clz], [opal_cv_cc_supports___builtin_clz], - [AC_TRY_LINK([], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([], [int value = 0xffff; /* we know we have 16 bits set */ - if ((8*sizeof(int)-16) != __builtin_clz(value)) return 0;], + if ((8*sizeof(int)-16) != __builtin_clz(value)) return 0;])], 
[opal_cv_cc_supports___builtin_clz="yes"], [opal_cv_cc_supports___builtin_clz="no"])]) if test "$opal_cv_cc_supports___builtin_clz" = "yes" ; then @@ -410,26 +407,30 @@ AC_DEFUN([OPAL_SETUP_CC],[ AC_DEFUN([_OPAL_START_SETUP_CC],[ opal_show_subtitle "C compiler and preprocessor" - - # $%@#!@#% AIX!! This has to be called before anything invokes the C - # compiler. - dnl AC_AIX ]) AC_DEFUN([_OPAL_PROG_CC],[ + dnl It is really easy to accidently call AC_PROG_CC implicitly through + dnl some other test run before OPAL_SETUP_CC. Try to make that harder. + m4_provide_if([AC_PROG_CC], + [m4_fatal([AC_PROG_CC called before OPAL_SETUP_CC])]) + # # Check for the compiler # - OPAL_VAR_SCOPE_PUSH([opal_cflags_save dummy opal_cc_arvgv0]) - opal_cflags_save="$CFLAGS" + OPAL_VAR_SCOPE_PUSH([dummy opal_cc_arvgv0]) + + AC_USE_SYSTEM_EXTENSIONS + AC_PROG_CC BASECC="`basename $CC`" - CFLAGS="$opal_cflags_save" - AC_DEFINE_UNQUOTED(OPAL_CC, "$CC", [OMPI underlying C compiler]) + OPAL_CC="$CC" + AC_DEFINE_UNQUOTED(OPAL_CC, "$OPAL_CC", [OMPI underlying C compiler]) set dummy $CC opal_cc_argv0=[$]2 OPAL_WHICH([$opal_cc_argv0], [OPAL_CC_ABSOLUTE]) AC_SUBST(OPAL_CC_ABSOLUTE) + OPAL_VAR_SCOPE_POP ]) diff --git a/config/opal_setup_wrappers.m4 b/config/opal_setup_wrappers.m4 index bbe11794535..79855a2bb14 100644 --- a/config/opal_setup_wrappers.m4 +++ b/config/opal_setup_wrappers.m4 @@ -11,7 +11,7 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved. -dnl Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2009-2021 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015-2017 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -43,8 +43,9 @@ AC_DEFUN([OPAL_WRAPPER_FLAGS_ADD], [ [$1], [CXXFLAGS], [OPAL_FLAGS_APPEND_UNIQ([wrapper_extra_cxxflags], [$2])], [$1], [FCFLAGS], [OPAL_FLAGS_APPEND_UNIQ([wrapper_extra_fcflags], [$2])], [$1], [LDFLAGS], [OPAL_FLAGS_APPEND_UNIQ([wrapper_extra_ldflags], [$2])], - [$1], [LIBS], [OPAL_FLAGS_APPEND_UNIQ([wrapper_extra_libs], [$2])], + [$1], [LIBS], [OPAL_FLAGS_APPEND_MOVE([wrapper_extra_libs], [$2])], [m4_fatal([Unknown wrapper flag type $1])]) + opal_show_verbose "Adding \"$2\" to \"$1\"" ]) @@ -79,13 +80,16 @@ AC_DEFUN([OPAL_WRAPPER_FLAGS_ADD], [ # _prefix, configure is not. There's no known use case for # doing so, and we'd like to force the issue. AC_DEFUN([OPAL_SETUP_WRAPPER_INIT],[ + dnl for OPAL_CC + AC_REQUIRE([OPAL_SETUP_CC]) + + opal_show_subtitle "Wrapper compiler setup" + OPAL_VAR_SCOPE_PUSH([wrapper_cc_tmp]) - # AC_PROG_CC_C99 changes CC (instead of CFLAGS) so this method - # must be called before OPAL_SETUP_CC. AC_ARG_WITH([wrapper_cc], [AS_HELP_STRING([--with-wrapper-cc=path], [Set a different wrapper C compiler than the one used to build Open MPI])], - [], [with_wrapper_cc="$CC"]) + [], [with_wrapper_cc="$OPAL_CC"]) AC_MSG_CHECKING([for wrapper C compiler]) @@ -96,10 +100,10 @@ AC_DEFUN([OPAL_SETUP_WRAPPER_INIT],[ # Get the full path to the wrapper compiler. If it doesn't exist # assume that the path is not currently valid. 
wrapper_tmp="$(type -p "$with_wrapper_cc")" - WRAPPER_CC="${wrapper_tmp:-$with_wrapper_cc}" if test -z "$wrapper_tmp" ; then - AC_MSG_WARN([could not find \"$with_wrapper_cc\" in path]) + AC_MSG_WARN([could not find "$with_wrapper_cc" in path]) fi + WRAPPER_CC=$with_wrapper_cc AC_MSG_RESULT([$WRAPPER_CC]) @@ -117,19 +121,19 @@ AC_DEFUN([OPAL_SETUP_WRAPPER_INIT],[ AS_IF([test "$with_wrapper_cflags_prefix" = "yes" || test "$with_wrapper_cflags_prefix" = "no"], [AC_MSG_ERROR([--with-wrapper-cflags-prefix must have an argument.])]) - AC_ARG_WITH([wrapper-cxxflags], - [AS_HELP_STRING([--with-wrapper-cxxflags], - [Extra flags to add to CXXFLAGS when using mpiCC/mpic++])]) - AS_IF([test "$with_wrapper_cxxflags" = "yes" || test "$with_wrapper_cxxflags" = "no"], - [AC_MSG_ERROR([--with-wrapper-cxxflags must have an argument.])]) - - AC_ARG_WITH([wrapper-cxxflags-prefix], - [AS_HELP_STRING([--with-wrapper-cxxflags-prefix], - [Extra flags to add to CXXFLAGS when using mpiCC/mpic++])]) - AS_IF([test "$with_wrapper_cxxflags_prefix" = "yes" || test "$with_wrapper_cxxflags_prefix" = "no"], - [AC_MSG_ERROR([--with-wrapper-cxxflags-prefix must have an argument.])]) - m4_ifdef([project_ompi], [ + AC_ARG_WITH([wrapper-cxxflags], + [AS_HELP_STRING([--with-wrapper-cxxflags], + [Extra flags to add to CXXFLAGS when using mpiCC/mpic++])]) + AS_IF([test "$with_wrapper_cxxflags" = "yes" || test "$with_wrapper_cxxflags" = "no"], + [AC_MSG_ERROR([--with-wrapper-cxxflags must have an argument.])]) + + AC_ARG_WITH([wrapper-cxxflags-prefix], + [AS_HELP_STRING([--with-wrapper-cxxflags-prefix], + [Extra flags to add to CXXFLAGS when using mpiCC/mpic++])]) + AS_IF([test "$with_wrapper_cxxflags_prefix" = "yes" || test "$with_wrapper_cxxflags_prefix" = "no"], + [AC_MSG_ERROR([--with-wrapper-cxxflags-prefix must have an argument.])]) + AC_ARG_WITH([wrapper-fcflags], [AS_HELP_STRING([--with-wrapper-fcflags], [Extra flags to add to FCFLAGS when using mpifort])]) @@ -340,6 +344,8 @@ 
AC_DEFUN([OPAL_SETUP_WRAPPER_FINAL],[ AC_MSG_CHECKING([for OPAL CPPFLAGS]) if test "$WANT_INSTALL_HEADERS" = "1" ; then OPAL_WRAPPER_EXTRA_CPPFLAGS='-I${includedir}/openmpi' + else + OPAL_WRAPPER_EXTRA_CPPFLAGS= fi OPAL_WRAPPER_EXTRA_CPPFLAGS="$OPAL_WRAPPER_EXTRA_CPPFLAGS $opal_mca_wrapper_extra_cppflags $wrapper_extra_cppflags $with_wrapper_cppflags" AC_SUBST([OPAL_WRAPPER_EXTRA_CPPFLAGS]) @@ -381,8 +387,9 @@ AC_DEFUN([OPAL_SETUP_WRAPPER_FINAL],[ # asked for, as they know better than us. AC_MSG_CHECKING([for OPAL LIBS]) OPAL_WRAPPER_EXTRA_LIBS="$opal_mca_wrapper_extra_libs" - OPAL_FLAGS_APPEND_UNIQ([OPAL_WRAPPER_EXTRA_LIBS], [$wrapper_extra_libs]) - OPAL_WRAPPER_EXTRA_LIBS="$OPAL_WRAPPER_EXTRA_LIBS $with_wrapper_libs" + OPAL_FLAGS_APPEND_MOVE([OPAL_WRAPPER_EXTRA_LIBS], [$wrapper_extra_libs]) + OPAL_FLAGS_APPEND_MOVE([OPAL_WRAPPER_EXTRA_LIBS], [$with_wrapper_libs]) + OPAL_FLAGS_APPEND_MOVE([OMPI_WRAPPER_EXTRA_LIBS], [$LIBS]) AC_SUBST([OPAL_WRAPPER_EXTRA_LIBS]) AC_MSG_RESULT([$OPAL_WRAPPER_EXTRA_LIBS]) ]) @@ -391,6 +398,8 @@ AC_DEFUN([OPAL_SETUP_WRAPPER_FINAL],[ AC_MSG_CHECKING([for OMPI CPPFLAGS]) if test "$WANT_INSTALL_HEADERS" = "1" ; then OMPI_WRAPPER_EXTRA_CPPFLAGS='-I${includedir}/openmpi' + else + OPAL_WRAPPER_EXTRA_CPPFLAGS= fi OMPI_WRAPPER_EXTRA_CPPFLAGS="$OMPI_WRAPPER_EXTRA_CPPFLAGS $ompi_mca_wrapper_extra_cppflags $wrapper_extra_cppflags $with_wrapper_cppflags" AC_SUBST([OMPI_WRAPPER_EXTRA_CPPFLAGS]) @@ -443,9 +452,9 @@ AC_DEFUN([OPAL_SETUP_WRAPPER_FINAL],[ AC_MSG_CHECKING([for OMPI LIBS]) OMPI_WRAPPER_EXTRA_LIBS="$ompi_mca_wrapper_extra_libs" - OPAL_FLAGS_APPEND_UNIQ([OMPI_WRAPPER_EXTRA_LIBS], [$wrapper_extra_libs]) - OMPI_WRAPPER_EXTRA_LIBS="$OMPI_WRAPPER_EXTRA_LIBS $with_wrapper_libs" - OPAL_FLAGS_APPEND_UNIQ([OMPI_WRAPPER_EXTRA_LIBS], [$LIBS]) + OPAL_FLAGS_APPEND_MOVE([OMPI_WRAPPER_EXTRA_LIBS], [$wrapper_extra_libs]) + OPAL_FLAGS_APPEND_MOVE([OMPI_WRAPPER_EXTRA_LIBS], [$with_wrapper_libs]) + OPAL_FLAGS_APPEND_MOVE([OMPI_WRAPPER_EXTRA_LIBS], [$LIBS]) 
AC_SUBST([OMPI_WRAPPER_EXTRA_LIBS]) AC_MSG_RESULT([$OMPI_WRAPPER_EXTRA_LIBS]) diff --git a/config/oshmem_config_files.m4 b/config/oshmem_config_files.m4 index 36385ab2fbc..00bdaafed2d 100644 --- a/config/oshmem_config_files.m4 +++ b/config/oshmem_config_files.m4 @@ -5,6 +5,8 @@ # Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2017-2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2021 Amazon.com, Inc. or its affiliates. +# All Rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -17,10 +19,8 @@ AC_DEFUN([OSHMEM_CONFIG_FILES],[ oshmem/Makefile oshmem/include/Makefile oshmem/shmem/c/Makefile - oshmem/shmem/c/profile/Makefile oshmem/shmem/fortran/Makefile - oshmem/shmem/fortran/profile/Makefile oshmem/tools/oshmem_info/Makefile oshmem/tools/wrappers/Makefile diff --git a/config/oshmem_configure_options.m4 b/config/oshmem_configure_options.m4 index 0ceac244b40..8290b9684ac 100644 --- a/config/oshmem_configure_options.m4 +++ b/config/oshmem_configure_options.m4 @@ -28,22 +28,30 @@ AC_MSG_CHECKING([if want oshmem]) AC_ARG_ENABLE([oshmem], [AS_HELP_STRING([--enable-oshmem], [Enable building the OpenSHMEM interface (available on Linux only, where it is enabled by default)])]) + if test "$enable_oshmem" = "no"; then AC_MSG_RESULT([no]) elif test "$enable_oshmem" = ""; then - if test "$opal_found_linux" = "yes"; then + case $host_os in + linux*) AC_MSG_RESULT([yes]) - else + ;; + *) enable_oshmem=no AC_MSG_RESULT([not supported on this platform]) - fi + ;; + esac else AC_MSG_RESULT([yes]) - if test "$opal_found_linux" != "yes"; then + case $host_os in + linux*) + ;; + *) AC_MSG_WARN([OpenSHMEM support was requested, but currently]) AC_MSG_WARN([only supports Linux.]) AC_MSG_ERROR([Cannot continue]) - fi + ;; + esac fi # @@ -91,11 +99,6 @@ fi AC_DEFINE_UNQUOTED(OSHMEM_PARAM_CHECK, $shmem_param_check, [Whether we want to check OSHMEM parameters always or never]) -# -# 
check for on_exit -# -AC_CHECK_FUNCS([on_exit]) - # # OSHMEM profiling support # diff --git a/configure.ac b/configure.ac index 55c5f443bab..81c7a092c79 100644 --- a/configure.ac +++ b/configure.ac @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2006-2022 Cisco Systems, Inc. All rights reserved # Copyright (c) 2006-2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights # reserved. @@ -23,13 +23,11 @@ # Copyright (c) 2014-2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2016-2017 IBM Corporation. All rights reserved. -# Copyright (c) 2018 Amazon.com, Inc. or its affiliates. +# Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. # All Rights reserved. # Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. # Copyright (c) 2019 Triad National Security, LLC. All rights # reserved. -# Copyright (c) 2020 Amazon.com, Inc. or its affiliates. -# All Rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -68,6 +66,9 @@ OPAL_CAPTURE_CONFIGURE_CLI([OPAL_CONFIGURE_CLI]) # because it twiddles random bits of autoconf OPAL_LOAD_PLATFORM +# Start a list of packages / modules / etc. that want to disable "make dist". +OPAL_MAKEDIST_DISABLE="" + # # Start it up # @@ -93,15 +94,6 @@ AS_IF([test "$host" != "$target"], AC_MSG_WARN([Cross-compiling is only partially supported]) AC_MSG_WARN([Proceed at your own risk!])]) -# AC_USE_SYSTEM_EXTENSIONS alters CFLAGS (e.g., adds -g -O2) -OPAL_VAR_SCOPE_PUSH([CFLAGS_save]) -CFLAGS_save=$CFLAGS -AC_USE_SYSTEM_EXTENSIONS -# AC_USE_SYSTEM_EXTENSIONS will modify CFLAGS if nothing was in there -# beforehand. We don't want that. So if there was nothing in -# CFLAGS, put nothing back in there. 
-AS_IF([test -z "$CFLAGS_save"], [CFLAGS=]) -OPAL_VAR_SCOPE_POP # # Init automake @@ -290,8 +282,6 @@ m4_ifdef([project_oshmem], ############################################################################ OPAL_CONFIGURE_OPTIONS -OPAL_CHECK_OS_FLAVORS -OPAL_CHECK_CUDA m4_ifdef([project_ompi], [OMPI_CONFIGURE_OPTIONS]) m4_ifdef([project_oshmem], [OSHMEM_CONFIGURE_OPTIONS]) @@ -346,10 +336,6 @@ m4_ifdef([project_ompi], AC_ENABLE_SHARED AC_DISABLE_STATIC -# Must be called before OPAL_SETUP_CC to get the value of CC -# before it is modified by the C99/C11 checks. -OPAL_SETUP_WRAPPER_INIT - ################################## # Check for known incompatibility ################################## @@ -394,10 +380,6 @@ AM_CONDITIONAL(OMPI_NEED_WINDOWS_REPLACEMENTS, # Do all Interix detections if necessary OMPI_INTERIX -# Does the compiler support "ident"-like constructs? - -OPAL_CHECK_IDENT([CC], [CFLAGS], [c], [C]) - # # Check for some types # @@ -550,6 +532,29 @@ OPAL_C_GET_ALIGNMENT(size_t, OPAL_ALIGNMENT_SIZE_T) OPAL_CHECK_ALT_SHORT_FLOAT +# Check system alignment requirements +if test "$opal_want_heterogeneous" = 1; then + ompi_cv_c_word_size_align=yes +else + AC_CACHE_CHECK([if word-sized integers must be word-size aligned], + [ompi_cv_c_word_size_align], + [AC_LANG_PUSH(C) + AC_RUN_IFELSE([AC_LANG_PROGRAM([dnl +#include ], [[ long data[2] = {0, 0}; + long *lp; + int *ip; + ip = (int*) data; + ip++; + lp = (long*) ip; + return lp[0]; ]])], + [ompi_cv_c_word_size_align=no], + [ompi_cv_c_word_size_align=yes], + [ompi_cv_c_word_size_align=yes])]) +fi +AS_IF([test $ompi_cv_c_word_size_align = yes], [results=1], [results=0]) +AC_DEFINE_UNQUOTED([OPAL_ALIGN_WORD_SIZE_INTEGERS], [$results], + [set to 1 if word-size integers must be aligned to word-size padding to prevent bus errors]) + # # Check for other compiler characteristics # @@ -557,10 +562,6 @@ OPAL_CHECK_ALT_SHORT_FLOAT OPAL_C_WEAK_SYMBOLS OPAL_C_MACRO_WEAK_SYMBOLS -if test "x$CC" = "xicc"; then - 
OPAL_CHECK_ICC_VARARGS -fi - # If we want the profiling layer: # - If the C compiler has weak symbols, use those. # - If not, then set to compile the code again with #define's in a @@ -600,6 +601,22 @@ opal_show_subtitle "Compiler characteristics" OPAL_CHECK_ATTRIBUTES OPAL_CHECK_COMPILER_VERSION_ID +# Open MPI only supports GCC >=v4.8.1. Notes: +# +# 1. The default compiler that comes with RHEL 7 is v4.8.5 (version ID +# 264197). +# 2. We regularly test with GCC v4.8.1 (version ID 264193). +# 3. GCC 4.8.0 probably also works; we just haven't tested it. +# +# Since we regularly test with 4.8.1, that's what we check for. +AS_IF([test "$opal_cv_compiler_FAMILYNAME" = "GNU" && \ + test "$opal_cv_compiler_VERSION" -lt 264193], + [AC_MSG_WARN([Open MPI no longer supports versions of the GNU compiler suite]) + AC_MSG_WARN([less than v4.8.1.]) + AC_MSG_WARN([Please upgrade your GNU compiler suite, or use]) + AC_MSG_WARN([a different compiler to build Open MPI.]) + AC_MSG_ERROR([Cannot continue]) + ]) ################################## # Java MPI Binding request @@ -651,6 +668,14 @@ AM_CONDITIONAL(OSHMEM_BUILD_FORTRAN_BINDINGS, AC_CACHE_SAVE +################################## +# Wrapper compilers. 
+# +# Must be called before MCA system +################################## +OPAL_SETUP_WRAPPER_INIT + + ################################## # Header files ################################## @@ -665,7 +690,7 @@ AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \ sys/fcntl.h sys/ipc.h sys/shm.h \ sys/ioctl.h sys/mman.h sys/param.h sys/queue.h \ sys/resource.h sys/select.h sys/socket.h sys/sockio.h \ - sys/stat.h sys/statfs.h sys/statvfs.h sys/time.h sys/tree.h \ + sys/stat.h sys/statfs.h sys/statvfs.h time.h sys/time.h sys/tree.h \ sys/types.h sys/uio.h sys/un.h net/uio.h sys/utsname.h sys/vfs.h sys/wait.h syslog.h \ termios.h ulimit.h unistd.h util.h utmp.h malloc.h \ ifaddrs.h crt_externs.h regex.h mntent.h paths.h \ @@ -854,6 +879,15 @@ AC_INCLUDES_DEFAULT #endif ]) +AC_CHECK_MEMBERS([struct timespec.tv_nsec], + [], [], [AC_INCLUDES_DEFAULT +#ifdef HAVE_TIME_H +#include +#endif +#ifdef HAVE_SYS_TIME_H +#include +#endif]) + # # Find corresponding types for MPI_Aint, MPI_Count, and MPI_Offset. 
# And if relevant, find the corresponding MPI_ADDRESS_KIND, @@ -872,7 +906,8 @@ AC_MSG_CHECKING([the linker for support for the -fini option]) OPAL_VAR_SCOPE_PUSH([LDFLAGS_save]) LDFLAGS_save=$LDFLAGS LDFLAGS="$LDFLAGS_save -Wl,-fini -Wl,finalize" -AC_TRY_LINK([void finalize (void) {}], [], [AC_MSG_RESULT([yes]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([void finalize (void) {}], [])], + [AC_MSG_RESULT([yes]) opal_ld_have_fini=1], [AC_MSG_RESULT([no]) opal_ld_have_fini=0]) LDFLAGS=$LDFLAGS_save @@ -903,7 +938,7 @@ OPAL_SEARCH_LIBS_CORE([ceil], [m]) # -lrt might be needed for clock_gettime OPAL_SEARCH_LIBS_CORE([clock_gettime], [rt]) -AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf openpty isatty getpwuid fork waitpid execve pipe ptsname setsid mmap tcgetpgrp posix_memalign strsignal sysconf syslog vsyslog regcmp regexec regfree _NSGetEnviron socketpair usleep mkfifo dbopen dbm_open statfs statvfs setpgid setenv __malloc_initialize_hook __clear_cache]) +AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf openpty isatty getpwuid fork waitpid execve pipe ptsname setsid mmap tcgetpgrp posix_memalign strsignal sysconf syslog vsyslog regcmp regexec regfree _NSGetEnviron socketpair usleep mkfifo dbopen dbm_open statfs statvfs setpgid setenv __malloc_initialize_hook __clear_cache on_exit]) # Sanity check: ensure that we got at least one of statfs or statvfs. if test $ac_cv_func_statfs = no && test $ac_cv_func_statvfs = no; then @@ -962,6 +997,9 @@ AC_CACHE_SAVE opal_show_title "System-specific tests" +OPAL_CHECK_CUDA +OPAL_CHECK_OS_FLAVORS + # Do we have _SC_NPROCESSORS_ONLN? (only going to pass if we also have # and sysconf(), which is ok) OS X 10.4 has and # sysconf(), but does not have _SC_NPROCESSORS_ONLN. Doh! @@ -1003,10 +1041,14 @@ AC_PROG_GREP AC_PROG_EGREP # -# We need as and lex +# We need as and flex # AM_PROG_AS -AM_PROG_LEX + +dnl Note that prior to AC v2.70, PROG_LEX did not take any arguments. 
+dnl But it is harmless to pass an argument to it ($1 will just be +dnl ignored). +AC_PROG_LEX([noyywrap]) # If we don't have Flex and we don't have a generated .c file # (distribution tarballs will have the .c file included, but git @@ -1014,18 +1056,17 @@ AM_PROG_LEX # Lex are not workable (all things being equal, since this is *only* # required for developers, we decided that it really was not worth it # to be portable between different versions of lex ;-). - -if test -z "$LEX" || \ +AS_IF([test -z "$LEX" || \ test -n "`echo $LEX | $GREP missing`" || \ - test "`basename $LEX`" != "flex"; then - if test ! -f "$srcdir/opal/util/show_help_lex.c"; then - AC_MSG_WARN([*** Could not find Flex on your system.]) + test "`basename $LEX`" != "flex"], + [AS_IF([test ! -f "$srcdir/opal/util/show_help_lex.c"], + [AC_MSG_WARN([*** Could not find Flex on your system.]) AC_MSG_WARN([*** Flex is required for developer builds of Open MPI.]) AC_MSG_WARN([*** Other versions of Lex are not supported.]) AC_MSG_WARN([*** NOTE: If you are building a tarball downloaded from www.open-mpi.org, you do not need Flex]) AC_MSG_ERROR([Cannot continue]) - fi -fi + ]) + ]) # # Setup man page processing @@ -1499,6 +1540,10 @@ AC_CONFIG_FILES([contrib/dist/mofed/debian/rules], AC_CONFIG_FILES([contrib/dist/mofed/compile_debian_mlnx_example], [chmod +x contrib/dist/mofed/compile_debian_mlnx_example]) +AS_IF([test -n "$OPAL_MAKEDIST_DISABLE"], + [AC_MSG_WARN(["make dist" will be disabled due to: $OPAL_MAKEDIST_DISABLE])]) +AC_SUBST([OPAL_MAKEDIST_DISABLE]) + OPAL_CONFIG_FILES m4_ifdef([project_ompi], [OMPI_CONFIG_FILES]) m4_ifdef([project_oshmem], [OSHMEM_CONFIG_FILES]) diff --git a/contrib/doc-ft-components/DEVEL.FT-REQUIREMENTS.md b/contrib/doc-ft-components/DEVEL.FT-REQUIREMENTS.md new file mode 100644 index 00000000000..f6f0306a2b4 --- /dev/null +++ b/contrib/doc-ft-components/DEVEL.FT-REQUIREMENTS.md @@ -0,0 +1,777 @@ +ULFM OPEN MPI COMPONENTS DESIGN AND REQUIREMENTS FOR FAULT TOLERANT 
OPERATION +============================================================================= + +This documents the requirements and best practices for Open MPI +components to support operating across failures with the +**User Level Failure Mitigation (ULFM)** Open MPI implementation. +___________________________________________________________________________ +[TOC] +___________________________________________________________________________ + +Introduction +============ + +This document concerns itself with the support of the User Level Failure +Mitigation (ULFM) MPI Standard draft proposal in Open MPI. The ULFM proposal +is developed by the MPI Forum's Fault Tolerance Working Group to support +the continued operation of MPI programs after crashes (node failures) have +impacted the execution. The key principle is that no MPI call (point-to-point, +collective, RMA, IO, ...) can block indefinitely after a failure, but must +either succeed or raise an MPI error. + +Open MPI contains the general infrastructure to support this mode of operation. +MPI operations may produce three supplementary error codes, and the recovery +of the MPI communication capability is performed with five supplementary interfaces +(see the [ULFM chapter](http://fault-tolerance.org/wp-content/uploads/2012/10/20170221-ft.png) +standard draft document). + ++ `MPIX_ERR_PROC_FAILED` when a process failure prevents the completion of + an MPI operation. ++ `MPIX_ERR_PROC_FAILED_PENDING` when a potential sender matching a non-blocking + wildcard source receive has failed. ++ `MPIX_ERR_REVOKED` when one of the ranks in the application has invoked the + `MPIX_Comm_revoke` operation on the communicator. ++ `MPIX_Comm_revoke(MPI_Comm comm)` Interrupts any communication pending on + the communicator at all ranks. ++ `MPIX_Comm_shrink(MPI_Comm comm, MPI_Comm* newcomm)` creates a new + communicator where dead processes in comm were removed. ++ `MPIX_Comm_agree(MPI_Comm comm, int *flag)` performs a consensus (i.e.
fault + tolerant allreduce operation) on flag (with the operation bitwise and). ++ `MPIX_Comm_failure_get_acked(MPI_Comm, MPI_Group*)` obtains the group of + currently acknowledged failed processes. ++ `MPIX_Comm_failure_ack(MPI_Comm)` acknowledges that the application intends + to ignore the effect of currently known failures on wildcard receive + completions and agreement return values. + +While the general infrastructure for fault tolerance support is available in +Open MPI, communication components (i.e., BTL, PML, MTL components) need to +refrain from aborting the whole application, and return +errors for fault events detected during communication operations to the upper +layers of the Open MPI infrastructure to handle the error condition. +The following sections describe specific requirements for the named area. + +Pathways for reporting failures +=============================== + +Process failures can be detected through two separate mechanisms as illustrated +in the following figure: + +![Fault detection pathways](./general-ulfm-fault-handling.png) + +1. The network transport is incapable of completing some communication operation + with the peer (send, recv, atomic operation, etc.). The network transport + component (BTL, MTL) should report the failure to the PML, so that the PML + has a chance of reacting, performing necessary cleanup, and initiating recovery + actions as needed by triggering the `errhandler_proc_failed_internal` function. + +2. The runtime environment (PRTE) has identified a peer failure. A PMIx event + notification containing the identification of the impacted processes will be + received from the PMIx thread, and transitioned as an event in the main + OMPI progress loop. This event will cause the OMPI errhandler function + `errhandler_proc_failed_internal` to be called.
+ +Error Completion and Requests +----------------------------- + +Once a failure has been reported, MPI will have to complete requests that are +representing communication operations with the failed process(es). The operation +may involve the failed process directly (a receive from a failed process), or +indirectly (a collective operation in which a failed process should participate; +an ANY_SOURCE receive). + +The `errhandler_proc_failed_internal` function considers all communicators in turn +and if a failed process appears in the group of the communicator, 1) pending +any-source receive on comm return an error; 2) collective operations +on comm will be **revoked**, that is, complete at all ranks (in error at some of the +ranks, but maybe not all); 3) point-to-point communication with a failed peer are +completed in error. + +Request fields used for Fault Tolerance +--------------------------------------- + +In order to track the error status of a request, some fields of a request must always +be initialized and updated correctly. + +* `req->req_status.MPI_ERROR`: The error status field of the request needs to be + initialized to `OMPI_SUCCESS` when the request is allocated. This field will be + updated by `ompi_request_wait` and similar functions to mark the request as in + need of attention. Of note: this is the internal status of the request, and + it can be initialized without undesirable side effect on the `MPI_STATUS` object + returned from `MPI_WAIT` (and similar). + +* `req->req_type`: Fault tolerance operations support the following types of requests: + `OMPI_REQUEST_PML` (a point-to-point request), `OMPI_REQUEST_COLL` (a non-blocking + collective communication request), and `OMPI_REQUEST_COMM` (a communicator creation + request, e.g., `ompi_comm_dup`, `ompi_comm_idup`). 
Other types of requests are not yet + supported and if your component requires such support, cases handling new types of + requests must be added in `ompi/request/req_ft.c::ompi_request_is_failed_fn`. + +* `req->req_mpi_object`: This field must contain the `comm` on which the request operates. + This is true for PML, NBC, as well as COMM types of requests (see above). + +* `pml_req->req_peer`: for a PML type request, this field must contain the `peer` that the + request is trying to communicate with. This field is used to identify requests that + directly involve failed processes. + +* `pml_req->req_tag`: for a PML type request, this field is used to identify requests that + represent collective operations (refer to `ompi_request_tag_is_collective()` for the + tag space), or resilient operations, i.e., for implementing internal resilient operations, + like `MPI_COMM_AGREE`, `MPI_COMM_SHRINK`, etc. (refer to `ompi_request_tag_is_ft()`). + +Wait-sync +--------- + +The **wait_sync** Multithreaded wait-synchronization object is an optimization that reduces +concurrent active polling in `ompi_request_wait` and other similar functions. In order for +requests to be rechecked for fault error condition (and hence completed in error), the +wait_sync must be interrupted. Note that all sync objects will be interrupted upon an +error. Requests whose state is not failed will return into a new sync object; requests +whose state is failed will complete in error and return control to the user. The +`sync->status` field will contain the reason for interrupting the sync object. + +Modifications to the wait-sync logic must pay attention to not damage the error path. +That is, a meaningful error must be present in `sync->status` when an error condition is +found, and the `wait_sync_global_wakeup()` function does require tracking of all currently +active `sync` objects.
In the `pthreads` sync implementation, the management of multiple +threads each using a different `sync` object also uses a list of active `sync`, which is +parsed by the global wakeup function. The only needed adaptation for supporting +fault tolerance in the `pthreads` sync is to also populate the `sync` list in the +single-threaded code path (so that it can be found by global wakeup using the same +mechanism as for the multi-threaded case). + +BTL components +============== + +In order to support fault tolerance, BTL components must provide the following functionality: + +Driver error capture +-------------------- + +Capture and convert driver error codes returned from the NIC driver operations (e.g., +`fi_write`). Most driver functions provide an error code that indicates error conditions on a +per-operation basis by default. Some network drivers may require setting the endpoint in a +special mode to enable error reporting. For example, the `UCX` driver aborts within the driver +function itself if the endpoint is not created with the `UCT_IFACE_PARAM_FIELD_ERR_HANDLER` mode. + +Driver error verbosity +---------------------- + +The BTL that encounters a process failure or unexpected peer disconnect can output an error +message to the console using the `BTL_PEER_ERROR` macro. Non-process-fault errors should use +the `BTL_ERROR` macro instead. When fault tolerance is inactive (default), both macros are +equally verbose and will inform the end-user of the details of the communication issue. +However, when fault tolerance is active, process failure events are correctable, and +verbose error reporting is generally undesired at the BTL level. Thus, unlike the +`BTL_ERROR` macro, the `BTL_PEER_ERROR` macro is silent when fault tolerance is active.
+For debugging purposes, the messages produced by `BTL_PEER_ERROR` can be enabled with +fault tolerance by either setting the `btl_base_verbose` MCA to a value greater than 1, +or by setting the `"btl_base_warn_peer_error"` to `true`. + +Do not abort within the component +--------------------------------- + +Refrain from aborting within the component itself when an error is reported by a driver +function. Driver errors should be converted to an appropriate `OPAL_ERR_XYZ` and transmitted +to the caller component. For example, in the BTL `ofi` component, every error would be +treated as fatal by calling the `MCA_BTL_OFI_ABORT()`. As a replacement, the `ofi` BTL has +been adapted to replace these aborting calls with `BTL_PEER_ERROR`, followed by +`mca_btl_ofi_context_finalize()` to clean the endpoint associated with the error peer, +before returning the error code `OPAL_ERR_UNREACH` to the caller. + +The following code sample shows one place where the OFI BTL is adapted to avoid aborting +when a CQ read error occurs, output an error message with the appropriate verbosity, +perform endpoint cleanup, and return an appropriate error code to the upper layer. This +change (and similar changes at other places) are sufficient to enable resumed operation +of OB1 over the OFI BTL, and thus fault recovery. + + diff --git a/opal/mca/btl/ofi/btl_ofi_context.c b/opal/mca/btl/ofi/btl_ofi_context.c + index bd399c62..ffafb96c 100644 + --- a/opal/mca/btl/ofi/btl_ofi_context.c + +++ b/opal/mca/btl/ofi/btl_ofi_context.c + @@ -437,13 +437,14 @@ int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context) { + + /* cq readerr failed!? 
*/ + if (0 > ret) { + - BTL_ERROR(("%s:%d: Error returned from fi_cq_readerr: %s(%d)", + + BTL_PEER_ERROR(NULL, ("%s:%d: Error returned from fi_cq_readerr: %s(%d)", + __FILE__, __LINE__, fi_strerror(-ret), ret)); + } else { + - BTL_ERROR(("fi_cq_readerr: (provider err_code = %d)\n", + + BTL_PEER_ERROR(NULL, ("fi_cq_readerr: (provider err_code = %d)\n", + cqerr.prov_errno)); + } + - MCA_BTL_OFI_ABORT(); + + mca_btl_ofi_context_finalize(context, context->rx_ctx != context->tx_ctx); + + return OPAL_ERR_UNREACH; + } + #ifdef FI_EINTR + /* sometimes, sockets provider complain about interupt. We do nothing. */ + +Complete affected fragments +--------------------------- + +Closing the affected endpoint should have the effect of completing the associated +fragments pending on the endpoint. If fragments remain pending in the network interface, +RDMA operations may still be updating the user buffers and prevent the PML level from +releasing the associated MPI requests. Thus, the BTL should complete and clean up +pending fragments and update the associated requests so that request completion +(potentially in error) can be safely reported by the PML layer to the end-user. + +Use the error callback mechanism +-------------------------------- + +The BTL should invoke the error callback registered by the PML when a process error is +produced within the component. The BTL module contains the API function `btl_register_error`. +The PML will attach a callback for managing the error at the PML level. A good example of +using the error callback is found in the TCP BTL. When an error is produced within the +TCP BTL, the endpoint is marked with a special internal flag, and is then closed. Upon +closing an endpoint, the error status is checked, and if an error condition caused the +endpoint closing, the registered error callback is triggered. + +MTL Components +============== + +In general terms, the same functionalities must be provided by MTL components to support +fault tolerance.
The MTL component should not abort internally. The peer failure should be +visible to the `cm` PML through error codes passed in the return values of the MTL module calls. +Most of the actual management of the error is expected to happen at the level of the `cm` PML. + +PML Components +============== + +Capturing errors with the PML +----------------------------- + +The PML should not abort internally, but instead set error codes in the internal status +of requests that have been impacted by a failure. The PML can capture errors from the +BTL components through the registered error callback that can be attached using the +`bml_register` interface. This callback is a prime position in the software stack to +transfer the handling of process failure errors to the cleanup and recovery part of the +MPI library, i.e., to invoke the errhandler `errhandler_proc_failed_internal` function. + +This function will then be in charge of updating requests, proc arrays, communicators, etc, +and will also call-back into the PML to trigger the cleanup of PML level structures and +matching queues in a generic and organized way. + +Cancellation of failed requests +------------------------------- + +When a request is identified as being in error due to a process failure, it needs to be +completed. This is achieved by having MPI completing calls (e.g., MPI_WAIT) to reconsider +the status of the requests. If a request is found to be in an error state, the request is +first cancelled, and then completed in error. The reason for cancelling the request instead +of directly completing it is to leave an opportunity for the BTL to purge pending fragments +and RDMA orders on the buffer associated with the request, hence ensuring that the request +is `pml_complete` before it is marked as `mpi_complete` (to enforce that the user-buffer is not +updated past the completion of the request). 
+ +A consequence is that it is expected that the PML supports the cancellation of requests +that communicate with failed processes. The following code-snippet illustrates how the +cancellation of requests in error is managed in the OB1 PML. Note how general support for +cancellation of request to/from non-failed processes is not required. + + if( true != request->req_match_received ) { /* the match has not been already done */ + assert( OMPI_ANY_TAG == ompi_request->req_status.MPI_TAG ); /* not matched isn't it */ + if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) { + opal_list_remove_item( &ob1_comm->wild_receives, (opal_list_item_t*)request ); + } else { + mca_pml_ob1_comm_proc_t* proc = mca_pml_ob1_peer_lookup (comm, request->req_recv.req_base.req_peer); + opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request); + } + } + else { /* it has matched */ + if( ompi_comm_is_proc_active( comm, request->req_recv.req_base.req_peer, + OMPI_COMM_IS_INTER(comm) ) ) { + opal_output_verbose(10, ompi_ftmpi_output_handle, + "Recv_request_cancel: cancel denied for request %p because it has matched peer %d\n", + (void*)request, request->req_recv.req_base.req_peer); + return OMPI_SUCCESS; + } + else { + /* This process is dead, therefore this request is complete */ + opal_output_verbose(10, ompi_ftmpi_output_handle, + "Recv_request_cancel: cancel granted for request %p because peer %d is dead\n", + (void*)request, request->req_recv.req_base.req_peer); + } + } + /** + * As now the PML is done with this request we have to force the pml_complete + * to true. Otherwise, the request will never be freed. + */ + + ompi_request->req_status._cancelled = true; + recv_request_pml_complete(request); + +Supporting the Revoke operation +------------------------------- + +The `MPIX_COMM_REVOKE` operation lets the user interrupt all ongoing operation on `comm`. 
+The principal use case is to stop the ongoing communication pattern when a process +failure error is reported at a given rank. Once a rank has experienced an error, +it is likely that it needs to stop communicating according to the pre-fault +communication pattern, and must instead enter a distinct communication pattern +to recover MPI objects or application datasets. However, another process that has +a pending operation that peers with that rank is at risk of deadlocking in that +case: its pending operation does not necessarily peer with a failed process, thus may +not be interrupted with an `MPI_ERR_PROC_FAILED` error. Yet, that operation will not be +matched, hence it may never complete. The solution is for the rank that initially +experienced the process fault error to call the `MPIX_COMM_REVOKE` operation when it +intends to abandon an ongoing communication pattern. + +The `MPIX_COMM_REVOKE` operation will cause all operations on `comm` (pending or future) +to be interrupted with the specific error class `MPI_ERR_REVOKED`. This in turn will +ensure that no process deadlocks on unmatched operations. + +It is unlikely that a developer will need to concern himself with the reliable +broadcast algorithm that propagates the revoke order between the processes of +a communicator. On the other hand, when a communicator is revoked, the PML needs to +identify which requests need to be interrupted, and do so in an orderly fashion. + +When a communicator is revoked, the new PML module function `pml_revoke_comm()` is +called. This function considers the unexpected fragments and matching queues, and +makes a determination of how to best interrupt the corresponding requests. A +difficult part of this operation is that the communication must be interrupted +in a state that is consistent between the two peers.
For example, if a send +request is revoked before completion, it is mandatory that the corresponding +recv request is also revoked (otherwise a deadlock would arise). Multiple +implementation choices are possible, but let's describe how the PML OB1 cleans +ongoing requests. The OB1 implementation makes a determination of when to +cancel an ongoing communication by considering the state of the request at +the receiver. There are 4 cases: + +1. Send requests are never directly revoked. The decision to revoke a particular + communication is taken at the receiver end, and the communication protocol + will inform the sender of what actions to take if the receiver decided to revoke + the communication. +2. The receive request is not present in the matching queue anymore (it has matched, + and the communication protocol has advanced to the next stage (possibly RDMA + operations at the NIC level)). Cancelling this case can be very difficult and is not + always supported by all network types. For this reason, in this case, the operation + is allowed to proceed to completion. Since the request has already matched, as long + as the sender does not fail, this request will complete. +3. If the receive request is still present in the matching queue (that is, the + request has not matched yet), or is just posted. In this case, the request is marked + with the appropriate error code, and is removed from the matching queue. The communicator + is marked as revoked already. +4. A new fragment is received, or a fragment is available in one of the unexpected + queues. In this case, the fragment has not yet matched a receive request. The + receiver decides that the communication is now revoked.
Instead of sending the + normal control message that initiates the next stage of the communication protocol + (e.g., rendez-vous, rdma pipeline, etc.), the receiver answers to the frag + with a NACK control message that indicates to the sender that + the communication is now completed in error and no further steps should + be taken at the sender to continue that send-request. + +Again, different PML/MTL may have different implementations for stopping ongoing +communication when a revoke occurs. The described implementation has the major +advantage that it does not require the BTL/network driver to be able to +cancel or interrupt ongoing RDMA operations. Another implementation may be able +to close an endpoint, thus causing all ongoing operations to implicitly terminate. + +Note that legacy components may use static initializers to fill-in the component +module structure. This practice should be avoided in general, because it will +cause silently compiling incorrect code when the module function ordering changes. +The addition of `pml_revoke_comm` is one such change. It is recommended +that all `pml` components move to named fields static initializers to ensure the +appropriate functions are set to the appropriate module fields regardless of +field ordering. + +Collective operations +===================== + +The agree operations +-------------------- + +The `coll` framework module now includes two new operations `coll_agree` and `coll_iagree`. +It is expected that, at least at an initial stage, most `coll` components will not +provide their own implementation of the agreement operations. When fault tolerance +is runtime disabled, a base implementation of these operations is provided +(`coll_base_agree_noft`) by the `coll_basic` module and will be installed as the +default implementation. That is, a collective module does not need to implement +an agreement.
+ +The `coll_ftagree` component provides an optimized fault tolerant +implementation of these operations, and is loaded when fault tolerance is +enabled at runtime. If a collective component wants to substitute its own +implementation of the agreement, the normal priority rules for component selection +apply (i.e., users and developers can tweak the values of MCA parameters +`coll_ftagree_priority` and `coll_xyz_priority` to prefer one over the other). + +Note that legacy components may use static initializers to fill-in the component +module structure. This practice should be avoided in general, because it will +cause silently compiling incorrect code when the module function ordering changes. +The addition of `coll_agree` and `coll_iagree` is one such change. It is recommended +that all `coll` components move to named fields static initializers to ensure the +appropriate functions are set to the appropriate module fields regardless of +field ordering. + +Interrupting collectives in error +--------------------------------- + +The same issue that commands the availability of the `MPIX_COMM_REVOKE` can also +happen within the implementation of the collective operation itself. Many `coll` +components use a set of point-to-point operations internally, and when a failure +is reported at a given step in the collective operation at a particular rank, it +does not entail that other ranks have also observed that same failure. Thus, if +the rank that has experienced a point-to-point error jumps out of the collective +operation without completing the full communication schedule for the collective +communication, it may leave other ranks pending on unmatched collective internal +point-to-point communication operation. In that case, the collective operation +would deadlock at the ranks that have not observed the failure through +point-to-point operations with the failed process. + +Two approaches are possible to eliminate this issue. + +1.
The first approach requires heavy modification of the `coll` component to support +fault tolerance operation. It is based on unconditionally completing the communication +schedule of the collective operation, but substituting the actual content of +post-error messages with special values indicating that the collective operation +should cause an MPI error at the peer rank. This in essence requires the implementation +of the collective operations to itself become fault-aware and, to some extent resilient. +As a consequence, this approach is expected to be used only in special cases (e.g., it +is currently used in the implementation of MPI_COMM_SPAWN), but in the general case, +collective frameworks require less intrusive modifications when using the second mechanism. +When a collective operation wants to use this methodology, it needs to add its collective +tags to the range of reserved collective tags that are non-interruptible (see the +function `ompi_request_tag_is_ft()` for details). + +2. The second approach, which is provided by the shared Open MPI infrastructure, +is for the occurrence of a fault at any rank to cause the interruption of all +collective operations at all ranks on the communicator. This feature is +implemented by the `errhandler_proc_failed_internal()` calling the `ompi_comm_revoke()` +operation internally, with the special mode `coll_only`. This call will cause the +interruption of all requests on `comm` with a collective tag (i.e., one of the tags +in the range defined by function `ompi_request_tag_is_collective()`) with the error +`MPI_ERR_PROC_FAILED`. Thanks to this mechanism, the collective implementation +can do per-request error handling, and bail-out from the collective communication +pattern as soon as one error is reported.
The implicit revoke of collective +operations will ensure that other ranks will either complete the collective +successfully (if all contributions to their output buffers were available +despite the failure), or be appropriately interrupted during one of the +internal communication operations, even when they do not directly peer with +a failed process. + +Internal request cleanup +------------------------ + +Collective operations often require the use of internal requests to follow +the advancement of internal point-to-point operation comprising the +communication pattern. Care must be taken to not simply `ompi_request_free` +these requests in the error case, as doing so does not enforce completion. +That is, the buffer associated with such a request may still be receiving +updates from the network receiving message fragments, or RDMA updates. + +Instead, the cleanup of such requests must wait until they complete in error +(as they will, due to the mechanism discussed in the above paragraph). Note +that only process failure errors are guaranteed to complete in error. Other +types of errors may deadlock if the request is waited. An example of the +appropriate cleanup of fault-interrupted requests is found in `coll_base_util.c` + + @@ -76,6 +76,26 @@ int ompi_coll_base_sendrecv_actual( const void* sendbuf, size_t scount, + if (MPI_STATUS_IGNORE != status) { + status->MPI_ERROR = err; + } + + if( MPI_REQUEST_NULL != req ) { + +#if OPAL_ENABLE_FT_MPI + + if( MPI_ERR_PROC_FAILED == req->req_status.MPI_ERROR + + || MPI_ERR_PROC_FAILED_PENDING == req->req_status.MPI_ERROR + + || MPI_ERR_REVOKED == req->req_status.MPI_ERROR ) { + + /* We cannot just 'free' and forget, as the PML/BTLS would still + + * be updating the request buffer after we return from the MPI + + * call! + + * For other errors that do not have a well defined post-error + + * behavior, calling the cancel/wait could deadlock, so we just + + * free, as this is the best that can be done in this case.
*/ + + ompi_request_cancel(req); + + ompi_request_wait(&req, MPI_STATUS_IGNORE); + + if( MPI_ERR_PROC_FAILED_PENDING == err ) { + + err = MPI_ERR_PROC_FAILED; + + } + + } else /* this 'else' intentionaly spills outside the ifdef */ + +#endif /* OPAL_ENABLE_FT_MPI */ + + ompi_request_free(&req); + + } + return (err); + +Non-blocking collectives and Generalized requests +------------------------------------------------- + +Non-blocking collective and generalized requests operations can be comprised of +sub-requests. The final error that is presented to the end-user is presented +in the completion status of the top-level request. It is however common for the +top-level request to simply orchestrate the submission and completion of +a number of sub-requests, rather than engaging directly in communication +operations. The general design for fault tolerance in this case is that the +lowest level of requests (that is, request that represent actual communication +that go down to the BTL/MTL layers) produce errors. These errors are then captured +during the execution of the sub-request schedule. If a sub-request is found in error, +the schedule is interrupted, the higher level request is also marked in error, and +control is returned to the upper level of the schedule, up until the top-level request +is also marked in error. Note that like normal collective operations, the implicit +revocation of collective operations on the communicator impacted by a failure +ensures that it is safe to bail-out from the schedule in a localized manner. +Other ranks will also experience an error in operations that are blocking thus +ensuring that no deadlock arises. 
+ +A good example of that mode of operation can be found in the +`libnbc` collective communication component progress in `mca/coll/libnbc/nbc.c`: + + @@ -334,8 +336,20 @@ int NBC_Progress(NBC_Handle *handle) { + /* don't call ompi_request_test_all as it causes a recursive call into opal_progress */ + while (handle->req_count) { + ompi_request_t *subreq = handle->req_array[handle->req_count - 1]; + +#if OPAL_ENABLE_FT_MPI + + if (REQUEST_COMPLETE(subreq) + + || OPAL_UNLIKELY( ompi_request_is_failed(subreq) )) { + +#else + if (REQUEST_COMPLETE(subreq)) { + +#endif /* OPAL_ENABLE_FT_MPI */ + if(OPAL_UNLIKELY( OMPI_SUCCESS != subreq->req_status.MPI_ERROR )) { + +#if OPAL_ENABLE_FT_MPI + + if( MPI_ERR_PROC_FAILED == subreq->req_status.MPI_ERROR || + + MPI_ERR_PROC_FAILED_PENDING == subreq->req_status.MPI_ERROR || + + MPI_ERR_REVOKED == subreq->req_status.MPI_ERROR ) { + + NBC_DEBUG (1, "MPI Error in NBC subrequest %p : %d)", subreq, subreq->req_status.MPI_ERROR); + + } else // this 'else' intentionally spills outside the ifdef + +#endif /* OPAL_ENABLE_FT_MPI */ + NBC_Error ("MPI Error in NBC subrequest %p : %d", subreq, subreq->req_status.MPI_ERROR); + /* copy the error code from the underlying request and let the + * round finish */ + +Changing CID algorithms +----------------------- + +This section describes some specificities of the allocation of CID under fault +tolerant operations, and is addressed to developers intending to modify the CID +algorithm. + +The `MPI_COMM_REVOKE` operation interrupts all calls on the communicator. Because it is +a resilient operation, the revoke notification is sent using a reliable broadcast. +Unlike a normal broadcast, a given process will receive multiple reliable broadcast +messages for each revoke operation. As is the nature of asynchronous systems, timing +of reception may cause a revoke notification to reach a particular process much later +than anticipated.
In that case, the process may have already produced the +`MPI_ERR_REVOKED` to the end-user, and the impacted communicator may have then been +freed using `MPI_COMM_FREE`. In this case, a new communicator may have re-used the +context identifier (CID), since it is now free for reuse. Thus, late reliable +broadcasts that pertain to a prior communicator using that CID could erroneously +revoke that new communicator, and must be ignored. + +In order to perform that discrimination between communicators using the same CID at +different time periods, revoke notice messages themselves are decorated with the CID +of the communicator to revoke, as well as the epoch at which they have been produced. +This epoch number is also attached to every communicator when it is created. +This uniquely identifies the communicator that must be revoked independently of the time +period at which the revoke notification is received. + +In order to associate a unique pair (CID,epoch) to communicators, the CID allocation +algorithm has been expanded to integrate the computation of the epoch as a byproduct +of the normal operation of the CID algorithm. The way the normal CID algorithm operates +is based on a multi-step non-blocking allreduce operation to select the best global +CID. At each round, every rank proposes one (or many) CID that are locally available. +A non-blocking allreduce then identifies the best among all propositions (if any). If +a good CID is found, a second allreduce confirms the choice. If no good CID is found, +that second allreduce will trigger another round of propositions and the algorithm +repeats. In order to not increase the complexity of that algorithm, the selection of the +epoch is integrated into the second allreduce that flags the successful selection of a +proposed CID. A developer that wants to modify the CID algorithm must pay attention to +the computation of the epoch. Ideally the two numbers can be computed in a combined fashion.
+ +Testing Fault Tolerant Operation +================================ + +Testing fault-tolerant operation extensively can be hard. Unlike normal communication for +which the matching and ordering semantics simplify the space of possible orderings of +messages, faults may interrupt communication at any time, including in the middle +of a transfer, during RDMA updates, etc. Over the years we have come up with a number of +tests that stress particular aspects of the communication infrastructure, or represent +common resilience patterns that users are likely to exercise. + +The following repository contains a set of tests that can be used to exercise fault +tolerance in MPI: [https://github.com/ICLDisco/ulfm-testing] + +Testing general error handling +------------------------------ + +Test `api/err_handler.c` gives an estimate of the time it takes to report an error during +an `MPI_BARRIER`. The test times a barrier without fault, injects a process failure, +times the barrier again (time for new fault to be reported), and then times a barrier again +(time for an old fault to be reported). The test also verifies that communication between +non-faulty processes remains possible. + +Test `api/err_any.c` verifies that ANY_SOURCE receptions are correctly interrupted when a +potential sender fails. The test also verifies that ANY_SOURCE operations are again +operational after the use of `MPIX_Comm_failure_ack()` and checks that the correct number +of errors gets reported to the application level. + +Test `api/getack.c` verifies that errors that have been reported through `MPIX_Comm_get_failed()` +are indeed resulting in communication with the reported process returning an error of class +`MPIX_ERR_PROC_FAILED` as mandated. + +Stress test for the Revoke operation +------------------------------------ + +Test `api/revoke.c` verifies that non-matched pending operations get correctly interrupted by +the use of the API call `MPIX_Comm_revoke()`.
ISend and IRecv operations are posted before a +barrier ensures that the corresponding completion cannot happen before the operation +gets revoked. This is a good stress test when debugging the cleanup of revoked +operations from the PML matching queues. + +Stress test for BTL timeout +--------------------------- + +Test `stress/pingpairs.c` is designed to exercise the case of in-band detection of failure, +and thus the cleanup code for fragments of requests whose peer is a failed process. +This test executes a ring-like neighbor pattern that is repeated for a long-enough +period of time that the network driver will reach the maximum retry-count timeout. +This will verify that errors are reported quickly from the failure detector, and that +the occurrence of retry timeouts at the BTL level does not cause issues (like unexpected +aborts in the BTL, or crash due to orphaned fragments cleanup). + +Stress test for MPI_COMM_SPAWN +------------------------------ + +`MPI_COMM_SPAWN` is often employed to replace failed processes in application usage patterns. +The test `stress/spawn.c` is designed to force the detection of a process failure to happen +during the `MPI_COMM_SPAWN` operation itself, thus exercising the error reporting in that case. + +Application patterns +-------------------- + +The `tutorial/sc19` directory contains a set of application patterns that are demonstrated for +instruction purposes. These application patterns also represent common use-cases that are to +be found in the wild, and are good representatives of what capabilities will get exercised +in production. + +Test `03.undisturbed.c` verifies that communication in communicators that do not have failed +processes can continue, while communication with a failed process reports an error. It +also exercises `MPI_Sendrecv`. + +Test `05.err_coll.c` verifies that a blocking collective communication completes (possibly in +error) at all ranks.
This test is designed for a process failure to be detected during the +collective operation, and can be used as a skeleton to track issues with a particular +collective operation by just substituting the collective call of interest. + +Test `07.err_comm_grid2d.c` is designed for a process failure to be detected during a communicator +creation function. The test issues two `MPI_COMM_SPLIT` calls, and then proceeds to agree on whether all +ranks have succeeded in the operation. This is a good test to track issues with the CID +algorithm. This test will also exercise the non-blocking allreduce, as this operation +is employed in the CID algorithm. + +Test `09.insulation` verifies that errors are reported in collective operations only if the +underlying communicator contains a failed process. The world is divided in two halves, +only one of which suffers a failure. + +Test `12.buddycr` is representative of the very common usage pattern where replacement processes +are spawned to take the place of failed processes. The test will perform some mockup computation +with an iterative-like compute loop with collective communication in the loop. Periodically, the +dataset is checkpointed on a *buddy* rank. When a fault is injected, it is likely detected during +the checkpoint operation, thus stress testing `MPIX_COMM_AGREE`, `MPIX_COMM_SHRINK` and `MPI_COMM_SPAWN`. + + +Examples of issues in components that are not yet fault tolerant +================================================================ + +In this section, we discuss the features that were missing in some of the major Open MPI components. +The goal of this section is to present an example of the process and methodology used to add +fault tolerance to not yet enabled components. + +OFI fault tolerance with the OFI MTL +------------------------------------ + +While OFI fault tolerance is supported using the OB1 PML, with the OFI BTL, this is a workaround +for the fact that the OFI MTL does not currently support fault tolerance.
Here is a list of +issues that have been identified to prevent fault-tolerant operations with this component. + + +1. The PML CM does not have a `pml_revoke_comm` function +2. The PML CM uses requests of type `OMPI_REQUEST_PML` that are not compatible with a ` + ompi_pml_base_request_t`. This prevents checking the important flags that should be + present in such a request type, notably as used in `ompi_request_is_ft()`, ` + ompi_request_is_collective()`, and `ompi_request_is_failed_fn()`. +3. The PML CM does not support cancelling requests with a failed peer. +4. The MTL OFI has instances of calling `abort()/exit()` when communication errors are + reported. In `ompi_mtl_ofi_context_progress()`, an error may be returned from + `fi_cq_read()` as a consequence of a process failure; this will abort the process + instead of returning the error. + + +The UCX PML +----------- + +Similarly to OFI, UCX fault tolerance is supported using the OB1 PML, with the UCT BTL. Again, this +is a workaround for the fact that the UCX PML does not currently support fault tolerance. + +1. The UCX PML does not pass flags to set UCP endpoints in fault-tolerant mode +2. The UCX PML uses requests of type `OMPI_REQUEST_PML` that are not compatible with + a `ompi_pml_base_request_t`. Fields needed by `ompi_request_is_failed_fn()` are missing +3. The UCX PML does not have a `pml_revoke_comm` function. The unexpected queues, and + matching logic is within the UCP component of UCX. +4. When using the UCX PML, no BTL is initialized and the `ftagree` component and the `ompi_comm_revoke()` + function do direct calls to the BTL to send control messages. + +RMA Components +============== + +The **OSC** framework is in charge of managing RMA communication. +At the moment, fault tolerance with RMA communication is not supported. +However, the ULFM standard draft document ***does*** define semantics for +RMA fault tolerance, and contributions to enable this feature in Open MPI +are welcome. 
+ +I/O Components +============== + +The following components manage MPI I/O in Open MPI + +- _fs_ File system functions for MPI I/O +- _fbtl_ File byte transfer layer: abstraction for individual read/write + operations for OMPIO +- _fcoll_ Collective read and write operations for MPI I/O +- _sharedfp_ Shared file pointer operations for MPI I/O + +All components in these frameworks are unmodified, **untested** (expect clean +post-failure abort). Again, the ULFM standard draft document **does** define +semantics for File operation fault tolerance, and contributions to enable +this feature in Open MPI are welcome. + +___________________________________________________________________________ + +More Information +================ + +Author: Aurelien Bouteiller + +The [README.FT.ULFM.md] gives a more complete description of current +component support in Open MPI ULFM. + +More information (tutorials, examples, build instructions for leading +top500 systems) is also available in the Fault Tolerance Research +Hub website: + + +## Bibliographic References +If you are looking for, or want to cite a general reference for ULFM, +please use + +_Wesley Bland, Aurelien Bouteiller, Thomas Herault, George Bosilca, Jack +J. Dongarra: Post-failure recovery of MPI communication capability: Design +and rationale. IJHPCA 27(3): 244-254 (2013)._ + +Available from: http://journals.sagepub.com/doi/10.1177/1094342013488238. +___________________________________________________________________________ + +Copyright +========= + +``` +Copyright (c) 2019-2020 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. 
+ +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ +``` diff --git a/contrib/doc-ft-components/general-ulfm-fault-handling.png b/contrib/doc-ft-components/general-ulfm-fault-handling.png new file mode 100644 index 00000000000..c2510e3aa29 Binary files /dev/null and b/contrib/doc-ft-components/general-ulfm-fault-handling.png differ diff --git a/ompi/Makefile.am b/ompi/Makefile.am index ea8b0df227e..27dfc87604f 100644 --- a/ompi/Makefile.am +++ b/ompi/Makefile.am @@ -19,6 +19,8 @@ # and Technology (RIST). All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. # Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. +# Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -26,15 +28,6 @@ # $HEADER$ # -# We always have C bindings, but do we have profiling? - -if BUILD_MPI_BINDINGS_LAYER -c_mpi_lib = mpi/c/libmpi_c_mpi.la mpi/tool/libmpi_mpit.la -else -c_mpi_lib = -endif -c_pmpi_lib = mpi/c/profile/libmpi_c_pmpi.la mpi/tool/profile/libmpi_pmpit.la - # See if we have Fortran mpif.h MPI bindings if OMPI_BUILD_FORTRAN_MPIFH_BINDINGS @@ -147,7 +140,7 @@ lib@OMPI_LIBMPI_NAME@_la_LIBADD = \ datatype/libdatatype.la \ debuggers/libdebuggers.la \ mpi/c/libmpi_c.la \ - mpi/tool/libmpi_mpit_common.la \ + mpi/tool/libmpi_mpit.la \ $(c_mpi_lib) \ $(c_pmpi_lib) \ $(mpi_fortran_base_lib) \ @@ -200,6 +193,7 @@ include patterns/net/Makefile.am include patterns/comm/Makefile.am include mca/Makefile.am include util/Makefile.am +include instance/Makefile.am distclean-local: rm -f mpiext/static-components.h diff --git a/ompi/attribute/attribute.c b/ompi/attribute/attribute.c index aaf04a685e0..d81360753f5 100644 --- a/ompi/attribute/attribute.c +++ b/ompi/attribute/attribute.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -14,6 
+15,8 @@ * reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2022 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -240,8 +243,10 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/communicator/communicator.h" /* ompi_communicator_t generated in [COPY|DELETE]_ATTR_CALLBACKS */ #include "ompi/win/win.h" /* ompi_win_t generated in [COPY|DELETE]_ATTR_CALLBACKS */ +#include "ompi/instance/instance.h" #include "ompi/mpi/fortran/base/fint_2_int.h" + /* * Macros */ @@ -256,9 +261,9 @@ #define attr_datatype_f d_f_to_c_index #define attr_win_f w_f_to_c_index -#define CREATE_KEY(key) opal_bitmap_find_and_set_first_unset_bit(key_bitmap, (key)) +#define CREATE_KEY(key) opal_bitmap_find_and_set_first_unset_bit(attr_subsys->key_bitmap, (key)) -#define FREE_KEY(key) opal_bitmap_clear_bit(key_bitmap, (key)) +#define FREE_KEY(key) opal_bitmap_clear_bit(attr_subsys->key_bitmap, (key)) /* Not checking for NULL_DELETE_FN here, since according to the @@ -406,6 +411,15 @@ typedef struct attribute_value_t { int av_sequence; } attribute_value_t; +/* + * struct to hold state of attr subsys + */ + +typedef struct attr_subsys_t { + opal_object_t super; + opal_hash_table_t *keyval_hash; + opal_bitmap_t *key_bitmap; + } attr_subsys_t; /* * Local functions @@ -413,6 +427,8 @@ typedef struct attribute_value_t { static void attribute_value_construct(attribute_value_t *item); static void ompi_attribute_keyval_construct(ompi_attribute_keyval_t *keyval); static void ompi_attribute_keyval_destruct(ompi_attribute_keyval_t *keyval); +static void attr_subsys_construct(attr_subsys_t *subsys); +static void attr_subsys_destruct(attr_subsys_t *subsys); static int set_value(ompi_attribute_type_t type, void *object, opal_hash_table_t **attr_hash, int key, attribute_value_t *new_attr, @@ -425,6 +441,13 @@ static MPI_Aint 
translate_to_aint(attribute_value_t *val); static int compare_attr_sequence(const void *attr1, const void *attr2); +/* + * attribute_subsys_t class + */ +static OBJ_CLASS_INSTANCE(attr_subsys_t, + opal_object_t, + attr_subsys_construct, + attr_subsys_destruct); /* * attribute_value_t class @@ -443,24 +466,21 @@ static OBJ_CLASS_INSTANCE(ompi_attribute_keyval_t, ompi_attribute_keyval_construct, ompi_attribute_keyval_destruct); - /* * Static variables */ -static opal_hash_table_t *keyval_hash; -static opal_bitmap_t *key_bitmap; -static int attr_sequence; +static attr_subsys_t *attr_subsys = NULL; static unsigned int int_pos = 12345; static unsigned int integer_pos = 12345; +static int attr_sequence; /* * MPI attributes are *not* high performance, so just use a One Big Lock * approach. However, this lock is released before a user provided callback is * triggered and acquired right after, allowing for recursive behaviors. */ -static opal_mutex_t attribute_lock; - +static opal_mutex_t attribute_lock = OPAL_MUTEX_STATIC_INIT; /* * attribute_value_t constructor function @@ -507,33 +527,70 @@ ompi_attribute_keyval_destruct(ompi_attribute_keyval_t *keyval) free(keyval->bindings_extra_state); } - opal_hash_table_remove_value_uint32(keyval_hash, keyval->key); + opal_hash_table_remove_value_uint32(attr_subsys->keyval_hash, keyval->key); FREE_KEY(keyval->key); } } -/* - * This will initialize the main list to store key- attribute - * items. This will be called one time, during MPI_INIT(). 
- */ -int ompi_attr_init(void) +int ompi_attr_get_ref(void) +{ + int ret = OMPI_SUCCESS; + + OPAL_THREAD_LOCK(&attribute_lock); + + if (NULL == attr_subsys) { + attr_subsys = OBJ_NEW(attr_subsys_t); + if (NULL == attr_subsys) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto fn_exit; + } + if ((NULL == attr_subsys->keyval_hash) || (NULL == attr_subsys->key_bitmap)) { + OBJ_RELEASE(attr_subsys); + attr_subsys = NULL; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto fn_exit; + } + } else { + OBJ_RETAIN(attr_subsys); + } + +fn_exit: + OPAL_THREAD_UNLOCK(&attribute_lock); + + return ret; +} + +int ompi_attr_put_ref(void) +{ + if (NULL != attr_subsys) { + OBJ_RELEASE(attr_subsys); + } + return OMPI_SUCCESS; +} + +static void attr_subsys_construct(attr_subsys_t *subsys) { int ret; void *bogus = (void*) 1; int *p = (int *) &bogus; - keyval_hash = OBJ_NEW(opal_hash_table_t); - if (NULL == keyval_hash) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - key_bitmap = OBJ_NEW(opal_bitmap_t); + subsys->keyval_hash = OBJ_NEW(opal_hash_table_t); + + subsys->key_bitmap = OBJ_NEW(opal_bitmap_t); + /* * Set the max size to OMPI_FORTRAN_HANDLE_MAX to enforce bound */ - opal_bitmap_set_max_size (key_bitmap, OMPI_FORTRAN_HANDLE_MAX); - if (0 != opal_bitmap_init(key_bitmap, 32)) { - return OMPI_ERR_OUT_OF_RESOURCE; + opal_bitmap_set_max_size (subsys->key_bitmap, + OMPI_FORTRAN_HANDLE_MAX); + ret = opal_bitmap_init(subsys->key_bitmap, 32); + if (OPAL_SUCCESS != ret) { + abort(); + } + + for (int i = 0; i < MPI_ATTR_PREDEFINED_KEY_MAX; i++) { + opal_bitmap_set_bit(subsys->key_bitmap, i); } for (int_pos = 0; int_pos < (sizeof(void*) / sizeof(int)); @@ -550,31 +607,23 @@ int ompi_attr_init(void) } } - OBJ_CONSTRUCT(&attribute_lock, opal_mutex_t); - - if (OMPI_SUCCESS != (ret = opal_hash_table_init(keyval_hash, - ATTR_TABLE_SIZE))) { - return ret; - } - if (OMPI_SUCCESS != (ret = ompi_attr_create_predefined())) { - return ret; + ret = opal_hash_table_init(subsys->keyval_hash, ATTR_TABLE_SIZE); + if (OPAL_SUCCESS 
!= ret) { + abort(); } - return OMPI_SUCCESS; + attr_sequence = 0; } /* - * Cleanup everything during MPI_Finalize(). + * Cleanup everything when no more refs to the attr subsys */ -int ompi_attr_finalize(void) +static void attr_subsys_destruct(attr_subsys_t *subsys) { ompi_attr_free_predefined(); - OBJ_DESTRUCT(&attribute_lock); - OBJ_RELEASE(keyval_hash); - OBJ_RELEASE(key_bitmap); - - return OMPI_SUCCESS; + OBJ_RELEASE(subsys->keyval_hash); + OBJ_RELEASE(subsys->key_bitmap); } /*****************************************************************************/ @@ -609,10 +658,15 @@ static int ompi_attr_create_keyval_impl(ompi_attribute_type_t type, /* Create a new unique key and fill the hash */ OPAL_THREAD_LOCK(&attribute_lock); - ret = CREATE_KEY(key); + ret = MPI_SUCCESS; + if (!(flags & OMPI_KEYVAL_PREDEFINED)) { + ret = CREATE_KEY(key); + } + if (OMPI_SUCCESS == ret) { keyval->key = *key; - ret = opal_hash_table_set_value_uint32(keyval_hash, *key, keyval); + ret = opal_hash_table_set_value_uint32(attr_subsys->keyval_hash, + *key, keyval); } if (OMPI_SUCCESS != ret) { @@ -635,11 +689,22 @@ int ompi_attr_create_keyval(ompi_attribute_type_t type, void *bindings_extra_state) { ompi_attribute_fortran_ptr_t es_tmp; + int rc; + + rc = ompi_mpi_instance_retain (); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } es_tmp.c_ptr = extra_state; - return ompi_attr_create_keyval_impl(type, copy_attr_fn, delete_attr_fn, - key, &es_tmp, flags, - bindings_extra_state); + rc = ompi_attr_create_keyval_impl(type, copy_attr_fn, delete_attr_fn, + key, &es_tmp, flags, + bindings_extra_state); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_mpi_instance_release (); + } + + return rc; } int ompi_attr_create_keyval_fint(ompi_attribute_type_t type, @@ -651,6 +716,12 @@ int ompi_attr_create_keyval_fint(ompi_attribute_type_t type, void *bindings_extra_state) { ompi_attribute_fortran_ptr_t es_tmp; + int rc; + + rc = ompi_mpi_instance_retain (); + if (OPAL_UNLIKELY(OMPI_SUCCESS != 
rc)) { + return rc; + } es_tmp.f_integer = extra_state; #if SIZEOF_INT == OMPI_SIZEOF_FORTRAN_INTEGER @@ -670,6 +741,12 @@ int ompi_attr_create_keyval_aint(ompi_attribute_type_t type, void *bindings_extra_state) { ompi_attribute_fortran_ptr_t es_tmp; + int rc; + + rc = ompi_mpi_instance_retain (); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } es_tmp.f_address = extra_state; return ompi_attr_create_keyval_impl(type, copy_attr_fn, delete_attr_fn, @@ -687,7 +764,7 @@ int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, /* Find the key-value pair */ OPAL_THREAD_LOCK(&attribute_lock); - ret = opal_hash_table_get_value_uint32(keyval_hash, *key, + ret = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, *key, (void **) &keyval); if ((OMPI_SUCCESS != ret) || (NULL == keyval) || (keyval->attr_type != type) || @@ -707,6 +784,9 @@ int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, opal_atomic_wmb(); OPAL_THREAD_UNLOCK(&attribute_lock); + /* balance out retain in keyval_create */ + ompi_mpi_instance_release (); + return MPI_SUCCESS; } @@ -720,7 +800,7 @@ int ompi_attr_set_c(ompi_attribute_type_t type, void *object, opal_hash_table_t **attr_hash, int key, void *attribute, bool predefined) { - int ret; + int ret = MPI_SUCCESS; attribute_value_t *new_attr = OBJ_NEW(attribute_value_t); if (NULL == new_attr) { return OMPI_ERR_OUT_OF_RESOURCE; @@ -942,7 +1022,7 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, /* Get the keyval in the main keyval hash - so that we know what the copy_attr_fn is */ - err = opal_hash_table_get_value_uint32(keyval_hash, key, + err = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, key, (void **) &hash_value); if (OMPI_SUCCESS != err) { /* This should not happen! 
*/ @@ -1037,7 +1117,7 @@ static int ompi_attr_delete_impl(ompi_attribute_type_t type, void *object, attribute_value_t *attr; /* Check if the key is valid in the master keyval hash */ - ret = opal_hash_table_get_value_uint32(keyval_hash, key, + ret = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, key, (void **) &keyval); if ((OMPI_SUCCESS != ret) || (NULL == keyval) || @@ -1053,7 +1133,7 @@ static int ompi_attr_delete_impl(ompi_attribute_type_t type, void *object, goto exit; } - /* Check if the key is valid for the communicator/window/dtype. If + /* Check if the key is valid for the communicator/window/dtype/instance. If yes, then delete the attribute and key entry from the object's hash */ ret = opal_hash_table_get_value_uint32(attr_hash, key, (void**) &attr); @@ -1198,7 +1278,7 @@ static int set_value(ompi_attribute_type_t type, void *object, /* Note that this function can be invoked by ompi_attr_copy_all() to set attributes on the new object (in addition to the top-level MPI_* functions that set attributes). 
*/ - ret = opal_hash_table_get_value_uint32(keyval_hash, key, + ret = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, key, (void **) &keyval); /* If key not found */ @@ -1242,7 +1322,7 @@ static int set_value(ompi_attribute_type_t type, void *object, had_old = true; } - ret = opal_hash_table_get_value_uint32(keyval_hash, key, + ret = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, key, (void **) &keyval); if ((OMPI_SUCCESS != ret ) || (NULL == keyval)) { /* Keyval has disappeared underneath us -- this shouldn't @@ -1288,7 +1368,7 @@ static int get_value(opal_hash_table_t *attr_hash, int key, with the key, then the call is valid and returns FALSE in the flag argument */ *flag = 0; - ret = opal_hash_table_get_value_uint32(keyval_hash, key, + ret = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, key, (void**) &keyval); if (OMPI_ERR_NOT_FOUND == ret) { return MPI_KEYVAL_INVALID; diff --git a/ompi/attribute/attribute.h b/ompi/attribute/attribute.h index 2bec4387dad..492c9e32c70 100644 --- a/ompi/attribute/attribute.h +++ b/ompi/attribute/attribute.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -55,7 +58,7 @@ enum ompi_attribute_type_t { * with 1 so that we can have it initialized to 0 * using memset in the constructor */ TYPE_ATTR, /**< The attribute belongs to datatype object */ - WIN_ATTR /**< The attribute belongs to a win object */ + WIN_ATTR, /**< The attribute belongs to a win object */ }; typedef enum ompi_attribute_type_t ompi_attribute_type_t; @@ -64,50 +67,34 @@ typedef enum ompi_attribute_type_t ompi_attribute_type_t; delete. These will only be used here and not in the front end functions. */ -typedef void (ompi_fint_copy_attr_function)(MPI_Fint *oldobj, - MPI_Fint *keyval, - MPI_Fint *extra_state, - MPI_Fint *attr_in, - MPI_Fint *attr_out, - ompi_fortran_logical_t *flag, - MPI_Fint *ierr); -typedef void (ompi_fint_delete_attr_function)(MPI_Fint *obj, - MPI_Fint *keyval, - MPI_Fint *attr_in, - MPI_Fint *extra_state, - MPI_Fint *ierr); +typedef void (*ompi_fint_copy_attr_function)(MPI_Fint *oldobj, MPI_Fint *keyval, + MPI_Fint *extra_state, MPI_Fint *attr_in, + MPI_Fint *attr_out, ompi_fortran_logical_t *flag, + MPI_Fint *ierr); +typedef void (*ompi_fint_delete_attr_function)(MPI_Fint *obj, MPI_Fint *keyval, MPI_Fint *attr_in, + MPI_Fint *extra_state, MPI_Fint *ierr); /* New-style MPI-2 Fortran function pointer declarations for copy and delete. These will only be used here and not in the front end functions. 
*/ -typedef void (ompi_aint_copy_attr_function)(MPI_Fint *oldobj, - MPI_Fint *keyval, - void *extra_state, - void *attr_in, - void *attr_out, - ompi_fortran_logical_t *flag, - MPI_Fint *ierr); -typedef void (ompi_aint_delete_attr_function)(MPI_Fint *obj, - MPI_Fint *keyval, - void *attr_in, - void *extra_state, - MPI_Fint *ierr); +typedef void (*ompi_aint_copy_attr_function)(MPI_Fint *oldobj, MPI_Fint *keyval, void *extra_state, + void *attr_in, void *attr_out, + ompi_fortran_logical_t *flag, MPI_Fint *ierr); +typedef void (*ompi_aint_delete_attr_function)(MPI_Fint *obj, MPI_Fint *keyval, void *attr_in, + void *extra_state, MPI_Fint *ierr); /* * Internally the copy function for all kinds of MPI objects has one more * argument, the pointer to the new object. Therefore, we can do on the * flight modifications of the new communicator based on attributes stored * on the main communicator. */ -typedef int (MPI_Comm_internal_copy_attr_function)(MPI_Comm, int, void *, - void *, void *, int *, - MPI_Comm); -typedef int (MPI_Type_internal_copy_attr_function)(MPI_Datatype, int, void *, - void *, void *, int *, - MPI_Datatype); -typedef int (MPI_Win_internal_copy_attr_function)(MPI_Win, int, void *, - void *, void *, int *, - MPI_Win); +typedef int (*MPI_Comm_internal_copy_attr_function)(MPI_Comm, int, void *, void *, void *, int *, + MPI_Comm); +typedef int (*MPI_Type_internal_copy_attr_function)(MPI_Datatype, int, void *, void *, void *, + int *, MPI_Datatype); +typedef int (*MPI_Win_internal_copy_attr_function)(MPI_Win, int, void *, void *, void *, int *, + MPI_Win); typedef void (ompi_attribute_keyval_destructor_fn_t)(int); @@ -120,19 +107,19 @@ union ompi_attribute_fn_ptr_union_t { MPI_Type_delete_attr_function *attr_datatype_delete_fn; MPI_Win_delete_attr_function *attr_win_delete_fn; - MPI_Comm_internal_copy_attr_function *attr_communicator_copy_fn; - MPI_Type_internal_copy_attr_function *attr_datatype_copy_fn; - MPI_Win_internal_copy_attr_function *attr_win_copy_fn; + 
MPI_Comm_internal_copy_attr_function attr_communicator_copy_fn; + MPI_Type_internal_copy_attr_function attr_datatype_copy_fn; + MPI_Win_internal_copy_attr_function attr_win_copy_fn; /* For Fortran old MPI-1 callback functions */ - ompi_fint_delete_attr_function *attr_fint_delete_fn; - ompi_fint_copy_attr_function *attr_fint_copy_fn; + ompi_fint_delete_attr_function attr_fint_delete_fn; + ompi_fint_copy_attr_function attr_fint_copy_fn; /* For Fortran new MPI-2 callback functions */ - ompi_aint_delete_attr_function *attr_aint_delete_fn; - ompi_aint_copy_attr_function *attr_aint_copy_fn; + ompi_aint_delete_attr_function attr_aint_delete_fn; + ompi_aint_copy_attr_function attr_aint_copy_fn; }; typedef union ompi_attribute_fn_ptr_union_t ompi_attribute_fn_ptr_union_t; @@ -201,22 +188,26 @@ int ompi_attr_hash_init(opal_hash_table_t **hash) } /** - * Initialize the main attribute hash that stores the keyvals and meta data + * Increase the reference count on the attributes subsystem. Instantiate subsys if + * not yet instantiated. * * @return OMPI return code */ -int ompi_attr_init(void); +int ompi_attr_get_ref(void); + /** - * Destroy the main attribute hash that stores the keyvals and meta data + * Decrease the reference count on the attributes subsystem. Attributes subsystem + * resources are released when the count drops to zero. + * + * @return OMPI return code */ -int ompi_attr_finalize(void); - +int ompi_attr_put_ref(void); /** - * Create a new key for use by attribute of Comm/Win/Datatype + * Create a new key for use by attribute of Comm/Win/Datatype/Instance * * @param type Type of attribute (COMM/WIN/DTYPE) (IN) * @param copy_attr_fn Union variable containing the function pointer @@ -289,7 +280,7 @@ int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, * Set an attribute on the comm/win/datatype in a form valid for C. 
* * @param type Type of attribute (COMM/WIN/DTYPE) (IN) - * @param object The actual Comm/Win/Datatype object (IN) + * @param object The actual Comm/Win/Datatype/Instance object (IN) * @param attr_hash The attribute hash table hanging on the object(IN/OUT) * @param key Key val for the attribute (IN) * @param attribute The actual attribute pointer (IN) @@ -318,7 +309,7 @@ int ompi_attr_set_c(ompi_attribute_type_t type, void *object, * Set an int predefined attribute in a form valid for C. * * @param type Type of attribute (COMM/WIN/DTYPE) (IN) - * @param object The actual Comm/Win/Datatype object (IN) + * @param object The actual Comm/Win/Datatype/Instance object (IN) * @param attr_hash The attribute hash table hanging on the object(IN/OUT) * @param key Key val for the attribute (IN) * @param attribute The actual attribute value (IN) @@ -348,7 +339,7 @@ int ompi_attr_set_int(ompi_attribute_type_t type, void *object, * Fortran MPI-1. * * @param type Type of attribute (COMM/WIN/DTYPE) (IN) - * @param object The actual Comm/Win/Datatype object (IN) + * @param object The actual Comm/Win/Datatype/Instance object (IN) * @param attr_hash The attribute hash table hanging on the object(IN/OUT) * @param key Key val for the attribute (IN) * @param attribute The actual attribute pointer (IN) @@ -379,7 +370,7 @@ OMPI_DECLSPEC int ompi_attr_set_fint(ompi_attribute_type_t type, void *object, * Fortran MPI-2. 
* * @param type Type of attribute (COMM/WIN/DTYPE) (IN) - * @param object The actual Comm/Win/Datatype object (IN) + * @param object The actual Comm/Win/Datatype/Instance object (IN) * @param attr_hash The attribute hash table hanging on the object(IN/OUT) * @param key Key val for the attribute (IN) * @param attribute The actual attribute pointer (IN) @@ -488,7 +479,7 @@ OMPI_DECLSPEC int ompi_attr_get_aint(opal_hash_table_t *attr_hash, int key, /** * Delete an attribute on the comm/win/datatype * @param type Type of attribute (COMM/WIN/DTYPE) (IN) - * @param object The actual Comm/Win/Datatype object (IN) + * @param object The actual Comm/Win/Datatype/Instance object (IN) * @param attr_hash The attribute hash table hanging on the object(IN) * @param key Key val for the attribute (IN) * @param predefined Whether the key is predefined or not 0/1 (IN) @@ -503,7 +494,7 @@ int ompi_attr_delete(ompi_attribute_type_t type, void *object, /** * This to be used from functions like MPI_*_DUP in order to copy all - * the attributes from the old Comm/Win/Dtype object to a new + * the attributes from the old Comm/Win/Dtype/Instance object to a new * object. 
* @param type Type of attribute (COMM/WIN/DTYPE) (IN) * @param old_object The old COMM/WIN/DTYPE object (IN) @@ -520,7 +511,7 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, /** - * This to be used to delete all the attributes from the Comm/Win/Dtype + * This to be used to delete all the attributes from the Comm/Win/Dtype/Instance * object in one shot * @param type Type of attribute (COMM/WIN/DTYPE) (IN) * @param object The COMM/WIN/DTYPE object (IN) diff --git a/ompi/attribute/attribute_predefined.c b/ompi/attribute/attribute_predefined.c index df948378bb7..d0bb041d463 100644 --- a/ompi/attribute/attribute_predefined.c +++ b/ompi/attribute/attribute_predefined.c @@ -194,9 +194,10 @@ static int create_comm(int target_keyval, bool want_inherit) ompi_attribute_fn_ptr_union_t del; keyval = -1; - copy.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*) - (want_inherit ? MPI_COMM_DUP_FN : MPI_COMM_NULL_COPY_FN); + copy.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function)( + want_inherit ? 
MPI_COMM_DUP_FN : MPI_COMM_NULL_COPY_FN); del.attr_communicator_delete_fn = MPI_COMM_NULL_DELETE_FN; + keyval = target_keyval; err = ompi_attr_create_keyval(COMM_ATTR, copy, del, &keyval, NULL, OMPI_KEYVAL_PREDEFINED, NULL); if (MPI_SUCCESS != err) { @@ -224,8 +225,9 @@ static int create_win(int target_keyval) ompi_attribute_fn_ptr_union_t del; keyval = -1; - copy.attr_win_copy_fn = (MPI_Win_internal_copy_attr_function*)MPI_WIN_NULL_COPY_FN; + copy.attr_win_copy_fn = (MPI_Win_internal_copy_attr_function) MPI_WIN_NULL_COPY_FN; del.attr_win_delete_fn = MPI_WIN_NULL_DELETE_FN; + keyval = target_keyval; err = ompi_attr_create_keyval(WIN_ATTR, copy, del, &keyval, NULL, OMPI_KEYVAL_PREDEFINED, NULL); if (MPI_SUCCESS != err) { diff --git a/ompi/communicator/Makefile.am b/ompi/communicator/Makefile.am index dcff49460b8..675d9bad881 100644 --- a/ompi/communicator/Makefile.am +++ b/ompi/communicator/Makefile.am @@ -39,3 +39,4 @@ lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ communicator/ft/comm_ft.c communicator/ft/comm_ft_reliable_bcast.c communicator/ft/comm_ft_propagator.c communicator/ft/comm_ft_detector.c communicator/ft/comm_ft_revoke.c endif # WANT_FT_MPI +dist_ompidata_DATA += communicator/help-comm.txt diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index c31a698b88e..4d2b811823c 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -24,6 +24,8 @@ * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -53,6 +55,8 @@ #include "ompi/mca/pml/pml.h" #include "ompi/request/request.h" +#include "ompi/runtime/params.h" + /* ** sort-function for MPI_Comm_split */ @@ -91,6 +95,10 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro opal_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req); +static int ompi_comm_get_rprocs (ompi_communicator_t *local_comm, ompi_communicator_t *bridge_comm, + int local_leader, int remote_leader, int tag, int rsize, + ompi_proc_t ***rprocs); + /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ @@ -107,15 +115,15 @@ int ompi_comm_set ( ompi_communicator_t **ncomm, int *remote_ranks, opal_hash_table_t *attr, ompi_errhandler_t *errh, - bool copy_topocomponent, ompi_group_t *local_group, - ompi_group_t *remote_group ) + ompi_group_t *remote_group, + uint32_t flags) { ompi_request_t *req; int rc; rc = ompi_comm_set_nb (ncomm, oldcomm, local_size, local_ranks, remote_size, remote_ranks, - attr, errh, copy_topocomponent, local_group, remote_group, &req); + attr, errh, local_group, remote_group, flags, &req); if (OMPI_SUCCESS != rc) { return rc; } @@ -127,23 +135,25 @@ int ompi_comm_set ( ompi_communicator_t **ncomm, return rc; } +static int ompi_comm_set_simple (ompi_communicator_t **ncomm, ompi_errhandler_t *errhandler, + ompi_group_t *local_group) +{ + return ompi_comm_set (ncomm, NULL, local_group->grp_proc_count, NULL, 0, NULL, NULL, errhandler, + local_group, NULL, 0); +} + + /* * if remote_group == &ompi_mpi_group_null, then the new communicator * is forced to be an inter communicator. 
*/ -int ompi_comm_set_nb ( ompi_communicator_t **ncomm, - ompi_communicator_t *oldcomm, - int local_size, - int *local_ranks, - int remote_size, - int *remote_ranks, - opal_hash_table_t *attr, - ompi_errhandler_t *errh, - bool copy_topocomponent, - ompi_group_t *local_group, - ompi_group_t *remote_group, - ompi_request_t **req ) +int ompi_comm_set_nb (ompi_communicator_t **ncomm, ompi_communicator_t *oldcomm, int local_size, + int *local_ranks, int remote_size, int *remote_ranks, opal_hash_table_t *attr, + ompi_errhandler_t *errh, ompi_group_t *local_group, ompi_group_t *remote_group, + uint32_t flags, ompi_request_t **req) { + bool copy_topocomponent = !!(flags & OMPI_COMM_SET_FLAG_COPY_TOPOLOGY); + bool dup_comm = !(flags & OMPI_COMM_SET_FLAG_LOCAL_COMM_NODUP); ompi_communicator_t *newcomm = NULL; int ret; @@ -165,8 +175,6 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, newcomm->super.s_info = NULL; /* fill in the inscribing hyper-cube dimensions */ newcomm->c_cube_dim = opal_cube_dim(local_size); - newcomm->c_id_available = MPI_UNDEFINED; - newcomm->c_id_start_index = MPI_UNDEFINED; if (NULL == local_group) { /* determine how the list of local_rank can be stored most @@ -181,6 +189,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, OBJ_RETAIN(newcomm->c_local_group); } newcomm->c_my_rank = newcomm->c_local_group->grp_my_rank; + newcomm->c_assertions = 0; /* Set remote group and duplicate the local comm, if applicable */ if ( NULL != remote_group ) { @@ -199,11 +208,17 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, newcomm->c_flags |= OMPI_COMM_INTER; - old_localcomm = OMPI_COMM_IS_INTRA(oldcomm) ? oldcomm : oldcomm->c_local_comm; + if (dup_comm) { + old_localcomm = OMPI_COMM_IS_INTRA(oldcomm) ? 
oldcomm : oldcomm->c_local_comm; - /* NTH: use internal idup function that takes a local group argument */ - ompi_comm_idup_internal (old_localcomm, newcomm->c_local_group, NULL, NULL, - &newcomm->c_local_comm, req); + /* NTH: use internal idup function that takes a local group argument */ + ompi_comm_idup_internal (old_localcomm, newcomm->c_local_group, NULL, NULL, + &newcomm->c_local_comm, req); + } else { + /* take ownership of the old communicator (it must be an intracommunicator) */ + assert (OMPI_COMM_IS_INTRA(oldcomm)); + newcomm->c_local_comm = oldcomm; + } } else { newcomm->c_remote_group = newcomm->c_local_group; OBJ_RETAIN(newcomm->c_remote_group); @@ -221,7 +236,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, OBJ_RETAIN ( newcomm->error_handler ); /* Set Topology, if required and if available */ - if ( copy_topocomponent && (NULL != oldcomm->c_topo) ) { + if (NULL != oldcomm && copy_topocomponent && (NULL != oldcomm->c_topo) ) { /** * The MPI standard is pretty clear on this, the topology information * behave as info keys, and is copied only on MPI_Comm_dup. @@ -233,7 +248,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, } /* Copy attributes and call according copy functions, if required */ - if (NULL != oldcomm->c_keyhash) { + if (NULL != oldcomm && NULL != oldcomm->c_keyhash) { if (NULL != attr) { ompi_attr_hash_init(&newcomm->c_keyhash); if (OMPI_SUCCESS != (ret = ompi_attr_copy_all (COMM_ATTR, oldcomm, @@ -245,6 +260,10 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, } } + if (NULL != oldcomm) { + newcomm->instance = oldcomm->instance; + } + *ncomm = newcomm; return (OMPI_SUCCESS); } @@ -271,8 +290,8 @@ int ompi_comm_group ( ompi_communicator_t* comm, ompi_group_t **group ) /* ** Counterpart to MPI_Comm_create. To be used within OMPI. 
*/ -int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, - ompi_communicator_t **newcomm ) +int ompi_comm_create_w_info (ompi_communicator_t *comm, ompi_group_t *group, opal_info_t *info, + ompi_communicator_t **newcomm) { ompi_communicator_t *newcomp = NULL; int rsize; @@ -350,9 +369,9 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, rranks, /* remote_ranks */ NULL, /* attrs */ comm->error_handler, /* error handler */ - false, /* dont copy the topo */ group, /* local group */ - remote_group); /* remote group */ + remote_group, /* remote group */ + 0); /* flags */ if ( OMPI_SUCCESS != rc ) { goto exit; @@ -364,9 +383,15 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, goto exit; } + /* Copy info if there is one. */ + newcomp->super.s_info = OBJ_NEW(opal_info_t); + if (info) { + opal_info_dup(info, &(newcomp->super.s_info)); + } + /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d CREATE FROM %d", - newcomp->c_contextid, comm->c_contextid ); + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %s CREATE FROM %s", + ompi_comm_print_cid (newcomp), ompi_comm_print_cid (comm)); /* Activate the communicator and init coll-component */ rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); @@ -397,6 +422,11 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, return ( rc ); } +int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, + ompi_communicator_t **newcomm ) +{ + return ompi_comm_create_w_info (comm, group, NULL, newcomm); +} /**********************************************************************/ /**********************************************************************/ @@ -574,9 +604,9 @@ int ompi_comm_split_with_info( ompi_communicator_t* comm, int color, int key, rranks, /* remote_ranks */ NULL, /* attrs */ comm->error_handler,/* error handler */ - pass_on_topo, - 
local_group, /* local group */ - remote_group); /* remote group */ + local_group, /* local group */ + remote_group, /* remote group */ + pass_on_topo ? OMPI_COMM_SET_FLAG_COPY_TOPOLOGY : 0); /* flags */ if ( OMPI_SUCCESS != rc ) { goto exit; @@ -586,9 +616,8 @@ int ompi_comm_split_with_info( ompi_communicator_t* comm, int color, int key, OBJ_RELEASE(local_group); if (NULL != newcomp->c_local_comm) { snprintf(newcomp->c_local_comm->c_name, MPI_MAX_OBJECT_NAME, - "MPI COMMUNICATOR %d SPLIT FROM %d", - newcomp->c_local_comm->c_contextid, - comm->c_local_comm->c_contextid ); + "MPI COMM %s SPLIT FROM %s", ompi_comm_print_cid (newcomp), + ompi_comm_print_cid (comm)); } } @@ -607,8 +636,8 @@ int ompi_comm_split_with_info( ompi_communicator_t* comm, int color, int key, } /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d SPLIT FROM %d", - newcomp->c_contextid, comm->c_contextid ); + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s SPLIT FROM %s", + ompi_comm_print_cid (newcomp), ompi_comm_print_cid (comm)); /* Copy info if there is one */ if (info) { @@ -619,6 +648,11 @@ int ompi_comm_split_with_info( ompi_communicator_t* comm, int color, int key, /* Activate the communicator and init coll-component */ rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); + /* MPI-4 §7.4.4 requires us to remove all unknown keys from the info object */ + if (NULL != newcomp->super.s_info) { + opal_info_remove_unreferenced(newcomp->super.s_info); + } + exit: free ( results ); free ( sorted ); @@ -915,8 +949,7 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key, do { rc = ompi_comm_set (&newcomp, comm, my_size, lranks, my_rsize, - rranks, NULL, comm->error_handler, false, - NULL, NULL); + rranks, NULL, comm->error_handler, NULL, NULL, 0); if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { break; } @@ -927,10 +960,10 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, 
int key, break; } - // Copy info if there is one. - newcomp->super.s_info = OBJ_NEW(opal_info_t); + ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_LAZY_BARRIER); + ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_ACTIVE_POLL); if (info) { - opal_info_dup(info, &(newcomp->super.s_info)); + opal_infosubscribe_change_info(&newcomp->super, info); } /* Activate the communicator and init coll-component */ @@ -950,12 +983,16 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key, } if (!need_split) { + + /* MPI-4 §7.4.4 requires us to remove all unknown keys from the info object */ + opal_info_remove_unreferenced(newcomp->super.s_info); + /* common case. no reordering and no MPI_UNDEFINED */ *newcomm = newcomp; /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d SPLIT_TYPE FROM %d", - newcomp->c_contextid, comm->c_contextid ); + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s SPLIT_TYPE FROM %s", + ompi_comm_print_cid (newcomp), ompi_comm_print_cid (comm)); break; } @@ -1009,9 +1046,9 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, opal_info_t *info, omp NULL, /* remote_procs */ comm->c_keyhash, /* attrs */ comm->error_handler, /* error handler */ - true, /* copy the topo */ comm->c_local_group, /* local group */ - remote_group ); /* remote group */ + remote_group, /* remote group */ + OMPI_COMM_SET_FLAG_COPY_TOPOLOGY); /* flags */ if ( OMPI_SUCCESS != rc) { return rc; } @@ -1024,13 +1061,14 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, opal_info_t *info, omp } /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d DUP FROM %d", - newcomp->c_contextid, comm->c_contextid ); + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s DUP FROM %s", + ompi_comm_print_cid (newcomp), ompi_comm_print_cid (comm)); // Copy info if there is one. 
- newcomp->super.s_info = OBJ_NEW(opal_info_t); + ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_LAZY_BARRIER); + ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_ACTIVE_POLL); if (info) { - opal_info_dup(info, &(newcomp->super.s_info)); + opal_infosubscribe_change_info(&newcomp->super, info); } /* activate communicator and init coll-module */ @@ -1040,6 +1078,9 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, opal_info_t *info, omp return rc; } + /* MPI-4 §7.4.4 requires us to remove all unknown keys from the info object */ + opal_info_remove_unreferenced(newcomp->super.s_info); + *newcomm = newcomp; return MPI_SUCCESS; } @@ -1106,9 +1147,9 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro NULL, /* remote_procs */ comm->c_keyhash, /* attrs */ comm->error_handler, /* error handler */ - true, /* copy the topo */ group, /* local group */ remote_group, /* remote group */ + OMPI_COMM_SET_FLAG_COPY_TOPOLOGY, /* flags */ subreq); /* new subrequest */ if (OMPI_SUCCESS != rc) { ompi_comm_request_return (request); @@ -1177,8 +1218,8 @@ static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request) } /* Set name for debugging purposes */ - snprintf(context->newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d DUP FROM %d", - context->newcomp->c_contextid, context->comm->c_contextid ); + snprintf(context->newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s DUP FROM %s", + ompi_comm_print_cid (context->newcomp), ompi_comm_print_cid (context->comm)); /* activate communicator and init coll-module */ rc = ompi_comm_activate_nb (&context->newcomp, context->comm, NULL, NULL, NULL, false, mode, subreq); @@ -1194,6 +1235,11 @@ static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request) static int ompi_comm_idup_with_info_finish (ompi_comm_request_t *request) { + ompi_comm_idup_with_info_context_t *context = + (ompi_comm_idup_with_info_context_t *) request->context; + /* MPI-4 §7.4.4 requires 
us to remove all unknown keys from the info object */ + opal_info_remove_unreferenced(context->newcomp->super.s_info); + /* done */ return MPI_SUCCESS; } @@ -1216,9 +1262,9 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int NULL, /* remote_procs */ comm->c_keyhash, /* attrs */ comm->error_handler, /* error handler */ - true, /* copy the topo */ group, /* local group */ - NULL); /* remote group */ + NULL, /* remote group */ + OMPI_COMM_SET_FLAG_COPY_TOPOLOGY); /* flags */ if ( OMPI_SUCCESS != rc) { return rc; } @@ -1231,8 +1277,8 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int } /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d GROUP FROM %d", - newcomp->c_contextid, comm->c_contextid ); + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s GROUP FROM %s", + ompi_comm_print_cid (newcomp), ompi_comm_print_cid (comm)); /* activate communicator and init coll-module */ rc = ompi_comm_activate (&newcomp, comm, NULL, &tag, NULL, false, mode); @@ -1245,6 +1291,337 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int return MPI_SUCCESS; } +int ompi_comm_create_from_group (ompi_group_t *group, const char *tag, opal_info_t *info, + ompi_errhandler_t *errhandler, ompi_communicator_t **newcomm) +{ + ompi_communicator_t *newcomp = NULL; + int rc; + + *newcomm = MPI_COMM_NULL; + + rc = ompi_comm_set_simple (&newcomp, errhandler, group); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + /* Determine context id. 
It is identical to f_2_c_handle */ + rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) tag, NULL, false, + OMPI_COMM_CID_GROUP_NEW); + if ( OMPI_SUCCESS != rc ) { + return rc; + } + + /* Set name for debugging purposes */ + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s FROM GROUP", + ompi_comm_print_cid (newcomp)); + + newcomp->super.s_info = OBJ_NEW(opal_info_t); + if (NULL == newcomp->super.s_info) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* activate communicator and init coll-module. use the group allreduce implementation as + * no collective module has yet been selected. the tag does not matter as any tag will + * be unique on the new communicator. */ + rc = ompi_comm_activate (&newcomp, newcomp, NULL, &(int) {0xfeed}, NULL, + false, OMPI_COMM_CID_GROUP); + if ( OMPI_SUCCESS != rc ) { + return rc; + } + + newcomp->instance = group->grp_instance; + + *newcomm = newcomp; + return MPI_SUCCESS; +} + +int ompi_intercomm_create (ompi_communicator_t *local_comm, int local_leader, ompi_communicator_t *bridge_comm, + int remote_leader, int tag, ompi_communicator_t **newintercomm) +{ + int local_size = 0, local_rank = 0, lleader = 0, rleader = 0, rc, rsize = 0; + struct ompi_proc_t **rprocs; + ompi_communicator_t *newcomp; + ompi_group_t *new_group_pointer; + + *newintercomm = MPI_COMM_NULL; + + local_size = ompi_comm_size ( local_comm ); + local_rank = ompi_comm_rank ( local_comm ); + lleader = local_leader; + rleader = remote_leader; + + if ( MPI_PARAM_CHECK ) { + if ( (0 > local_leader) || (local_leader >= local_size) ) { + return OMPI_ERR_BAD_PARAM; + } + + /* remember that the remote_leader and bridge_comm arguments + just have to be valid at the local_leader */ + if ( local_rank == local_leader ) { + if (ompi_comm_invalid (bridge_comm) || (bridge_comm->c_flags & OMPI_COMM_INTER)) { + return MPI_ERR_COMM; + } + + if ((remote_leader < 0) || (remote_leader >= ompi_comm_size(bridge_comm))) { + return OMPI_ERR_BAD_PARAM; + } + } /* if ( local_rank 
== local_leader ) */ + } + + if (local_rank == local_leader) { + MPI_Request req; + + /* local leader exchange group sizes lists */ + rc = MCA_PML_CALL(irecv (&rsize, 1, MPI_INT, rleader, tag, bridge_comm, &req)); + if ( rc != MPI_SUCCESS ) { + return rc; + } + rc = MCA_PML_CALL(send (&local_size, 1, MPI_INT, rleader, tag, + MCA_PML_BASE_SEND_STANDARD, bridge_comm)); + if ( rc != MPI_SUCCESS ) { + return rc; + } + rc = ompi_request_wait (&req, MPI_STATUS_IGNORE); + if ( rc != MPI_SUCCESS ) { + return rc; + } + } + + /* bcast size and list of remote processes to all processes in local_comm */ + rc = local_comm->c_coll->coll_bcast (&rsize, 1, MPI_INT, lleader, local_comm, + local_comm->c_coll->coll_bcast_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + rc = ompi_comm_get_rprocs (local_comm, bridge_comm, lleader, remote_leader, tag, rsize, &rprocs); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + /* put group elements in the list */ + new_group_pointer = ompi_group_allocate_plist_w_procs (rprocs, rsize); + if (OPAL_UNLIKELY(NULL == new_group_pointer)) { + free (rprocs); + return MPI_ERR_GROUP; + } + + if (MPI_PARAM_CHECK) { + bool overlap = ompi_group_overlap (local_comm->c_local_group, new_group_pointer); + if (overlap && MPI_THREAD_MULTIPLE != ompi_mpi_thread_provided) { + ompi_group_free (&new_group_pointer); + return OMPI_ERR_BAD_PARAM; + } + } + + rc = ompi_comm_set (&newcomp, /* new comm */ + local_comm, /* old comm */ + local_comm->c_local_group->grp_proc_count, /* local_size */ + NULL, /* local_procs*/ + rsize, /* remote_size */ + NULL, /* remote_procs */ + NULL, /* attrs */ + local_comm->error_handler, /* error handler*/ + local_comm->c_local_group, /* local group */ + new_group_pointer, /* remote group */ + 0); /* flags */ + + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_group_free (&new_group_pointer); + return rc; + } + + /* Determine context id. 
It is identical to f_2_c_handle */ + rc = ompi_comm_nextcid (newcomp, local_comm, bridge_comm, &lleader, + &rleader, false, OMPI_COMM_CID_INTRA_BRIDGE); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_comm_free (&newcomp); + return rc; + } + + /* activate comm and init coll-module */ + rc = ompi_comm_activate (&newcomp, local_comm, bridge_comm, &lleader, &rleader, + false, OMPI_COMM_CID_INTRA_BRIDGE); + if ( MPI_SUCCESS != rc ) { + ompi_comm_free (&newcomp); + return rc; + } + + *newintercomm = newcomp; + + return OMPI_SUCCESS; +} + +int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_leader, + ompi_group_t *remote_group, int remote_leader, const char *tag, + opal_info_t *info, ompi_errhandler_t *errhandler, + ompi_communicator_t **newintercomm) +{ + ompi_communicator_t *newcomp = NULL, *local_comm, *leader_comm = MPI_COMM_NULL; + ompi_comm_extended_cid_block_t new_block; + bool i_am_leader = local_leader == local_group->grp_my_rank; + ompi_proc_t **rprocs; + uint64_t data[4]; + int leader_comm_remote_leader; + char *sub_tag = NULL; + size_t rsize; + int rc; + + *newintercomm = MPI_COMM_NULL; + + /* create a local communicator first. create a unique tag for this communicator */ + asprintf (&sub_tag, "%s-OMPIi-%s", tag, OPAL_NAME_PRINT(ompi_group_get_proc_name (local_group, local_leader))); + if (OPAL_UNLIKELY(NULL == sub_tag)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + rc = ompi_comm_create_from_group (local_group, sub_tag, info, errhandler, &local_comm); + free (sub_tag); + sub_tag = NULL; + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + if (i_am_leader) { + /* create a bridge communicator for the leaders (so we can use the existing collectives + * for activation). there are probably more efficient ways to do this but for intercommunicator + * creation is not considered a performance critical operation. 
*/ + ompi_proc_t **leader_procs, *my_proc; + ompi_group_t *leader_group; + + leader_procs = calloc (2, sizeof (*leader_procs)); + + my_proc = leader_procs[0] = ompi_group_get_proc_ptr (local_group, local_leader, true); + leader_procs[1] = ompi_group_get_proc_ptr (remote_group, remote_leader, true); + + if (leader_procs[0] != leader_procs[1]) { + /* NTH: they are definitely different (can the ever be the same) */ + if (leader_procs[0]->super.proc_name.jobid > leader_procs[1]->super.proc_name.jobid || + (leader_procs[0]->super.proc_name.jobid == leader_procs[1]->super.proc_name.jobid && + leader_procs[0]->super.proc_name.vpid > leader_procs[1]->super.proc_name.vpid)) { + ompi_proc_t *tmp = leader_procs[0]; + leader_procs[0] = leader_procs[1]; + leader_procs[1] = tmp; + } + + /* create a unique tag for allocating the leader communicator. we can eliminate this step + * if we take a CID from the newly allocated block belonging to local_comm. this is + * a note to make this change at a later time. 
*/ + asprintf (&sub_tag, "%s-OMPIi-LC", tag); + if (OPAL_UNLIKELY(NULL == sub_tag)) { + ompi_comm_free (&local_comm); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + leader_group = ompi_group_allocate_plist_w_procs (leader_procs, 2); + ompi_set_group_rank (leader_group, my_proc); + if (OPAL_UNLIKELY(NULL == leader_group)) { + free (sub_tag); + ompi_comm_free (&local_comm); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* remote leader is whichever rank I am not */ + leader_comm_remote_leader = !(leader_group->grp_my_rank); + + rc = ompi_comm_create_from_group (leader_group, sub_tag, info, errhandler, &leader_comm); + OBJ_RELEASE(leader_group); + free (sub_tag); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_comm_free (&local_comm); + return rc; + } + + /* grab a CID for the intercomm while we are at it */ + ompi_comm_extended_cid_block_new (&leader_comm->c_contextidb, &new_block, false); + + data[0] = remote_group->grp_proc_count; + /* store the relevant new_block data */ + data[1] = new_block.block_cid.cid_base; + data[2] = new_block.block_cid.cid_sub.u64; + data[3] = new_block.block_level; + } else { + free (leader_procs); + } + + rsize = remote_group->grp_proc_count; + } + + /* bcast size and list of remote processes to all processes in local_comm */ + rc = local_comm->c_coll->coll_bcast (data, 4, MPI_UINT64_T, local_leader, local_comm, + local_comm->c_coll->coll_bcast_module); + rsize = data[0]; + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + ompi_comm_free (&local_comm); + return rc; + } + + /* using 0 for the tag because we control both local_comm and leader_comm */ + rc = ompi_comm_get_rprocs (local_comm, leader_comm, local_leader, leader_comm_remote_leader, 0, rsize, &rprocs); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_comm_free (&local_comm); + return rc; + } + + if (!i_am_leader) { + /* create a new group containing the remote processes for non-leader ranks */ + remote_group = ompi_group_allocate_plist_w_procs (rprocs, rsize); + if (OPAL_UNLIKELY(NULL == 
remote_group)) { + free (rprocs); + ompi_comm_free (&local_comm); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } else { + OBJ_RETAIN(remote_group); + } + + rc = ompi_comm_set (&newcomp, local_comm, local_group->grp_proc_count, NULL, remote_group->grp_proc_count, + NULL, NULL, errhandler, local_group, remote_group, OMPI_COMM_SET_FLAG_LOCAL_COMM_NODUP); + OBJ_RELEASE(remote_group); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_comm_free (&local_comm); + return rc; + } + + /* will be using a communicator ID derived from the bridge communicator to save some time */ + new_block.block_cid.cid_base = data[1]; + new_block.block_cid.cid_sub.u64 = data[2]; + new_block.block_nextsub = 0; + new_block.block_nexttag = 0; + new_block.block_level = (int8_t) data[3]; + + rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) tag, &new_block, false, OMPI_COMM_CID_GROUP_NEW); + if ( OMPI_SUCCESS != rc ) { + OBJ_RELEASE(newcomp); + return rc; + } + + /* Set name for debugging purposes */ + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI INTERCOMM %s FROM GROUP", ompi_comm_print_cid (newcomp)); + + // Copy info if there is one. 
+ newcomp->super.s_info = OBJ_NEW(opal_info_t); + if (info) { + opal_info_dup(info, &(newcomp->super.s_info)); + } + + /* activate communicator and init coll-module */ + rc = ompi_comm_activate (&newcomp, local_comm, leader_comm, &local_leader, &leader_comm_remote_leader, + false, OMPI_COMM_CID_INTRA_BRIDGE); + if (MPI_COMM_NULL != leader_comm) { + ompi_comm_free (&leader_comm); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_comm_free (&newcomp); + return rc; + } + + *newintercomm = newcomp; + + return MPI_SUCCESS; +} + /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ @@ -1255,10 +1632,14 @@ int ompi_comm_compare(ompi_communicator_t *comm1, ompi_communicator_t *comm2, in int lresult, rresult=MPI_CONGRUENT; int cmp_result; + if (comm1->instance != comm2->instance) { + return OMPI_ERR_BAD_PARAM; + } + comp1 = (ompi_communicator_t *) comm1; comp2 = (ompi_communicator_t *) comm2; - if ( comp1->c_contextid == comp2->c_contextid ) { + if (ompi_comm_compare_cids(comp1,comp2)) { *result = MPI_IDENT; return MPI_SUCCESS; } @@ -1452,7 +1833,7 @@ static int ompi_comm_allgather_emulate_intra( void *inbuf, int incount, int ompi_comm_free( ompi_communicator_t **comm ) { int ret; - int cid = (*comm)->c_contextid; + int cid = (*comm)->c_index; int is_extra_retain = OMPI_COMM_IS_EXTRA_RETAIN(*comm); /* Release attributes. 
We do this now instead of during the @@ -1535,13 +1916,13 @@ int ompi_comm_free( ompi_communicator_t **comm ) /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ -int ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, - ompi_communicator_t *bridge_comm, - int local_leader, - int remote_leader, - int tag, - int rsize, - ompi_proc_t ***prprocs ) +/** + * This is a short-hand routine used in intercomm_create. + * The routine makes sure, that all processes have afterwards + * a list of ompi_proc_t pointers for the remote group. + */ +int ompi_comm_get_rprocs (ompi_communicator_t *local_comm, ompi_communicator_t *bridge_comm, + int local_leader, int remote_leader, int tag, int rsize, ompi_proc_t ***prprocs) { MPI_Request req; int rc = OMPI_SUCCESS; @@ -1731,31 +2112,6 @@ int ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ -/** - * This routine verifies, whether local_group and remote group are overlapping - * in intercomm_create - */ -int ompi_comm_overlapping_groups (int size, ompi_proc_t **lprocs, - int rsize, ompi_proc_t ** rprocs) - -{ - int rc=OMPI_SUCCESS; - int i,j; - - for (i=0; ic_contextid); + opal_output(0, "Dumping information for comm_cid %s\n", ompi_comm_print_cid (comm)); opal_output(0," f2c index:%d cube_dim: %d\n", comm->c_f_to_c_index, comm->c_cube_dim); opal_output(0," Local group: size = %d my_rank = %d\n", @@ -2012,8 +2368,8 @@ static int ompi_comm_fill_rest(ompi_communicator_t *comm, /* there is no cid at this stage ... 
make this right and make edgars * code call this function and remove dupli cde */ - snprintf (comm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMMUNICATOR %d", - comm->c_contextid); + snprintf (comm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMMUNICATOR %s", + ompi_comm_print_cid (comm)); /* determine the cube dimensions */ comm->c_cube_dim = opal_cube_dim(comm->c_local_group->grp_proc_count); @@ -2032,3 +2388,28 @@ static int ompi_comm_copy_topo(ompi_communicator_t *oldcomm, newcomm->c_flags |= newcomm->c_topo->type; return OMPI_SUCCESS; } + +char *ompi_comm_print_cid (const ompi_communicator_t *comm) +{ +#if OPAL_HAVE_THREAD_LOCAL + static opal_thread_local char cid_buffer[2][20]; + static opal_thread_local int cid_buffer_index = 0; +#else + /* no thread local == you get what you get. upgrade your compiler */ + static char cid_buffer[2][20]; + static int cid_buffer_index = 0; +#endif + int bindex = cid_buffer_index; + + if (mca_pml_base_supports_extended_cid () && !OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + snprintf (cid_buffer[bindex], sizeof (cid_buffer[0]), "0x%" PRIx64 "%08" PRIx64, + comm->c_contextid.cid_base, + comm->c_contextid.cid_sub.u64); + } else { + snprintf (cid_buffer[bindex], sizeof (cid_buffer[0]), "%d", comm->c_index); + } + + cid_buffer_index = cid_buffer_index ? 0 : 1; + + return cid_buffer[bindex]; +} diff --git a/ompi/communicator/comm_cid.c b/ompi/communicator/comm_cid.c index 9015f26bbeb..4481c5c5dad 100644 --- a/ompi/communicator/comm_cid.c +++ b/ompi/communicator/comm_cid.c @@ -24,6 +24,8 @@ * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2020-2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,10 +33,13 @@ * $HEADER$ */ + #include "ompi_config.h" #include "opal/mca/pmix/base/base.h" +#include "opal/mca/pmix/pmix-internal.h" #include "opal/util/printf.h" +#include "opal/util/show_help.h" #include "ompi/proc/proc.h" #include "ompi/communicator/communicator.h" @@ -44,9 +49,19 @@ #include "opal/class/opal_list.h" #include "ompi/mca/pml/pml.h" #include "ompi/runtime/ompi_rte.h" +#include "ompi/mca/pml/base/base.h" #include "ompi/mca/coll/base/base.h" #include "ompi/request/request.h" #include "ompi/runtime/mpiruntime.h" +#include "ompi/runtime/ompi_rte.h" + +#include "pmix.h" + +/* for use when we don't have a PMIx that supports CID generation */ +opal_atomic_int64_t ompi_comm_next_base_cid = 1; + +/* A macro comparing two CIDs */ +#define OMPI_COMM_CID_IS_LOWER(comm1,comm2) ( ((comm1)->c_index < (comm2)->c_index)? 1:0) struct ompi_comm_cid_context_t; @@ -216,6 +231,7 @@ static ompi_comm_cid_context_t *mca_comm_cid_context_alloc (ompi_communicator_t context->allreduce_fn = ompi_comm_allreduce_inter_nb; break; case OMPI_COMM_CID_GROUP: + case OMPI_COMM_CID_GROUP_NEW: context->allreduce_fn = ompi_comm_allreduce_group_nb; context->pml_tag = ((int *) arg0)[0]; break; @@ -287,6 +303,118 @@ static volatile int64_t ompi_comm_cid_lowest_id = INT64_MAX; static int ompi_comm_cid_epoch = INT_MAX; #endif /* OPAL_ENABLE_FT_MPI */ +static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_communicator_t *comm, + ompi_comm_extended_cid_block_t *new_block, + const void *arg0, const void *arg1, bool send_first, int mode, + ompi_request_t **req) +{ + pmix_info_t pinfo, *results = NULL; + size_t nresults; + opal_process_name_t *name_array; + char *tag = NULL; + size_t proc_count, cid_base = 0UL; + int rc, leader_rank; + pmix_proc_t *procs; + + rc = ompi_group_to_proc_name_array (newcomm->c_local_group, &name_array, &proc_count); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + switch 
(mode) { + case OMPI_COMM_CID_GROUP_NEW: + tag = (char *) arg0; + break; + case OMPI_COMM_CID_GROUP: + ompi_group_translate_ranks (newcomm->c_local_group, 1, &(int){0}, + comm->c_local_group, &leader_rank); + + tag = ompi_comm_extended_cid_get_unique_tag (&comm->c_contextidb, *((int *) arg0), leader_rank); + break; + case OMPI_COMM_CID_INTRA: + tag = ompi_comm_extended_cid_get_unique_tag (&comm->c_contextidb, -1, 0); + break; + } + + PMIX_INFO_LOAD(&pinfo, PMIX_GROUP_ASSIGN_CONTEXT_ID, NULL, PMIX_BOOL); + + PMIX_PROC_CREATE(procs, proc_count); + for (size_t i = 0 ; i < proc_count; ++i) { + OPAL_PMIX_CONVERT_NAME(&procs[i],&name_array[i]); + } + + rc = PMIx_Group_construct(tag, procs, proc_count, &pinfo, 1, &results, &nresults); + PMIX_INFO_DESTRUCT(&pinfo); + + if (NULL != results) { + PMIX_VALUE_GET_NUMBER(rc, &results[0].value, cid_base, size_t); + PMIX_INFO_FREE(results, nresults); + } + + PMIX_PROC_FREE(procs, proc_count); + free (name_array); + + rc = PMIx_Group_destruct (tag, NULL, 0); + + ompi_comm_extended_cid_block_initialize (new_block, cid_base, 0, 0); + + return OMPI_SUCCESS; +} + +static int ompi_comm_nextcid_ext_nb (ompi_communicator_t *newcomm, ompi_communicator_t *comm, + ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, + bool send_first, int mode, ompi_request_t **req) +{ + ompi_comm_extended_cid_block_t *block; + bool is_new_block = false; + int rc; + + if (OMPI_COMM_CID_GROUP == mode || OMPI_COMM_CID_GROUP_NEW == mode) { + /* new block belongs to the new communicator */ + block = &newcomm->c_contextidb; + } else { + block = &comm->c_contextidb; + } + + if (NULL == arg1) { + if (OMPI_COMM_CID_GROUP == mode || OMPI_COMM_CID_GROUP_NEW == mode || + !ompi_comm_extended_cid_block_available (&comm->c_contextidb)) { + /* need a new block. 
it will be either assigned to the new communicator (MPI_Comm_create*_group) + * or the parent (which has no more CIDs in its block) */ + rc = ompi_comm_ext_cid_new_block (newcomm, comm, block, arg0, arg1, send_first, mode, req); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + is_new_block = true; + } + } else { + /* got a block already */ + *block = *((ompi_comm_extended_cid_block_t *) arg1); + is_new_block = true; + } + + if (block != &newcomm->c_contextidb) { + (void) ompi_comm_extended_cid_block_new (block, &newcomm->c_contextidb, is_new_block); + } + + for (unsigned int i = ompi_mpi_communicators.lowest_free ; i < mca_pml.pml_max_contextid ; ++i) { + bool flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i, newcomm); + if (true == flag) { + newcomm->c_index = i; + break; + } + } + + newcomm->c_contextid = newcomm->c_contextidb.block_cid; + + opal_hash_table_set_value_ptr (&ompi_comm_hash, &newcomm->c_contextid, + sizeof (newcomm->c_contextid), (void *) newcomm); + *req = &ompi_request_empty; + /* nothing more to do here */ + return OMPI_SUCCESS; +} + int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *comm, ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, bool send_first, int mode, ompi_request_t **req) @@ -294,6 +422,29 @@ int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *com ompi_comm_cid_context_t *context; ompi_comm_request_t *request; + if (mca_pml_base_supports_extended_cid() && NULL == comm) { + return ompi_comm_nextcid_ext_nb (newcomm, comm, bridgecomm, arg0, arg1, send_first, mode, req); + } + + /* old CID algorithm */ + + /* if we got here and comm is NULL then that means the app is invoking MPI-4 Sessions or later + functions but the pml does not support these functions so return not supported */ + if (NULL == comm) { + char msg_string[1024]; + sprintf(msg_string,"The PML being used - %s - does not support MPI sessions related features", +
mca_pml_base_selected_component.pmlm_version.mca_component_name); + opal_show_help("help-comm.txt", + "MPI function not supported", + true, + "MPI_Comm_from_group/MPI_Intercomm_from_groups", + msg_string); + + return MPI_ERR_UNSUPPORTED_OPERATION; + } + + newcomm->c_flags |= OMPI_COMM_GLOBAL_INDEX; + context = mca_comm_cid_context_alloc (newcomm, comm, bridgecomm, arg0, arg1, "nextcid", send_first, mode); if (NULL == context) { @@ -332,9 +483,11 @@ int ompi_comm_nextcid (ompi_communicator_t *newcomm, ompi_communicator_t *comm, return rc; } - ompi_request_wait_completion (req); - rc = req->req_status.MPI_ERROR; - ompi_comm_request_return ((ompi_comm_request_t *) req); + if (&ompi_request_empty != req) { + ompi_request_wait_completion (req); + rc = req->req_status.MPI_ERROR; + ompi_comm_request_return ((ompi_comm_request_t *) req); + } return rc; } @@ -342,7 +495,7 @@ int ompi_comm_nextcid (ompi_communicator_t *newcomm, ompi_communicator_t *comm, static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request) { ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context; - int64_t my_id = ((int64_t) ompi_comm_get_cid (context->comm) << 32 | context->pml_tag); + int64_t my_id = ((int64_t) ompi_comm_get_local_cid (context->comm) << 32 | context->pml_tag); ompi_request_t *subreq; bool flag = false; int ret = OMPI_SUCCESS; @@ -503,11 +656,16 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request) } /* set the according values to the newcomm */ - context->newcomm->c_contextid = context->nextcid; #if OPAL_ENABLE_FT_MPI context->newcomm->c_epoch = INT_MAX - context->rflag; /* reorder for simpler debugging */ ompi_comm_cid_epoch -= 1; /* protected by the cid_lock */ #endif /* OPAL_ENABLE_FT_MPI */ + context->newcomm->c_index = context->nextcid; + + /* to simplify coding always set the global CID even if it isn't used by the + * active PML */ + context->newcomm->c_contextid.cid_base = 0; + 
context->newcomm->c_contextid.cid_sub.u64 = context->nextcid; opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, context->newcomm); /* unlock the cid generator */ @@ -553,6 +711,74 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request) /* Non-blocking version of ompi_comm_activate */ static int ompi_comm_activate_nb_complete (ompi_comm_request_t *request); +static int ompi_comm_activate_complete (ompi_communicator_t **newcomm, ompi_communicator_t *comm) +{ + int ret; + + /** + * Check to see if this process is in the new communicator. + * + * Specifically, this function is invoked by all processes in the + * old communicator, regardless of whether they are in the new + * communicator or not. This is because it is far simpler to use + * MPI collective functions on the old communicator to determine + * some data for the new communicator (e.g., remote_leader) than + * to kludge up our own pseudo-collective routines over just the + * processes in the new communicator. Hence, *all* processes in + * the old communicator need to invoke this function. + * + * That being said, only processes in the new communicator need to + * select a coll module for the new communicator. More + * specifically, processes who are not in the new communicator + * should *not* select a coll module -- for example, + * ompi_comm_rank(newcomm) returns MPI_UNDEFINED for processes who + * are not in the new communicator. This can cause errors in the + * selection / initialization of a coll module. Plus, it's + * wasteful -- processes in the new communicator will end up + * freeing the new communicator anyway, so we might as well leave + * the coll selection as NULL (the coll base comm unselect code + * handles that case properly). + */ + if (MPI_UNDEFINED == (*newcomm)->c_local_group->grp_my_rank) { + return OMPI_SUCCESS; + } + + /* Let the collectives components fight over who will do + collective on this new comm.
*/ + if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(*newcomm))) { + OBJ_RELEASE(*newcomm); + *newcomm = MPI_COMM_NULL; + return ret; + } + + /* For an inter communicator, we have to deal with the potential + * problem of what is happening if the local_comm that we created + * has a lower CID than the parent comm. This is not a problem + * as long as the user calls MPI_Comm_free on the inter communicator. + * However, if the communicators are not freed by the user but released + * by Open MPI in MPI_Finalize, we walk through the list of still available + * communicators and free them one by one. Thus, local_comm is freed before + * the actual inter-communicator. However, the local_comm pointer in the + * inter communicator will still contain the 'previous' address of the local_comm + * and thus this will lead to a segmentation violation. In order to prevent + * that from happening, we increase the reference counter of local_comm + * by one if its CID is lower than the parent. We cannot increase however + * its reference counter if the CID of local_comm is larger than + * the CID of the inter communicators, since a regular MPI_Comm_free would + * leave in that case the local_comm hanging around and thus we would not + * recycle CID's properly, which was the reason and the cause for this trouble.
+ */ + if (OMPI_COMM_IS_INTER(*newcomm)) { + if (OMPI_COMM_CID_IS_LOWER(*newcomm, comm)) { + OMPI_COMM_SET_EXTRA_RETAIN (*newcomm); + OBJ_RETAIN (*newcomm); + } + } + + /* done */ + return OMPI_SUCCESS; +} + int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *comm, ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, bool send_first, int mode, ompi_request_t **req) @@ -562,6 +788,8 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *c ompi_request_t *subreq; int ret = 0; + /* the caller should not pass NULL for comm (it may be the same as *newcomm) */ + assert (NULL != comm); context = mca_comm_cid_context_alloc (*newcomm, comm, bridgecomm, arg0, arg1, "activate", send_first, mode); if (NULL == context) { @@ -605,7 +833,7 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *c *req = &request->super; - return OMPI_SUCCESS; + return ret; } int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm, @@ -620,9 +848,11 @@ int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm return rc; } - ompi_request_wait_completion (req); - rc = req->req_status.MPI_ERROR; - ompi_comm_request_return ((ompi_comm_request_t *) req); + if (&ompi_request_empty != req) { + ompi_request_wait_completion (req); + rc = req->req_status.MPI_ERROR; + ompi_comm_request_return ((ompi_comm_request_t *) req); + } return rc; } @@ -630,70 +860,7 @@ int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm static int ompi_comm_activate_nb_complete (ompi_comm_request_t *request) { ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context; - int ret; - - /** - * Check to see if this process is in the new communicator. - * - * Specifically, this function is invoked by all proceses in the - * old communicator, regardless of whether they are in the new - * communicator or not. 
This is because it is far simpler to use - * MPI collective functions on the old communicator to determine - * some data for the new communicator (e.g., remote_leader) than - * to kludge up our own pseudo-collective routines over just the - * processes in the new communicator. Hence, *all* processes in - * the old communicator need to invoke this function. - * - * That being said, only processes in the new communicator need to - * select a coll module for the new communicator. More - * specifically, proceses who are not in the new communicator - * should *not* select a coll module -- for example, - * ompi_comm_rank(newcomm) returns MPI_UNDEFINED for processes who - * are not in the new communicator. This can cause errors in the - * selection / initialization of a coll module. Plus, it's - * wasteful -- processes in the new communicator will end up - * freeing the new communicator anyway, so we might as well leave - * the coll selection as NULL (the coll base comm unselect code - * handles that case properly). - */ - if (MPI_UNDEFINED == (context->newcomm)->c_local_group->grp_my_rank) { - return OMPI_SUCCESS; - } - - /* Let the collectives components fight over who will do - collective on this new comm. */ - if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(context->newcomm))) { - OBJ_RELEASE(context->newcomm); - *context->newcommp = MPI_COMM_NULL; - return ret; - } - - /* For an inter communicator, we have to deal with the potential - * problem of what is happening if the local_comm that we created - * has a lower CID than the parent comm. This is not a problem - * as long as the user calls MPI_Comm_free on the inter communicator. - * However, if the communicators are not freed by the user but released - * by Open MPI in MPI_Finalize, we walk through the list of still available - * communicators and free them one by one. Thus, local_comm is freed before - * the actual inter-communicator. 
However, the local_comm pointer in the - * inter communicator will still contain the 'previous' address of the local_comm - * and thus this will lead to a segmentation violation. In order to prevent - * that from happening, we increase the reference counter local_comm - * by one if its CID is lower than the parent. We cannot increase however - * its reference counter if the CID of local_comm is larger than - * the CID of the inter communicators, since a regular MPI_Comm_free would - * leave in that the case the local_comm hanging around and thus we would not - * recycle CID's properly, which was the reason and the cause for this trouble. - */ - if (OMPI_COMM_IS_INTER(context->newcomm)) { - if (OMPI_COMM_CID_IS_LOWER(context->newcomm, context->comm)) { - OMPI_COMM_SET_EXTRA_RETAIN (context->newcomm); - OBJ_RETAIN (context->newcomm); - } - } - - /* done */ - return OMPI_SUCCESS; + return ompi_comm_activate_complete (context->newcommp, context->comm); } /**************************************************************************/ diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index bdd3499a801..54b2a81f12e 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -23,6 +23,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,6 +50,7 @@ #include "ompi/attribute/attribute.h" #include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" +#include "ompi/instance/instance.h" /* ** Table for Fortran <-> C communicator handle conversion @@ -57,12 +60,15 @@ */ opal_pointer_array_t ompi_mpi_communicators = {{0}}; opal_pointer_array_t ompi_comm_f_to_c_table = {{0}}; +opal_hash_table_t ompi_comm_hash = {{0}}; ompi_predefined_communicator_t ompi_mpi_comm_world = {{{{0}}}}; ompi_predefined_communicator_t ompi_mpi_comm_self = {{{{0}}}}; ompi_predefined_communicator_t ompi_mpi_comm_null = {{{{0}}}}; ompi_communicator_t *ompi_mpi_comm_parent = NULL; +static bool ompi_comm_intrinsic_init; + ompi_predefined_communicator_t *ompi_mpi_comm_world_addr = &ompi_mpi_comm_world; ompi_predefined_communicator_t *ompi_mpi_comm_self_addr = @@ -82,14 +88,13 @@ OBJ_CLASS_INSTANCE(ompi_communicator_t, opal_infosubscriber_t, shortcut for finalize and abort. */ int ompi_comm_num_dyncomm=0; +static int ompi_comm_finalize (void); + /* * Initialize comm world/self/null/parent. 
*/ int ompi_comm_init(void) { - ompi_group_t *group; - size_t size; - /* Setup communicator array */ OBJ_CONSTRUCT(&ompi_mpi_communicators, opal_pointer_array_t); if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_mpi_communicators, 16, @@ -97,46 +102,108 @@ int ompi_comm_init(void) return OMPI_ERROR; } + OBJ_CONSTRUCT(&ompi_comm_hash, opal_hash_table_t); + if (OPAL_SUCCESS != opal_hash_table_init (&ompi_comm_hash, 1024)) { + return OMPI_ERROR; + } + /* Setup f to c table (we can no longer use the cid as the fortran handle) */ OBJ_CONSTRUCT(&ompi_comm_f_to_c_table, opal_pointer_array_t); - if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_comm_f_to_c_table, 8, - OMPI_FORTRAN_HANDLE_MAX, 32) ) { + if( OPAL_SUCCESS != opal_pointer_array_init (&ompi_comm_f_to_c_table, 8, + OMPI_FORTRAN_HANDLE_MAX, 32) ) { + return OMPI_ERROR; + } + + /* + * reserve indices in the F to C table for: + * MPI_COMM_WORLD + * MPI_COMM_SELF + * MPI_COMM_NULL + */ + + if (OPAL_SUCCESS != opal_pointer_array_set_item(&ompi_comm_f_to_c_table, + 0, + (void *)-1L)) { return OMPI_ERROR; } + if (OPAL_SUCCESS != opal_pointer_array_set_item(&ompi_comm_f_to_c_table, + 1, + (void *)-1L)) { + return OMPI_ERROR; + } + + if (OPAL_SUCCESS != opal_pointer_array_set_item(&ompi_comm_f_to_c_table, + 2, + (void *)-1L)) { + return OMPI_ERROR; + } + + /* Setup MPI_COMM_NULL */ + OBJ_CONSTRUCT(&ompi_mpi_comm_null, ompi_communicator_t); + assert(ompi_mpi_comm_null.comm.c_f_to_c_index == 2); + ompi_mpi_comm_null.comm.c_local_group = &ompi_mpi_group_null.group; + ompi_mpi_comm_null.comm.c_remote_group = &ompi_mpi_group_null.group; + OBJ_RETAIN(&ompi_mpi_group_null.group); + OBJ_RETAIN(&ompi_mpi_group_null.group); + + (void) ompi_comm_extended_cid_block_new (&ompi_mpi_comm_world.comm.c_contextidb, + &ompi_mpi_comm_null.comm.c_contextidb, false); + ompi_mpi_comm_null.comm.c_contextid = ompi_mpi_comm_null.comm.c_contextidb.block_cid; + ompi_mpi_comm_null.comm.c_index = 2; + ompi_mpi_comm_null.comm.c_my_rank = 
MPI_PROC_NULL; + + ompi_mpi_comm_null.comm.error_handler = &ompi_mpi_errors_are_fatal.eh; + OBJ_RETAIN( &ompi_mpi_errors_are_fatal.eh ); + opal_pointer_array_set_item (&ompi_mpi_communicators, 2, &ompi_mpi_comm_null); + + opal_string_copy(ompi_mpi_comm_null.comm.c_name, "MPI_COMM_NULL", + sizeof(ompi_mpi_comm_null.comm.c_name)); + ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | + OMPI_COMM_GLOBAL_INDEX; + + /* Initialize the parent communicator to MPI_COMM_NULL */ + ompi_mpi_comm_parent = &ompi_mpi_comm_null.comm; + OBJ_RETAIN(&ompi_mpi_comm_null); + OBJ_RETAIN(&ompi_mpi_group_null.group); + + /* initialize communicator requests (for ompi_comm_idup) */ + ompi_comm_request_init (); + + /* get a reference on the attributes subsys */ + ompi_attr_get_ref(); + + ompi_mpi_instance_append_finalize (ompi_comm_finalize); + + return OMPI_SUCCESS; +} + +int ompi_comm_init_mpi3 (void) +{ + ompi_group_t *group; + int ret; + + /* the intrinsic communicators have been initialized */ + ompi_comm_intrinsic_init = true; + /* Setup MPI_COMM_WORLD */ OBJ_CONSTRUCT(&ompi_mpi_comm_world, ompi_communicator_t); assert(ompi_mpi_comm_world.comm.c_f_to_c_index == 0); - group = OBJ_NEW(ompi_group_t); - - size = ompi_process_info.num_procs; - group->grp_proc_pointers = (ompi_proc_t **) calloc (size, sizeof (ompi_proc_t *)); - group->grp_proc_count = size; - - for (size_t i = 0 ; i < size ; ++i) { - opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid}; - /* look for existing ompi_proc_t that matches this name */ - group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name); - if (NULL == group->grp_proc_pointers[i]) { - /* set sentinel value */ - group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name); - } else { - OBJ_RETAIN (group->grp_proc_pointers[i]); - } + + ret = ompi_group_from_pset (ompi_mpi_instance_default, "mpi://WORLD", &group); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; } 
OMPI_GROUP_SET_INTRINSIC (group); - OMPI_GROUP_SET_DENSE (group); - ompi_set_group_rank(group, ompi_proc_local()); - - ompi_mpi_comm_world.comm.c_contextid = 0; - ompi_mpi_comm_world.comm.c_id_start_index = 4; - ompi_mpi_comm_world.comm.c_id_available = 4; + ompi_comm_extended_cid_block_initialize (&ompi_mpi_comm_world.comm.c_contextidb, 0, 0, 0); + ompi_mpi_comm_world.comm.c_contextid = ompi_mpi_comm_world.comm.c_contextidb.block_cid; + ompi_mpi_comm_world.comm.c_index = 0; ompi_mpi_comm_world.comm.c_my_rank = group->grp_my_rank; ompi_mpi_comm_world.comm.c_local_group = group; ompi_mpi_comm_world.comm.c_remote_group = group; OBJ_RETAIN(ompi_mpi_comm_world.comm.c_remote_group); - ompi_mpi_comm_world.comm.c_cube_dim = opal_cube_dim((int)size); + ompi_mpi_comm_world.comm.c_cube_dim = opal_cube_dim ((int) group->grp_proc_count); ompi_mpi_comm_world.comm.error_handler = ompi_initial_error_handler_eh; OBJ_RETAIN( ompi_mpi_comm_world.comm.error_handler ); OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_world.comm); @@ -144,8 +211,11 @@ int ompi_comm_init(void) opal_string_copy(ompi_mpi_comm_world.comm.c_name, "MPI_COMM_WORLD", sizeof(ompi_mpi_comm_world.comm.c_name)); - ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_NAMEISSET; - ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_INTRINSIC; + ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | + OMPI_COMM_GLOBAL_INDEX; + + /* get a reference on the attributes subsys */ + ompi_attr_get_ref(); /* We have to create a hash (although it is legal to leave this filed NULL -- the attribute accessor functions will intepret @@ -176,16 +246,18 @@ int ompi_comm_init(void) /* Setup MPI_COMM_SELF */ OBJ_CONSTRUCT(&ompi_mpi_comm_self, ompi_communicator_t); assert(ompi_mpi_comm_self.comm.c_f_to_c_index == 1); - group = OBJ_NEW(ompi_group_t); - group->grp_proc_pointers = ompi_proc_self(&size); - group->grp_my_rank = 0; - group->grp_proc_count = (int)size; + + ret = ompi_group_from_pset (ompi_mpi_instance_default, 
"mpi://SELF", &group); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + OMPI_GROUP_SET_INTRINSIC (group); - OMPI_GROUP_SET_DENSE (group); - ompi_mpi_comm_self.comm.c_contextid = 1; - ompi_mpi_comm_self.comm.c_id_start_index = 20; - ompi_mpi_comm_self.comm.c_id_available = 20; + (void) ompi_comm_extended_cid_block_new (&ompi_mpi_comm_world.comm.c_contextidb, + &ompi_mpi_comm_self.comm.c_contextidb, false); + ompi_mpi_comm_self.comm.c_contextid = ompi_mpi_comm_self.comm.c_contextidb.block_cid; + ompi_mpi_comm_self.comm.c_index = 1; ompi_mpi_comm_self.comm.c_my_rank = group->grp_my_rank; ompi_mpi_comm_self.comm.c_local_group = group; ompi_mpi_comm_self.comm.c_remote_group = group; @@ -197,47 +269,23 @@ int ompi_comm_init(void) opal_string_copy(ompi_mpi_comm_self.comm.c_name, "MPI_COMM_SELF", sizeof(ompi_mpi_comm_self.comm.c_name)); - ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_NAMEISSET; - ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_INTRINSIC; + ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | + OMPI_COMM_GLOBAL_INDEX; /* We can set MPI_COMM_SELF's keyhash to NULL because it has no predefined attributes. If a user defines an attribute on MPI_COMM_SELF, the keyhash will automatically be created. 
*/ ompi_mpi_comm_self.comm.c_keyhash = NULL; - /* Setup MPI_COMM_NULL */ - OBJ_CONSTRUCT(&ompi_mpi_comm_null, ompi_communicator_t); - assert(ompi_mpi_comm_null.comm.c_f_to_c_index == 2); - ompi_mpi_comm_null.comm.c_local_group = &ompi_mpi_group_null.group; - ompi_mpi_comm_null.comm.c_remote_group = &ompi_mpi_group_null.group; - OBJ_RETAIN(&ompi_mpi_group_null.group); - OBJ_RETAIN(&ompi_mpi_group_null.group); + /* + * finally here we set the predefined attribute keyvals + */ + ompi_attr_create_predefined(); - ompi_mpi_comm_null.comm.c_contextid = 2; - ompi_mpi_comm_null.comm.c_my_rank = MPI_PROC_NULL; - - /* unlike world, self, and parent, comm_null does not inherit the initial error - * handler */ - ompi_mpi_comm_null.comm.error_handler = &ompi_mpi_errors_are_fatal.eh; - OBJ_RETAIN( ompi_mpi_comm_null.comm.error_handler ); - opal_pointer_array_set_item (&ompi_mpi_communicators, 2, &ompi_mpi_comm_null); - - opal_string_copy(ompi_mpi_comm_null.comm.c_name, "MPI_COMM_NULL", - sizeof(ompi_mpi_comm_null.comm.c_name)); - ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_NAMEISSET; - ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_INTRINSIC; - - /* Initialize the parent communicator to MPI_COMM_NULL */ - ompi_mpi_comm_parent = &ompi_mpi_comm_null.comm; - OBJ_RETAIN(&ompi_mpi_comm_null); - OBJ_RETAIN(&ompi_mpi_group_null.group); OBJ_RETAIN(&ompi_mpi_errors_are_fatal.eh); /* During dyn_init, the comm_parent error handler will be set to the same * as comm_world (thus, the initial error handler). 
*/ - /* initialize communicator requests (for ompi_comm_idup) */ - ompi_comm_request_init (); - return OMPI_SUCCESS; } @@ -268,28 +316,30 @@ ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size ) return new_comm; } -int ompi_comm_finalize(void) +static int ompi_comm_finalize (void) { int max, i; ompi_communicator_t *comm; - /* Shut down MPI_COMM_SELF */ - OBJ_DESTRUCT( &ompi_mpi_comm_self ); - /* disconnect all dynamic communicators */ ompi_dpm_dyn_finalize(); - /* Free the attributes on comm world. This is not done in the - * destructor as we delete attributes in ompi_comm_free (which - * is not called for comm world) */ - if (NULL != ompi_mpi_comm_world.comm.c_keyhash) { - /* Ignore errors when deleting attributes on comm_world */ - (void) ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_world.comm, ompi_mpi_comm_world.comm.c_keyhash); - OBJ_RELEASE(ompi_mpi_comm_world.comm.c_keyhash); - } + if (ompi_comm_intrinsic_init) { + /* tear down MPI-3 predefined communicators (not initialized unless using MPI_Init) */ + /* Free the attributes on comm world. 
This is not done in the + * destructor as we delete attributes in ompi_comm_free (which + * is not called for comm world) */ + if (NULL != ompi_mpi_comm_world.comm.c_keyhash) { + /* Ignore errors when deleting attributes on comm_world */ + (void) ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_world.comm, ompi_mpi_comm_world.comm.c_keyhash); + OBJ_RELEASE(ompi_mpi_comm_world.comm.c_keyhash); + } - /* Shut down MPI_COMM_WORLD */ - OBJ_DESTRUCT( &ompi_mpi_comm_world ); + /* Shut down MPI_COMM_SELF */ + OBJ_DESTRUCT( &ompi_mpi_comm_self ); + /* Shut down MPI_COMM_WORLD */ + OBJ_DESTRUCT( &ompi_mpi_comm_world ); + } /* Shut down the parent communicator, if it exists */ if( ompi_mpi_comm_parent != &ompi_mpi_comm_null.comm ) { @@ -355,12 +405,14 @@ int ompi_comm_finalize(void) } OBJ_DESTRUCT (&ompi_mpi_communicators); + OBJ_DESTRUCT (&ompi_comm_hash); OBJ_DESTRUCT (&ompi_comm_f_to_c_table); /* finalize communicator requests */ ompi_comm_request_fini (); - return OMPI_SUCCESS; + /* release a reference to the attributes subsys */ + return ompi_attr_put_ref(); } /********************************************************************************/ @@ -370,11 +422,9 @@ int ompi_comm_finalize(void) static void ompi_comm_construct(ompi_communicator_t* comm) { - comm->c_f_to_c_index = opal_pointer_array_add(&ompi_comm_f_to_c_table, comm); + int idx; comm->c_name[0] = '\0'; - comm->c_contextid = MPI_UNDEFINED; - comm->c_id_available = MPI_UNDEFINED; - comm->c_id_start_index = MPI_UNDEFINED; + comm->c_index = MPI_UNDEFINED; comm->c_flags = 0; comm->c_my_rank = 0; comm->c_cube_dim = 0; @@ -385,6 +435,21 @@ static void ompi_comm_construct(ompi_communicator_t* comm) comm->c_topo = NULL; comm->c_coll = NULL; comm->c_nbc_tag = MCA_COLL_BASE_TAG_NONBLOCKING_BASE; + comm->instance = NULL; + + /* + * magic numerology - see TOPDIR/ompi/include/mpif-values.pl + */ + idx = (comm == (ompi_communicator_t*)ompi_mpi_comm_world_addr) ? 0 : + (comm == (ompi_communicator_t*)ompi_mpi_comm_self_addr) ? 
1 : + (comm == (ompi_communicator_t*)ompi_mpi_comm_null_addr) ? 2 : -1; + if (-1 == idx) { + comm->c_f_to_c_index = opal_pointer_array_add(&ompi_comm_f_to_c_table, + comm); + } else { + opal_pointer_array_set_item(&ompi_comm_f_to_c_table, idx, comm); + comm->c_f_to_c_index = idx; + } /* A keyhash will be created if/when an attribute is cached on this communicator */ @@ -473,11 +538,15 @@ static void ompi_comm_destruct(ompi_communicator_t* comm) #endif /* OPAL_ENABLE_FT_MPI */ /* mark this cid as available */ - if ( MPI_UNDEFINED != (int)comm->c_contextid && + if ( MPI_UNDEFINED != (int)comm->c_index && NULL != opal_pointer_array_get_item(&ompi_mpi_communicators, - comm->c_contextid)) { + comm->c_index)) { opal_pointer_array_set_item ( &ompi_mpi_communicators, - comm->c_contextid, NULL); + comm->c_index, NULL); + if (!OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + opal_hash_table_remove_value_ptr (&ompi_comm_hash, &comm->c_contextid, + sizeof (comm->c_contextid)); + } } /* reset the ompi_comm_f_to_c_table entry */ @@ -509,6 +578,8 @@ OMPI_COMM_SET_INFO_FN(no_any_source, OMPI_COMM_ASSERT_NO_ANY_SOURCE) OMPI_COMM_SET_INFO_FN(no_any_tag, OMPI_COMM_ASSERT_NO_ANY_TAG) OMPI_COMM_SET_INFO_FN(allow_overtake, OMPI_COMM_ASSERT_ALLOW_OVERTAKE) OMPI_COMM_SET_INFO_FN(exact_length, OMPI_COMM_ASSERT_EXACT_LENGTH) +OMPI_COMM_SET_INFO_FN(lazy_barrier, OMPI_COMM_ASSERT_LAZY_BARRIER) +OMPI_COMM_SET_INFO_FN(active_poll, OMPI_COMM_ASSERT_ACTIVE_POLL) void ompi_comm_assert_subscribe (ompi_communicator_t *comm, int32_t assert_flag) { @@ -525,5 +596,11 @@ void ompi_comm_assert_subscribe (ompi_communicator_t *comm, int32_t assert_flag) case OMPI_COMM_ASSERT_EXACT_LENGTH: opal_infosubscribe_subscribe (&comm->super, "mpi_assert_exact_length", "false", ompi_comm_set_exact_length); break; + case OMPI_COMM_ASSERT_LAZY_BARRIER: + opal_infosubscribe_subscribe (&comm->super, "ompi_assert_lazy_barrier", "false", ompi_comm_set_lazy_barrier); + break; + case OMPI_COMM_ASSERT_ACTIVE_POLL: + 
opal_infosubscribe_subscribe (&comm->super, "ompi_assert_active_poll", "true", ompi_comm_set_active_poll); + break; } } diff --git a/ompi/communicator/comm_request.c b/ompi/communicator/comm_request.c index e1092deb400..876c1f4e4d1 100644 --- a/ompi/communicator/comm_request.c +++ b/ompi/communicator/comm_request.c @@ -8,6 +8,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +32,7 @@ typedef struct ompi_comm_request_item_t { opal_list_item_t super; ompi_comm_request_callback_fn_t callback; ompi_request_t *subreqs[OMPI_COMM_REQUEST_MAX_SUBREQ]; + uint32_t flags; int subreq_count; } ompi_comm_request_item_t; OBJ_CLASS_DECLARATION(ompi_comm_request_item_t); @@ -70,6 +73,12 @@ void ompi_comm_request_fini (void) int ompi_comm_request_schedule_append (ompi_comm_request_t *request, ompi_comm_request_callback_fn_t callback, ompi_request_t *subreqs[], int subreq_count) +{ + return ompi_comm_request_schedule_append_w_flags(request, callback, subreqs, subreq_count, 0); +} + +int ompi_comm_request_schedule_append_w_flags(ompi_comm_request_t *request, ompi_comm_request_callback_fn_t callback, + ompi_request_t *subreqs[], int subreq_count, uint32_t flags) { ompi_comm_request_item_t *request_item; int i; @@ -84,6 +93,7 @@ int ompi_comm_request_schedule_append (ompi_comm_request_t *request, ompi_comm_r } request_item->callback = callback; + request_item->flags = flags; for (i = 0 ; i < subreq_count ; ++i) { request_item->subreqs[i] = subreqs[i]; @@ -125,7 +135,9 @@ static int ompi_comm_request_progress (void) * that it does some subreqs cleanup */ request->super.req_status.MPI_ERROR = subreq->req_status.MPI_ERROR; } - ompi_request_free (&subreq); + if (!(request_item->flags & OMPI_COMM_REQ_FLAG_RETAIN_SUBREQ)) { + ompi_request_free (&subreq); + } 
request_item->subreq_count--; completed++; } else { @@ -269,6 +281,10 @@ ompi_comm_request_t *ompi_comm_request_get (void) void ompi_comm_request_return (ompi_comm_request_t *request) { + if ((void *) &ompi_request_empty == (void *) request) { + return; + } + if (request->context) { OBJ_RELEASE (request->context); request->context = NULL; diff --git a/ompi/communicator/comm_request.h b/ompi/communicator/comm_request.h index 1c025fc9311..6b11d149252 100644 --- a/ompi/communicator/comm_request.h +++ b/ompi/communicator/comm_request.h @@ -2,6 +2,8 @@ /* * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reseved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -18,6 +20,9 @@ /* increase this number if more subrequests are needed */ #define OMPI_COMM_REQUEST_MAX_SUBREQ 2 +/* indicate that the caller will free subrequests */ +#define OMPI_COMM_REQ_FLAG_RETAIN_SUBREQ 0x00000001 + typedef struct ompi_comm_request_t { ompi_request_t super; @@ -32,6 +37,8 @@ void ompi_comm_request_init (void); void ompi_comm_request_fini (void); int ompi_comm_request_schedule_append (ompi_comm_request_t *request, ompi_comm_request_callback_fn_t callback, ompi_request_t *subreqs[], int subreq_count); +int ompi_comm_request_schedule_append_w_flags(ompi_comm_request_t *request, ompi_comm_request_callback_fn_t callback, + ompi_request_t *subreqs[], int subreq_count, uint32_t flags); void ompi_comm_request_start (ompi_comm_request_t *request); ompi_comm_request_t *ompi_comm_request_get (void); void ompi_comm_request_return (ompi_comm_request_t *request); diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index d1e82a7dcfd..a90f1f076e0 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -16,12 +16,14 @@ * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. 
* Copyright (c) 2011-2013 Universite Bordeaux 1 - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -47,6 +49,8 @@ #include "ompi/info/info.h" #include "ompi/proc/proc.h" +#include "opal/util/printf.h" + BEGIN_C_DECLS OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); @@ -63,6 +67,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_PML_ADDED 0x00001000 #define OMPI_COMM_EXTRA_RETAIN 0x00004000 #define OMPI_COMM_MAPBY_NODE 0x00008000 +#define OMPI_COMM_GLOBAL_INDEX 0x00010000 /* some utility #defines */ #define OMPI_COMM_IS_INTER(comm) ((comm)->c_flags & OMPI_COMM_INTER) @@ -80,6 +85,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); OMPI_COMM_IS_GRAPH((comm)) || \ OMPI_COMM_IS_DIST_GRAPH((comm))) #define OMPI_COMM_IS_MAPBY_NODE(comm) ((comm)->c_flags & OMPI_COMM_MAPBY_NODE) +#define OMPI_COMM_IS_GLOBAL_INDEX(comm) ((comm)->c_flags & OMPI_COMM_GLOBAL_INDEX) #define OMPI_COMM_SET_DYNAMIC(comm) ((comm)->c_flags |= OMPI_COMM_DYNAMIC) #define OMPI_COMM_SET_INVALID(comm) ((comm)->c_flags |= OMPI_COMM_INVALID) @@ -92,12 +98,16 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_ASSERT_NO_ANY_SOURCE 0x00000002 #define OMPI_COMM_ASSERT_EXACT_LENGTH 0x00000004 #define OMPI_COMM_ASSERT_ALLOW_OVERTAKE 0x00000008 +#define OMPI_COMM_ASSERT_LAZY_BARRIER 0x00000010 +#define OMPI_COMM_ASSERT_ACTIVE_POLL 0x00000020 #define OMPI_COMM_CHECK_ASSERT(comm, flag) !!((comm)->c_assertions & flag) #define OMPI_COMM_CHECK_ASSERT_NO_ANY_TAG(comm) 
OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ANY_TAG) #define OMPI_COMM_CHECK_ASSERT_NO_ANY_SOURCE(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ANY_SOURCE) #define OMPI_COMM_CHECK_ASSERT_EXACT_LENGTH(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_EXACT_LENGTH) #define OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_ALLOW_OVERTAKE) +#define OMPI_COMM_CHECK_ASSERT_LAZY_BARRIER(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_LAZY_BARRIER) +#define OMPI_COMM_CHECK_ASSERT_ACTIVE_POLL(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_ACTIVE_POLL) /** * Modes required for acquiring the new comm-id. @@ -111,10 +121,11 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_CID_INTRA_BRIDGE 0x00000080 #define OMPI_COMM_CID_INTRA_PMIX 0x00000100 #define OMPI_COMM_CID_GROUP 0x00000200 +#define OMPI_COMM_CID_GROUP_NEW 0x00000400 #if OPAL_ENABLE_FT_MPI -#define OMPI_COMM_CID_INTRA_FT 0x00000400 -#define OMPI_COMM_CID_INTER_FT 0x00000800 -#define OMPI_COMM_CID_INTRA_PMIX_FT 0x00001000 +#define OMPI_COMM_CID_INTRA_FT 0x00000800 +#define OMPI_COMM_CID_INTER_FT 0x00001000 +#define OMPI_COMM_CID_INTRA_PMIX_FT 0x00002000 #endif /* OPAL_ENABLE_FT_MPI */ /** @@ -125,10 +136,103 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_BLOCK_OTHERS 8 /* A macro comparing two CIDs */ -#define OMPI_COMM_CID_IS_LOWER(comm1,comm2) ( ((comm1)->c_contextid < (comm2)->c_contextid)? 1:0) +#define OMPI_COMM_CID_IS_LOWER(comm1,comm2) ( ((comm1)->c_index < (comm2)->c_index)? 
1:0) +OMPI_DECLSPEC extern opal_hash_table_t ompi_comm_hash; OMPI_DECLSPEC extern opal_pointer_array_t ompi_mpi_communicators; OMPI_DECLSPEC extern opal_pointer_array_t ompi_comm_f_to_c_table; + +struct ompi_comm_extended_cid_t { + uint64_t cid_base; + union { + uint64_t u64; + uint8_t u8[8]; + } cid_sub; +}; +typedef struct ompi_comm_extended_cid_t ompi_comm_extended_cid_t; + +struct ompi_comm_extended_cid_block_t { + ompi_comm_extended_cid_t block_cid; + /** can be used to get a unique string tag for pmix context creation */ + uint64_t block_nexttag; + uint8_t block_nextsub; + uint8_t block_level; +}; +typedef struct ompi_comm_extended_cid_block_t ompi_comm_extended_cid_block_t; + +static inline void ompi_comm_extended_cid_block_initialize (ompi_comm_extended_cid_block_t *block, uint64_t cid_base, + uint64_t cid_sub, uint8_t block_level) +{ + block->block_cid.cid_base = cid_base; + block->block_cid.cid_sub.u64 = cid_sub; + block->block_level = block_level; + block->block_nextsub = 0; + block->block_nexttag = 0; +} + +static inline bool ompi_comm_extended_cid_block_available (ompi_comm_extended_cid_block_t *block) +{ + return (4 > block->block_level && 0xff > block->block_nextsub); +} + +static inline char *ompi_comm_extended_cid_get_unique_tag (ompi_comm_extended_cid_block_t *block, int tag, + int leader) +{ + char *id; + + /* create a unique ID for this */ + if (-1 == tag) { + opal_asprintf (&id, "ALL:%" PRIx64 "-%" PRIx64 "-%" PRIx64, block->block_cid.cid_base, + block->block_cid.cid_sub.u64, ++block->block_nexttag); + } else { + opal_asprintf (&id, "GROUP:%" PRIx64 "-%" PRIx64 "-%d-%d", block->block_cid.cid_base, + block->block_cid.cid_sub.u64, tag, leader); + } + + return id; +} + +/** + * Create a new sub-block from an existing block + * + * @param[in] block block + * @param[out] new_block new CID block + * @param[in] use_current use the current CID of the existing block as the base + * + * This function creates a new CID block from an existing block. 
The use_current flag + * can be used to indicate that the new block should use the existing CID. This can + * be used to assign the first CID in a new block. + */ +static inline int ompi_comm_extended_cid_block_new (ompi_comm_extended_cid_block_t *block, + ompi_comm_extended_cid_block_t *new_block, + bool use_current) +{ + if (!ompi_comm_extended_cid_block_available (block)) { + /* a new block is needed */ + return OMPI_ERR_OUT_OF_RESOURCE; + } + + new_block->block_cid = block->block_cid; + if (!use_current) { + new_block->block_cid.cid_sub.u8[3 - block->block_level] = ++block->block_nextsub; + } + + new_block->block_level = block->block_level + 1; + new_block->block_nextsub = 0; + + return OMPI_SUCCESS; +} + +struct ompi_comm_cid_t { + opal_object_t super; + ompi_group_t cid_group; + ompi_comm_extended_cid_t cid_value; + uint8_t cid_sublevel; +}; +typedef struct ompi_comm_cid_t ompi_comm_cid_t; + +OBJ_CLASS_DECLARATION(ompi_comm_cid_t); + #if OPAL_ENABLE_FT_MPI /** * This array holds the number of time each id has been used. In the case where a communicator @@ -158,12 +262,13 @@ struct ompi_communicator_t { opal_mutex_t c_lock; /* mutex for name and potentially attributes */ char c_name[MPI_MAX_OBJECT_NAME]; - uint32_t c_contextid; - int c_my_rank; - uint32_t c_flags; /* flags, e.g. intercomm, - topology, etc. */ - uint32_t c_assertions; /* info assertions */ - + ompi_comm_extended_cid_t c_contextid; + ompi_comm_extended_cid_block_t c_contextidb; + uint32_t c_index; + int c_my_rank; + uint32_t c_flags; /* flags, e.g. intercomm, + topology, etc. 
*/ + uint32_t c_assertions; /* info assertions */ int c_id_available; /* the currently available Cid for allocation to a child*/ int c_id_start_index; /* the starting index of the block of cids @@ -209,6 +314,9 @@ struct ompi_communicator_t { /* Hooks for PML to hang things */ struct mca_pml_comm_t *c_pml_comm; + /* Hooks for MTL to hang things */ + struct mca_mtl_comm_t *c_mtl_comm; + /* Collectives module interface and data */ mca_coll_base_comm_coll_t *c_coll; @@ -219,6 +327,9 @@ struct ompi_communicator_t { */ opal_atomic_int32_t c_nbc_tag; + /* instance that this comm belongs to */ + ompi_instance_t* instance; + #if OPAL_ENABLE_FT_MPI /** MPI_ANY_SOURCE Failed Group Offset - OMPI_Comm_failure_get_acked */ int any_source_offset; @@ -326,7 +437,7 @@ typedef struct ompi_communicator_t ompi_communicator_t; * the PREDEFINED_COMMUNICATOR_PAD macro? * A: Most likely not, but it would be good to check. */ -#define PREDEFINED_COMMUNICATOR_PAD 512 +#define PREDEFINED_COMMUNICATOR_PAD 1024 struct ompi_predefined_communicator_t { struct ompi_communicator_t comm; @@ -372,7 +483,7 @@ OMPI_DECLSPEC extern ompi_predefined_communicator_t *ompi_mpi_comm_null_addr; * ompi_comm_invalid() but also explictily checks to see if the * handle is MPI_COMM_NULL. 
*/ -static inline int ompi_comm_invalid(ompi_communicator_t* comm) +static inline int ompi_comm_invalid (const ompi_communicator_t* comm) { if ((NULL == comm) || (MPI_COMM_NULL == comm) || (OMPI_COMM_IS_FREED(comm)) || (OMPI_COMM_IS_INVALID(comm)) ) @@ -384,7 +495,7 @@ static inline int ompi_comm_invalid(ompi_communicator_t* comm) /** * rank w/in the communicator */ -static inline int ompi_comm_rank(ompi_communicator_t* comm) +static inline int ompi_comm_rank (const ompi_communicator_t* comm) { return comm->c_my_rank; } @@ -392,7 +503,7 @@ static inline int ompi_comm_rank(ompi_communicator_t* comm) /** * size of the communicator */ -static inline int ompi_comm_size(ompi_communicator_t* comm) +static inline int ompi_comm_size (const ompi_communicator_t* comm) { return comm->c_local_group->grp_proc_count; } @@ -401,7 +512,7 @@ static inline int ompi_comm_size(ompi_communicator_t* comm) * size of the remote group for inter-communicators. * returns zero for an intra-communicator */ -static inline int ompi_comm_remote_size(ompi_communicator_t* comm) +static inline int ompi_comm_remote_size (const ompi_communicator_t* comm) { return (comm->c_flags & OMPI_COMM_INTER ? 
comm->c_remote_group->grp_proc_count : 0); } @@ -410,20 +521,46 @@ static inline int ompi_comm_remote_size(ompi_communicator_t* comm) * Context ID for the communicator, suitable for passing to * ompi_comm_lookup for getting the communicator back */ -static inline uint32_t ompi_comm_get_cid(ompi_communicator_t* comm) +static inline uint32_t ompi_comm_get_local_cid (const ompi_communicator_t* comm) +{ + return comm->c_index; +} + +/** + * Get the extended context ID for the communicator, suitable for passing + * to ompi_comm_lookup_cid for getting the communicator back + */ +static inline ompi_comm_extended_cid_t ompi_comm_get_extended_cid (const ompi_communicator_t *comm) { return comm->c_contextid; } +static inline bool ompi_comm_cid_compare (const ompi_communicator_t *comm, const ompi_comm_extended_cid_t cid) +{ + return comm->c_contextid.cid_base == cid.cid_base && comm->c_contextid.cid_sub.u64 == cid.cid_sub.u64; +} + +static inline bool ompi_comm_compare_cids (const ompi_communicator_t *comm1, const ompi_communicator_t *comm2) +{ + return comm1->c_contextid.cid_base == comm2->c_contextid.cid_base && comm1->c_contextid.cid_sub.u64 == comm2->c_contextid.cid_sub.u64; +} + /* return pointer to communicator associated with context id cid, * No error checking is done*/ -static inline ompi_communicator_t *ompi_comm_lookup(uint32_t cid) +static inline ompi_communicator_t *ompi_comm_lookup (const uint32_t c_index) { /* array of pointers to communicators, indexed by context ID */ - return (ompi_communicator_t*)opal_pointer_array_get_item(&ompi_mpi_communicators, cid); + return (ompi_communicator_t *) opal_pointer_array_get_item (&ompi_mpi_communicators, c_index); } -static inline struct ompi_proc_t* ompi_comm_peer_lookup(ompi_communicator_t* comm, int peer_id) +static inline ompi_communicator_t *ompi_comm_lookup_cid (const ompi_comm_extended_cid_t cid) +{ + ompi_communicator_t *comm = NULL; + (void) opal_hash_table_get_value_ptr (&ompi_comm_hash, &cid, sizeof (cid), 
(void *) &comm); + return comm; +} + +static inline struct ompi_proc_t* ompi_comm_peer_lookup (const ompi_communicator_t* comm, const int peer_id) { #if OPAL_ENABLE_DEBUG if(peer_id >= comm->c_remote_group->grp_proc_count) { @@ -435,6 +572,11 @@ static inline struct ompi_proc_t* ompi_comm_peer_lookup(ompi_communicator_t* com return ompi_group_peer_lookup(comm->c_remote_group,peer_id); } +static inline bool ompi_comm_instances_same(const ompi_communicator_t *comm1, const ompi_communicator_t *comm2) +{ + return comm1->instance == comm2->instance; +} + #if OPAL_ENABLE_FT_MPI /* * Support for MPI_ANY_SOURCE point-to-point operations @@ -616,7 +758,7 @@ OMPI_DECLSPEC int ompi_comm_revoke_finalize(void); #endif /* OPAL_ENABLE_FT_MPI */ -static inline bool ompi_comm_peer_invalid(ompi_communicator_t* comm, int peer_id) +static inline bool ompi_comm_peer_invalid (const ompi_communicator_t* comm, const int peer_id) { if(peer_id < 0 || peer_id >= comm->c_remote_group->grp_proc_count) { return true; @@ -624,12 +766,18 @@ static inline bool ompi_comm_peer_invalid(ompi_communicator_t* comm, int peer_id return false; } +char *ompi_comm_print_cid (const ompi_communicator_t *comm); /** - * Initialise MPI_COMM_WORLD and MPI_COMM_SELF + * @brief Initialize the communicator subsystem as well as MPI_COMM_NULL. 
*/ int ompi_comm_init(void); +/** + * Initialise MPI_COMM_WORLD and MPI_COMM_SELF + */ +int ompi_comm_init_mpi3 (void); + /** * extract the local group from a communicator */ @@ -641,6 +789,9 @@ OMPI_DECLSPEC int ompi_comm_group (ompi_communicator_t *comm, ompi_group_t **gro int ompi_comm_create (ompi_communicator_t* comm, ompi_group_t *group, ompi_communicator_t** newcomm); +int ompi_comm_create_w_info (ompi_communicator_t *comm, ompi_group_t *group, opal_info_t *info, + ompi_communicator_t **newcomm); + /** * Non-collective create communicator based on a group @@ -648,6 +799,26 @@ int ompi_comm_create (ompi_communicator_t* comm, ompi_group_t *group, int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int tag, ompi_communicator_t **newcomm); +/** + * Non-collective create communicator based on a group with no base communicator + */ +int ompi_comm_create_from_group (ompi_group_t *group, const char *tag, opal_info_t *info, + ompi_errhandler_t *errhandler, ompi_communicator_t **newcomm); + +/** + * create an intercommunicator + */ +int ompi_intercomm_create (ompi_communicator_t *local_comm, int local_leader, ompi_communicator_t *bridge_comm, + int remote_leader, int tag, ompi_communicator_t **newintercomm); + +/** + * Non-collective create intercommunicator based on a group with no base communicator + */ +int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_leader, + ompi_group_t *remote_group, int remote_leader, const char *tag, + opal_info_t *info, ompi_errhandler_t *errhandler, + ompi_communicator_t **newintercomm); + /** * Take an almost complete communicator and reserve the CID as well * as activate it (initialize the collective and the topologies). 
@@ -821,11 +992,6 @@ OMPI_DECLSPEC int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_commu ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, bool send_first, int mode, ompi_request_t **req); -/** - * shut down the communicator infrastructure. - */ -int ompi_comm_finalize (void); - /** * This is THE routine, where all the communicator stuff * is really set. @@ -841,6 +1007,7 @@ int ompi_comm_finalize (void); * @param[in] copy_topocomponent whether to copy the topology * @param[in] local_group local process group (may be NULL if local_ranks array supplied) * @param[in] remote_group remote process group (may be NULL) + * @param[in] flags flags to control the behavior of ompi_comm_set_nb */ OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm, ompi_communicator_t* oldcomm, @@ -850,9 +1017,20 @@ OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm, int *remote_ranks, opal_hash_table_t *attr, ompi_errhandler_t *errh, - bool copy_topocomponent, ompi_group_t *local_group, - ompi_group_t *remote_group ); + ompi_group_t *remote_group, + uint32_t flags); + +/** + * @brief Don't duplicate the local communicator. just reference it directly. This + * flag passes ownership to the new communicator. 
+ */ +#define OMPI_COMM_SET_FLAG_LOCAL_COMM_NODUP 0x00000001 + +/** + * @brief Copy the topology from the old communicator + */ +#define OMPI_COMM_SET_FLAG_COPY_TOPOLOGY 0x00000002 /** * This is THE routine, where all the communicator stuff @@ -869,6 +1047,7 @@ OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm, * @param[in] copy_topocomponent whether to copy the topology * @param[in] local_group local process group (may be NULL if local_ranks array supplied) * @param[in] remote_group remote process group (may be NULL) + * @param[in] flags flags to control the behavior of ompi_comm_set_nb * @param[out] req ompi_request_t object for tracking completion */ OMPI_DECLSPEC int ompi_comm_set_nb ( ompi_communicator_t **ncomm, @@ -879,30 +1058,10 @@ OMPI_DECLSPEC int ompi_comm_set_nb ( ompi_communicator_t **ncomm, int *remote_ranks, opal_hash_table_t *attr, ompi_errhandler_t *errh, - bool copy_topocomponent, ompi_group_t *local_group, ompi_group_t *remote_group, - ompi_request_t **req ); - -/** - * This is a short-hand routine used in intercomm_create. - * The routine makes sure, that all processes have afterwards - * a list of ompi_proc_t pointers for the remote group. 
- */ -int ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, - ompi_communicator_t *bridge_comm, - int local_leader, - int remote_leader, - int tag, - int rsize, - struct ompi_proc_t ***prprocs ); - -/** - * This routine verifies, whether local_group and remote group are overlapping - * in intercomm_create - */ -int ompi_comm_overlapping_groups (int size, struct ompi_proc_t ** lprocs, - int rsize, struct ompi_proc_t ** rprocs); + uint32_t flags, + ompi_request_t **req); /** * This is a routine determining whether the local or the diff --git a/ompi/communicator/ft/comm_ft.c b/ompi/communicator/ft/comm_ft.c index 3532954f06b..d74a36ef071 100644 --- a/ompi/communicator/ft/comm_ft.c +++ b/ompi/communicator/ft/comm_ft.c @@ -169,8 +169,8 @@ int ompi_comm_shrink_internal(ompi_communicator_t* comm, ompi_communicator_t** n */ /* --------------------------------------------------------- */ OPAL_OUTPUT_VERBOSE((5, ompi_ftmpi_output_handle, - "%s ompi: comm_shrink: Determine ranking for new communicator", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME) )); + "%s ompi: comm_shrink: Determine ranking for new communicator intra %d", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), OMPI_COMM_IS_INTRA(comm))); start = PMPI_Wtime(); /* Create 'alive' groups */ @@ -198,9 +198,9 @@ int ompi_comm_shrink_internal(ompi_communicator_t* comm, ompi_communicator_t** n NULL, /* remote_ranks */ comm->c_keyhash, /* attrs */ comm->error_handler, /* error handler */ - NULL, /* topo component */ alive_group, /* local group */ - alive_rgroup /* remote group */ + alive_rgroup, /* remote group */ + 0 /* flags */ ); if( OMPI_SUCCESS != ret ) { exit_status = ret; @@ -246,7 +246,8 @@ int ompi_comm_shrink_internal(ompi_communicator_t* comm, ompi_communicator_t** n /* --------------------------------------------------------- */ /* Set name for debugging purposes */ snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d SHRUNK FROM %d", - newcomp->c_contextid, comm->c_contextid ); + 
ompi_comm_get_local_cid(newcomp), + ompi_comm_get_local_cid(comm)); start = PMPI_Wtime(); /* activate communicator and init coll-module */ ret = ompi_comm_activate( &newcomp, /* new communicator */ diff --git a/ompi/communicator/ft/comm_ft_detector.c b/ompi/communicator/ft/comm_ft_detector.c index 03eccda908b..55a15ce16b6 100644 --- a/ompi/communicator/ft/comm_ft_detector.c +++ b/ompi/communicator/ft/comm_ft_detector.c @@ -1,9 +1,11 @@ /* - * Copyright (c) 2016-2020 The University of Tennessee and The University + * Copyright (c) 2016-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -337,8 +339,8 @@ static int fd_heartbeat_request(comm_detector_t* detector) { /* if everybody else is dead, I don't need to monitor myself. */ if( rank == comm->c_my_rank ) { OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, - "%s %s: Every other node is dead on communicator %3d:%d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, comm->c_contextid, comm->c_epoch)); + "%s %s: Every other node is dead on communicator %s:%d", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, ompi_comm_print_cid(comm), comm->c_epoch)); detector->hb_observer = detector->hb_observing = MPI_PROC_NULL; detector->hb_rstamp = INFINITY; detector->hb_period = INFINITY; @@ -354,8 +356,8 @@ static int fd_heartbeat_request(comm_detector_t* detector) { #endif OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, - "%s %s: Sending observe request to %d on communicator %3d:%d stamp %g", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, rank, comm->c_contextid, comm->c_epoch, detector->hb_rstamp-startdate )); + "%s %s: Sending observe request to %d on communicator %s:%d stamp %g", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, rank, ompi_comm_print_cid(comm), 
comm->c_epoch, detector->hb_rstamp-startdate )); if( comm_detector_use_rdma_hb ) { mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint(proc); @@ -380,7 +382,7 @@ static int fd_heartbeat_request(comm_detector_t* detector) { detector->hb_observing = rank; ompi_comm_heartbeat_req_t* msg = calloc(sizeof(*msg)+regsize, 1); - msg->super.cid = comm->c_contextid; + msg->super.cid = ompi_comm_get_local_cid(comm); msg->super.epoch = comm->c_epoch; msg->super.type = comm_heartbeat_request_cb_type; msg->from = comm->c_my_rank; @@ -389,7 +391,7 @@ static int fd_heartbeat_request(comm_detector_t* detector) { memcpy(&msg->rdma_rreg[0], detector->hb_rdma_flag_lreg, regsize); msg->rdma_raddr = (uint64_t)&detector->hb_rdma_flag; } - ret = ompi_comm_rbcast_send_msg(proc, &msg->super, sizeof(*msg)+regsize); + ret = ompi_comm_rbcast_send_msg(proc, (ompi_comm_rbcast_message_t*)msg, sizeof(*msg)+regsize); free(msg); break; } @@ -407,13 +409,13 @@ static int fd_heartbeat_request_cb(ompi_communicator_t* comm, ompi_comm_heartbea ro = (np-comm->c_my_rank+detector->hb_observer) % np; /* same for the observer rank */ if( rr < ro ) { opal_output_verbose(1, ompi_ftmpi_output_handle, - "%s %s: Received heartbeat request from %d on communicator %3d:%d but I am monitored by %d -- this is stall information, ignoring.", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->from, comm->c_contextid, comm->c_epoch, detector->hb_observer ); + "%s %s: Received heartbeat request from %d on communicator %s:%d but I am monitored by %d -- this is stall information, ignoring.", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->from, ompi_comm_print_cid(comm), comm->c_epoch, detector->hb_observer ); return false; /* never forward on the rbcast */ } OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, - "%s %s: Recveived heartbeat request from %d on communicator %3d:%d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->from, comm->c_contextid, comm->c_epoch)); + "%s %s: Recveived heartbeat request 
from %d on communicator %s:%d", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->from, ompi_comm_print_cid(comm), comm->c_epoch)); detector->hb_observer = msg->from; detector->hb_sstamp = 0.; @@ -667,19 +669,19 @@ static int fd_heartbeat_send(comm_detector_t* detector) { } detector->hb_sstamp = now; OPAL_OUTPUT_VERBOSE((9, ompi_ftmpi_output_handle, - "%s %s: Sending heartbeat to %d on communicator %3d:%d stamp %g", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, detector->hb_observer, comm->c_contextid, comm->c_epoch, detector->hb_sstamp-startdate )); + "%s %s: Sending heartbeat to %d on communicator %s:%d stamp %g", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, detector->hb_observer, ompi_comm_print_cid(comm), comm->c_epoch, detector->hb_sstamp-startdate )); if( comm_detector_use_rdma_hb ) return fd_heartbeat_rdma_put(detector); /* send the heartbeat with eager send */ ompi_comm_heartbeat_message_t msg; - msg.super.cid = comm->c_contextid; + msg.super.cid = ompi_comm_get_local_cid(comm); msg.super.epoch = comm->c_epoch; msg.super.type = comm_heartbeat_recv_cb_type; msg.from = detector->hb_rdma_rank; /* comm->c_my_rank; except during finalize when it is equal to detector->hb_observer */ ompi_proc_t* proc = ompi_comm_peer_lookup(comm, detector->hb_observer); - ompi_comm_rbcast_send_msg(proc, &msg.super, sizeof(msg)); + ompi_comm_rbcast_send_msg(proc, (ompi_comm_rbcast_message_t*)&msg, sizeof(msg)); return OMPI_SUCCESS; } @@ -701,15 +703,15 @@ static int fd_heartbeat_recv_cb(ompi_communicator_t* comm, ompi_comm_heartbeat_m if( msg->from != detector->hb_observing ) { OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, - "%s %s: Received heartbeat from %d on communicator %3d:%d but I am now monitoring %d -- ignored.", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->from, comm->c_contextid, comm->c_epoch, detector->hb_observing )); + "%s %s: Received heartbeat from %d on communicator %s:%d but I am now monitoring %d -- ignored.", + 
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->from, ompi_comm_print_cid(comm), comm->c_epoch, detector->hb_observing )); } else { double stamp = PMPI_Wtime(); double grace = detector->hb_timeout - (stamp - detector->hb_rstamp); OPAL_OUTPUT_VERBOSE((9, ompi_ftmpi_output_handle, - "%s %s: Received heartbeat from %d on communicator %3d:%d at timestamp %g (remained %.1e of %.1e before suspecting)", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->from, comm->c_contextid, comm->c_epoch, stamp-startdate, grace, detector->hb_timeout )); + "%s %s: Received heartbeat from %d on communicator %s:%d at timestamp %g (remained %.1e of %.1e before suspecting)", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->from, ompi_comm_print_cid(comm), comm->c_epoch, stamp-startdate, grace, detector->hb_timeout )); detector->hb_rstamp = stamp; if( grace < 0.0 ) { opal_output_verbose(1, ompi_ftmpi_output_handle, diff --git a/ompi/communicator/ft/comm_ft_propagator.c b/ompi/communicator/ft/comm_ft_propagator.c index dbb20275d53..d203f11c7c5 100644 --- a/ompi/communicator/ft/comm_ft_propagator.c +++ b/ompi/communicator/ft/comm_ft_propagator.c @@ -2,7 +2,8 @@ * Copyright (c) 2011-2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
* * $COPYRIGHT$ * @@ -68,12 +69,12 @@ int ompi_comm_failure_propagate(ompi_communicator_t* comm, ompi_proc_t* proc, in if( -1 == comm_failure_propagator_cb_type ) return OMPI_SUCCESS; OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, - "%s %s: Initiate a propagation for failure of %s (state %d) on communicator %3d:%d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, OMPI_NAME_PRINT(&proc->super.proc_name), state, comm->c_contextid, comm->c_epoch )); + "%s %s: Initiate a propagation for failure of %s (state %d) on communicator %s:%d", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, OMPI_NAME_PRINT(&proc->super.proc_name), state, ompi_comm_print_cid(comm), comm->c_epoch )); ompi_comm_failure_propagator_message_t msg; /* Broadcast the 'failure_propagator' signal to all other processes. */ - msg.rbcast_msg.cid = comm->c_contextid; + msg.rbcast_msg.cid = ompi_comm_get_local_cid(comm); msg.rbcast_msg.epoch = comm->c_epoch; msg.rbcast_msg.type = comm_failure_propagator_cb_type; msg.proc_name = proc->super.proc_name; @@ -90,13 +91,13 @@ static int ompi_comm_failure_propagator_local(ompi_communicator_t* comm, ompi_co ompi_proc_t* proc = (ompi_proc_t*)ompi_proc_for_name(msg->proc_name); if( !ompi_proc_is_active(proc) ) { OPAL_OUTPUT_VERBOSE((9, ompi_ftmpi_output_handle, - "%s %s: failure of %s has already been propagated on comm %3d:%d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, OMPI_NAME_PRINT(&msg->proc_name), comm->c_contextid, comm->c_epoch)); + "%s %s: failure of %s has already been propagated on comm %s:%d", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, OMPI_NAME_PRINT(&msg->proc_name), ompi_comm_print_cid(comm), comm->c_epoch)); return false; /* already propagated, done. 
*/ } OPAL_OUTPUT_VERBOSE((9, ompi_ftmpi_output_handle, - "%s %s: failure of %s needs to be propagated on comm %3d:%d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, OMPI_NAME_PRINT(&msg->proc_name), comm->c_contextid, comm->c_epoch)); + "%s %s: failure of %s needs to be propagated on comm %s:%d", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, OMPI_NAME_PRINT(&msg->proc_name), ompi_comm_print_cid(comm), comm->c_epoch)); ompi_errhandler_proc_failed_internal(proc, msg->proc_state, false); return true; } diff --git a/ompi/communicator/ft/comm_ft_reliable_bcast.c b/ompi/communicator/ft/comm_ft_reliable_bcast.c index 7302737f1bd..e434324035f 100644 --- a/ompi/communicator/ft/comm_ft_reliable_bcast.c +++ b/ompi/communicator/ft/comm_ft_reliable_bcast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2020 The University of Tennessee and The University + * Copyright (c) 2013-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * @@ -73,6 +73,7 @@ static int ompi_comm_rbcast_bmg(ompi_communicator_t* comm, ompi_comm_rbcast_mess proc = ompi_group_peer_lookup(lgrp, idx); } else { + assert(NULL != hgrp); assert(OMPI_COMM_IS_INTER(comm)); proc = ompi_group_peer_lookup(hgrp, idx-ompi_group_size(lgrp)); } if( ompi_proc_is_active(proc) ) { @@ -181,7 +182,7 @@ static void ompi_comm_rbcast_bml_recv_cb( OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->cid, msg->epoch)); return; } - if(OPAL_UNLIKELY( msg->cid != comm->c_contextid )) { + if(OPAL_UNLIKELY( msg->cid != ompi_comm_get_local_cid(comm))) { OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, "%s %s: Info: received a late rbcast message with CID %3d:%d during an MPI_COMM_DUP that is trying to reuse that CID (thus increasing the epoch) - ignoring, nothing to do", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->cid, msg->epoch)); @@ -196,7 +197,7 @@ static void ompi_comm_rbcast_bml_recv_cb( } /* invoke the local registered callback for the type */ - assert( 0 <= msg->type && 
RBCAST_CB_TYPE_MAX >= msg->type ); + assert( RBCAST_CB_TYPE_MAX >= msg->type ); if( NULL != ompi_comm_rbcast_cb[msg->type] ) { if( ompi_comm_rbcast_cb[msg->type](comm, msg) ) { /* forward the rbcast */ diff --git a/ompi/communicator/ft/comm_ft_revoke.c b/ompi/communicator/ft/comm_ft_revoke.c index 027e659814a..0e4c3158afa 100644 --- a/ompi/communicator/ft/comm_ft_revoke.c +++ b/ompi/communicator/ft/comm_ft_revoke.c @@ -3,6 +3,8 @@ * Copyright (c) 2011-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. * * * $COPYRIGHT$ @@ -49,14 +51,14 @@ int ompi_comm_revoke_internal(ompi_communicator_t* comm) int ret = OMPI_SUCCESS;; OPAL_OUTPUT_VERBOSE((1, ompi_ftmpi_output_handle, - "%s %s: Initiate a revoke on communicator %3d:%d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, comm->c_contextid, comm->c_epoch )); + "%s %s: Initiate a revoke on communicator %s:%d", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, ompi_comm_print_cid(comm), comm->c_epoch )); /* Mark locally revoked */ if( ompi_comm_revoke_local(comm, NULL) ) { /* Broadcast the 'revoke' signal to all other processes. 
*/ ompi_comm_rbcast_message_t msg; - msg.cid = comm->c_contextid; + msg.cid = ompi_comm_get_local_cid(comm); msg.epoch = comm->c_epoch; msg.type = comm_revoke_cb_type; ret = ompi_comm_rbcast(comm, &msg, sizeof(msg)); @@ -71,13 +73,13 @@ static int ompi_comm_revoke_local(ompi_communicator_t* comm, ompi_comm_rbcast_me { if( comm->comm_revoked ) { OPAL_OUTPUT_VERBOSE((9, ompi_ftmpi_output_handle, - "%s %s: comm %3d:%d is already revoked, nothing to do", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, comm->c_contextid, comm->c_epoch)); + "%s %s: comm %s:%d is already revoked, nothing to do", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, ompi_comm_print_cid(comm), comm->c_epoch)); return false; } OPAL_OUTPUT_VERBOSE((9, ompi_ftmpi_output_handle, - "%s %s: comm %3d:%d is marked revoked locally", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, comm->c_contextid, comm->c_epoch)); + "%s %s: comm %s:%d is marked revoked locally", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, ompi_comm_print_cid(comm), comm->c_epoch)); /* * Locally revoke the communicator * diff --git a/ompi/communicator/help-comm.txt b/ompi/communicator/help-comm.txt new file mode 100644 index 00000000000..a5c179fd908 --- /dev/null +++ b/ompi/communicator/help-comm.txt @@ -0,0 +1,28 @@ +# -*- text -*- +# +# Copyright (c) 2006 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2018 IBM Corporation. All rights reserved. +# Copyright (c) 2020 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English general help file for Open MPI. +# +[MPI function not supported] +Your application has invoked an MPI function that is not supported in +this environment. 
+ + MPI function: %s + Reason: %s +[info-set-with-reserved-prefix] +Comments +MPI_Info_set warning, key is using a reserved prefix. + Key: %s + Reserved prefix: %s diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index 26978d0867e..97f87d53bdf 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -118,7 +118,6 @@ OMPI_DECLSPEC extern opal_convertor_t* ompi_mpi_local_convertor; extern struct opal_pointer_array_t ompi_datatype_f_to_c_table; OMPI_DECLSPEC int32_t ompi_datatype_init( void ); -OMPI_DECLSPEC int32_t ompi_datatype_finalize( void ); OMPI_DECLSPEC int32_t ompi_datatype_default_convertors_init( void ); OMPI_DECLSPEC int32_t ompi_datatype_default_convertors_fini( void ); diff --git a/ompi/datatype/ompi_datatype_external32.c b/ompi/datatype/ompi_datatype_external32.c index 9f1e6242412..d8eb81dc897 100644 --- a/ompi/datatype/ompi_datatype_external32.c +++ b/ompi/datatype/ompi_datatype_external32.c @@ -125,8 +125,12 @@ int32_t ompi_datatype_default_convertors_init( void ) int32_t ompi_datatype_default_convertors_fini( void ) { - OBJ_RELEASE( ompi_mpi_external32_convertor ); - OBJ_RELEASE( ompi_mpi_local_convertor ); + if (NULL != ompi_mpi_external32_convertor) { + OBJ_RELEASE( ompi_mpi_external32_convertor ); + } + if (NULL != ompi_mpi_local_convertor) { + OBJ_RELEASE( ompi_mpi_local_convertor ); + } return OMPI_SUCCESS; } diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index 5a9a0aa9110..fc19209214c 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -18,6 +18,8 @@ * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -34,11 +36,16 @@ #include "opal/util/output.h" #include "opal/util/string_copy.h" #include "opal/class/opal_pointer_array.h" +#include "ompi/attribute/attribute.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/datatype/ompi_datatype_internal.h" +#include "ompi/instance/instance.h" +#include "ompi/attribute/attribute.h" #include "mpi.h" +static int ompi_datatype_finalize (void); + /** * This is the number of predefined datatypes. It is different than the MAX_PREDEFINED * as it include all the optional datatypes (such as MPI_INTEGER?, MPI_REAL?). @@ -472,6 +479,7 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}}; int32_t ompi_datatype_init( void ) { int32_t i; + int ret = OMPI_SUCCESS; opal_datatype_init(); @@ -669,24 +677,34 @@ int32_t ompi_datatype_init( void ) datatype->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS; } } + + /* get a reference to the attributes subsys */ + ret = ompi_attr_get_ref(); + if (OMPI_SUCCESS != ret) { + return ret; + } + ompi_datatype_default_convertors_init(); + + /* get a reference to the attributes subsys */ + ret = ompi_attr_get_ref(); + if (OMPI_SUCCESS != ret) { + return ret; + } + + ompi_mpi_instance_append_finalize (ompi_datatype_finalize); return OMPI_SUCCESS; } -int32_t ompi_datatype_finalize( void ) +static int ompi_datatype_finalize (void) { + int ret = OMPI_SUCCESS; + /* As the synonyms are just copies of the internal data we should not free them. * Anyway they are over the limit of OMPI_DATATYPE_MPI_MAX_PREDEFINED so they will never get freed. */ - /* As they are statically allocated they cannot be released. - * But we can call OBJ_DESTRUCT, just to free all internally allocated ressources. 
- */ - for( int i = 0; i < ompi_datatype_number_of_predefined_data; i++ ) { - opal_datatype_t* datatype = (opal_datatype_t*)opal_pointer_array_get_item(&ompi_datatype_f_to_c_table, i ); - OBJ_DESTRUCT(datatype); - } /* Get rid of the Fortran2C translation table */ OBJ_DESTRUCT(&ompi_datatype_f_to_c_table); @@ -697,7 +715,8 @@ int32_t ompi_datatype_finalize( void ) /* don't call opal_datatype_finalize () as it no longer exists. the function will be called * opal_finalize_util (). */ - return OMPI_SUCCESS; + /* release a reference to the attributes subsys */ + return ompi_attr_put_ref(); } diff --git a/ompi/debuggers/ompi_common_dll.c b/ompi/debuggers/ompi_common_dll.c index 9395d93241a..4fe181948a1 100644 --- a/ompi/debuggers/ompi_common_dll.c +++ b/ompi/debuggers/ompi_common_dll.c @@ -67,9 +67,9 @@ static int host_is_big_endian = 0; { \ out_name = mqs_field_offset((qh_type), #field_name); \ if (out_name < 0) { \ - fprintf(stderr, "WARNING: Open MPI is unable to find " \ + fprintf(stderr, "WARNING: " OMPI_IDENT_STRING " is unable to find " \ "field " #field_name " in the " #struct_name \ - " type. This can happen can if Open MPI is built " \ + " type. 
This can happen if " OMPI_IDENT_STRING " is built " \ "without debugging information, or is stripped " \ "after building.\n"); \ } \ @@ -324,6 +324,10 @@ int ompi_fill_in_type_info(mqs_image *image, char **message) qh_type, ompi_communicator_t, c_name); ompi_field_offset(i_info->ompi_communicator_t.offset.c_contextid, qh_type, ompi_communicator_t, c_contextid); + ompi_field_offset(i_info->ompi_communicator_t.offset.c_contextidb, + qh_type, ompi_communicator_t, c_contextidb); + ompi_field_offset(i_info->ompi_communicator_t.offset.c_index, + qh_type, ompi_communicator_t, c_index); ompi_field_offset(i_info->ompi_communicator_t.offset.c_my_rank, qh_type, ompi_communicator_t, c_my_rank); ompi_field_offset(i_info->ompi_communicator_t.offset.c_local_group, @@ -512,7 +516,7 @@ int ompi_fill_in_type_info(mqs_image *image, char **message) * did our best but here we're at our limit. Give up! */ *message = missing_in_action; - fprintf(stderr, "WARNING: Open MPI is unable to find debugging information about the \"%s\" type. This can happen if Open MPI was built without debugging information, or was stripped after building.\n", + fprintf(stderr, "WARNING: " OMPI_IDENT_STRING " is unable to find debugging information about the \"%s\" type. 
This can happen if " OMPI_IDENT_STRING " was built without debugging information, or was stripped after building.\n", missing_in_action); return err_missing_type; } @@ -634,7 +638,7 @@ int ompi_fetch_opal_pointer_array_item(mqs_process *proc, mqs_taddr_t addr, int ompi_get_lib_version(char * buf, int size) { int ret; - ret = snprintf(buf, size-1, "Open MPI v%d.%d.%d%s%s%s%s%s%s%s%s%s", + ret = snprintf(buf, size-1, OMPI_IDENT_STRING " v%d.%d.%d%s%s%s%s%s%s%s%s%s", OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION, (strlen(OMPI_GREEK_VERSION) > 0)?OMPI_GREEK_VERSION:"", (strlen(OPAL_PACKAGE_STRING) > 0)?", package: ":"", diff --git a/ompi/debuggers/ompi_common_dll_defs.h b/ompi/debuggers/ompi_common_dll_defs.h index 6f4e6b89381..5fe11d3986e 100644 --- a/ompi/debuggers/ompi_common_dll_defs.h +++ b/ompi/debuggers/ompi_common_dll_defs.h @@ -198,6 +198,8 @@ typedef struct struct { int c_name; int c_contextid; + int c_contextidb; + int c_index; int c_my_rank; int c_local_group; int c_remote_group; diff --git a/ompi/debuggers/ompi_msgq_dll.c b/ompi/debuggers/ompi_msgq_dll.c index f5afab5d2e7..fad3d786cfa 100644 --- a/ompi/debuggers/ompi_msgq_dll.c +++ b/ompi/debuggers/ompi_msgq_dll.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2007-2018 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2022 Cisco Systems, Inc. All rights reserved * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. @@ -650,11 +650,13 @@ static int rebuild_communicator_list (mqs_process *proc) if( 0 == comm_ptr ) continue; commcount++; /* Now let's grab the data we want from inside */ + /* NTH: XXXXXXXXXXXXX FIXME!!!!!!!!!!!!!! c_index is local but MSGQ needs a global identifier + * that is sizeof (void *) or smaller. 
*/ DEBUG(VERBOSE_GENERAL, ("Retrieve context_id from 0x%llx and local_rank from 0x%llx\n", - (long long)(comm_ptr + i_info->ompi_communicator_t.offset.c_contextid), + (long long)(comm_ptr + i_info->ompi_communicator_t.offset.c_index), (long long)(comm_ptr + i_info->ompi_communicator_t.offset.c_my_rank))); context_id = ompi_fetch_int( proc, - comm_ptr + i_info->ompi_communicator_t.offset.c_contextid, + comm_ptr + i_info->ompi_communicator_t.offset.c_index, p_info ); /* Do we already have this communicator ? */ old = find_communicator(p_info, context_id); @@ -1165,7 +1167,7 @@ static int fetch_request( mqs_process *proc, mpi_process_info *p_info, // data_name in res->extra_text[2] (vs. extra_text[1]), // where it is guaranteed to fit. data_name[4] = '\0'; - snprintf( (char*)res->extra_text[1], 64, "Data: %d", + snprintf( (char*)res->extra_text[1], 64, "Data: %d instances of MPI datatype", (int)res->desired_length); snprintf( (char*)res->extra_text[2], 64, "%s", data_name ); diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index 9ac16d46ad8..c3b723c707f 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -21,7 +21,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * Copyright (c) 2021 Triad National Security, LLC. All rights + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -268,6 +268,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, /* initiate a list of participants for the connect, * starting with our own members */ OBJ_CONSTRUCT(&mlist, opal_list_t); + assert(NULL != members /* would mean comm had 0-sized group! 
*/); for (i=0; NULL != members[i]; i++) { OPAL_PMIX_CONVERT_STRING_TO_PROCT(&pxproc, members[i]); plt = OBJ_NEW(opal_proclist_t); @@ -491,10 +492,9 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, NULL , /* remote_procs */ NULL, /* attrs */ comm->error_handler, /* error handler */ - NULL, /* topo component */ group, /* local group */ - new_group_pointer /* remote group */ - ); + new_group_pointer, /* remote group */ + 0); /* flags */ if (OMPI_SUCCESS != rc) { goto exit; } @@ -689,7 +689,7 @@ static int dpm_convert(opal_list_t *infos, char *ck, *ptr, *help_str = NULL; int rc; char **tmp; - dpm_conflicts_t *modifiers; + dpm_conflicts_t *modifiers = NULL; const char *attr; /* pick the modifiers to be checked */ @@ -782,8 +782,9 @@ static int dpm_convert(opal_list_t *infos, /**** Get here if the specified option is not found in the **** current list - add it ****/ - - if (NULL == directive) { + if (NULL == directive && NULL == modifier) { + return OMPI_ERR_BAD_PARAM; + } else if (NULL == directive) { opal_asprintf(&ptr, ":%s", modifier); } else if (NULL == modifier) { ptr = strdup(directive); @@ -1674,6 +1675,9 @@ int ompi_dpm_dyn_init(void) ptr = &tmp[0]; } port_name = strdup(ptr); + if (NULL == port_name) { + return OMPI_ERR_OUT_OF_RESOURCE; + } rc = ompi_dpm_connect_accept(MPI_COMM_WORLD, root, port_name, send_first, &newcomm); free(port_name); @@ -1699,15 +1703,6 @@ int ompi_dpm_dyn_init(void) return OMPI_SUCCESS; } - -/* - * finalize the module - */ -int ompi_dpm_finalize(void) -{ - return OMPI_SUCCESS; -} - static void cleanup_dpm_disconnect_objs(ompi_dpm_disconnect_obj **objs, int count) { for(int i = 0; i < count; i++) { diff --git a/ompi/dpm/dpm.h b/ompi/dpm/dpm.h index 34084480f87..f954f141ac2 100644 --- a/ompi/dpm/dpm.h +++ b/ompi/dpm/dpm.h @@ -13,6 +13,8 @@ * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -96,11 +98,6 @@ int ompi_dpm_open_port(char *port_name); */ int ompi_dpm_close_port(const char *port_name); -/* - * Finalize the DPM - */ -int ompi_dpm_finalize(void); - END_C_DECLS #endif /* OMPI_DPM_H */ diff --git a/ompi/errhandler/errcode-internal.c b/ompi/errhandler/errcode-internal.c index 8d76030a6c6..dd90cca6b95 100644 --- a/ompi/errhandler/errcode-internal.c +++ b/ompi/errhandler/errcode-internal.c @@ -15,6 +15,8 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reseved. * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +33,7 @@ #include "opal/util/string_copy.h" #include "ompi/errhandler/errcode-internal.h" +#include "ompi/instance/instance.h" /* Table holding all error codes */ opal_pointer_array_t ompi_errcodes_intern = {{0}}; @@ -62,6 +65,7 @@ static ompi_errcode_intern_t ompi_err_rma_flavor_intern; static void ompi_errcode_intern_construct(ompi_errcode_intern_t* errcode); static void ompi_errcode_intern_destruct(ompi_errcode_intern_t* errcode); +static int ompi_errcode_intern_finalize (void); OBJ_CLASS_INSTANCE(ompi_errcode_intern_t,opal_object_t,ompi_errcode_intern_construct, ompi_errcode_intern_destruct); @@ -286,10 +290,21 @@ int ompi_errcode_intern_init (void) &ompi_err_rma_flavor_intern); ompi_errcode_intern_lastused=pos; + + ompi_mpi_instance_append_finalize (ompi_errcode_intern_finalize); + return OMPI_SUCCESS; } -int ompi_errcode_intern_finalize(void) +/** + * Finalize the error codes. + * + * @returns OMPI_SUCCESS Always + * + * Invoked from instance teardown if ompi_errcode_intern_init() was called; + * tears down the error code array. 
+ */ +static int ompi_errcode_intern_finalize (void) { OBJ_DESTRUCT(&ompi_success_intern); diff --git a/ompi/errhandler/errcode-internal.h b/ompi/errhandler/errcode-internal.h index 745098b5a0d..ec64faa9fc0 100644 --- a/ompi/errhandler/errcode-internal.h +++ b/ompi/errhandler/errcode-internal.h @@ -14,6 +14,8 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -87,15 +89,6 @@ static inline int ompi_errcode_get_mpi_code(int errcode) */ int ompi_errcode_intern_init(void); -/** - * Finalize the error codes. - * - * @returns OMPI_SUCCESS Always - * - * Invokes from ompi_mpi_finalize(); tears down the error code array. - */ -int ompi_errcode_intern_finalize(void); - END_C_DECLS #endif /* OMPI_ERRCODE_INTERNAL_H */ diff --git a/ompi/errhandler/errcode.c b/ompi/errhandler/errcode.c index c52c5789c16..995acac6d0c 100644 --- a/ompi/errhandler/errcode.c +++ b/ompi/errhandler/errcode.c @@ -17,6 +17,8 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,6 +37,7 @@ #include "ompi/errhandler/errcode.h" #include "ompi/constants.h" +#include "ompi/instance/instance.h" /* Table holding all error codes */ opal_pointer_array_t ompi_mpi_errcodes = {{0}}; @@ -243,11 +246,22 @@ int ompi_mpi_errcode_init (void) MPI_ERR_LASTCODE. So just start it as == MPI_ERR_LASTCODE. 
*/ ompi_mpi_errcode_lastused = MPI_ERR_LASTCODE; ompi_mpi_errcode_lastpredefined = MPI_ERR_LASTCODE; + opal_mutex_unlock(&errcode_init_lock); + + ompi_mpi_instance_append_finalize (ompi_mpi_errcode_finalize); + return OMPI_SUCCESS; } -int ompi_mpi_errcode_finalize(void) +/** + * Finalize the error codes. + * + * @returns OMPI_SUCCESS Always + * + * Invoked from instance teardown if ompi_mpi_errcode_init() was called; tears down the error code array. + */ +int ompi_mpi_errcode_finalize (void) { int i; ompi_mpi_errcode_t *errc; diff --git a/ompi/errhandler/errcode.h b/ompi/errhandler/errcode.h index 033abd24167..24d070fb4f3 100644 --- a/ompi/errhandler/errcode.h +++ b/ompi/errhandler/errcode.h @@ -14,6 +14,8 @@ * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -215,6 +217,50 @@ static inline char* ompi_mpi_errnum_get_string (int errnum) } +/** + * Initialize the error codes + * + * @returns OMPI_SUCCESS Upon success + * @returns OMPI_ERROR Otherwise + * + * Invoked from ompi_mpi_init(); sets up all static MPI error codes, + */ +int ompi_mpi_errcode_init(void); + +/** + * Add an error code + * + * @param: error class to which this new error code belongs to + * + * @returns the new error code on SUCCESS (>0) + * @returns OMPI_ERROR otherwise + * + */ +int ompi_mpi_errcode_add (int errclass); + +/** + * Add an error class + * + * @param: none + * + * @returns the new error class on SUCCESS (>0) + * @returns OMPI_ERROR otherwise + * + */ +int ompi_mpi_errclass_add (void); + +/** + * Add an error string to an error code + * + * @param: error code for which the string is defined + * @param: error string to add + * @param: length of the string + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERROR on error + */ +int 
ompi_mpi_errnum_add_string (int errnum, const char* string, int len); + END_C_DECLS #endif /* OMPI_MPI_ERRCODE_H */ diff --git a/ompi/errhandler/errhandler.c b/ompi/errhandler/errhandler.c index 3a2d2eab162..3752bd08d39 100644 --- a/ompi/errhandler/errhandler.c +++ b/ompi/errhandler/errhandler.c @@ -17,6 +17,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -36,7 +38,7 @@ #include "opal/mca/pmix/pmix-internal.h" #include "opal/util/string_copy.h" #include "opal/mca/backtrace/backtrace.h" - +#include "ompi/runtime/mpiruntime.h" /* * Table for Fortran <-> C errhandler handle conversion @@ -137,56 +139,60 @@ int ompi_initial_errhandler_init(void) { return OMPI_SUCCESS; } +static int ompi_errhandler_finalize (void); + /* * Initialize OMPI errhandler infrastructure */ int ompi_errhandler_init(void) { - /* initialize ompi_errhandler_f_to_c_table */ + OBJ_CONSTRUCT( &ompi_errhandler_f_to_c_table, opal_pointer_array_t); + if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_errhandler_f_to_c_table, 8, + OMPI_FORTRAN_HANDLE_MAX, 16) ) { + return OMPI_ERROR; + } - OBJ_CONSTRUCT( &ompi_errhandler_f_to_c_table, opal_pointer_array_t); - if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_errhandler_f_to_c_table, 8, - OMPI_FORTRAN_HANDLE_MAX, 16) ) { - return OMPI_ERROR; - } + /* Initialize the predefined error handlers */ + OBJ_CONSTRUCT( &ompi_mpi_errhandler_null.eh, ompi_errhandler_t ); + if( ompi_mpi_errhandler_null.eh.eh_f_to_c_index != OMPI_ERRHANDLER_NULL_FORTRAN ) { + return OMPI_ERROR; + } - /* Initialize the predefined error handlers */ - OBJ_CONSTRUCT( &ompi_mpi_errhandler_null.eh, ompi_errhandler_t ); - if( ompi_mpi_errhandler_null.eh.eh_f_to_c_index != OMPI_ERRHANDLER_NULL_FORTRAN ) - return OMPI_ERROR; - 
ompi_mpi_errhandler_null.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; - ompi_mpi_errhandler_null.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; - ompi_mpi_errhandler_null.eh.eh_comm_fn = NULL; - ompi_mpi_errhandler_null.eh.eh_file_fn = NULL; - ompi_mpi_errhandler_null.eh.eh_win_fn = NULL ; - ompi_mpi_errhandler_null.eh.eh_fort_fn = NULL; - opal_string_copy(ompi_mpi_errhandler_null.eh.eh_name, "MPI_ERRHANDLER_NULL", - sizeof(ompi_mpi_errhandler_null.eh.eh_name)); - - OBJ_CONSTRUCT( &ompi_mpi_errors_are_fatal.eh, ompi_errhandler_t ); - if( ompi_mpi_errors_are_fatal.eh.eh_f_to_c_index != OMPI_ERRORS_ARE_FATAL_FORTRAN ) - return OMPI_ERROR; - ompi_mpi_errors_are_fatal.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; - ompi_mpi_errors_are_fatal.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; - ompi_mpi_errors_are_fatal.eh.eh_comm_fn = ompi_mpi_errors_are_fatal_comm_handler; - ompi_mpi_errors_are_fatal.eh.eh_file_fn = ompi_mpi_errors_are_fatal_file_handler; - ompi_mpi_errors_are_fatal.eh.eh_win_fn = ompi_mpi_errors_are_fatal_win_handler ; - ompi_mpi_errors_are_fatal.eh.eh_fort_fn = NULL; - opal_string_copy(ompi_mpi_errors_are_fatal.eh.eh_name, - "MPI_ERRORS_ARE_FATAL", - sizeof(ompi_mpi_errors_are_fatal.eh.eh_name)); - - OBJ_CONSTRUCT( &ompi_mpi_errors_return.eh, ompi_errhandler_t ); - if( ompi_mpi_errors_return.eh.eh_f_to_c_index != OMPI_ERRORS_RETURN_FORTRAN ) - return OMPI_ERROR; - ompi_mpi_errors_return.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; - ompi_mpi_errors_return.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; - ompi_mpi_errors_return.eh.eh_comm_fn = ompi_mpi_errors_return_comm_handler; - ompi_mpi_errors_return.eh.eh_file_fn = ompi_mpi_errors_return_file_handler; - ompi_mpi_errors_return.eh.eh_win_fn = ompi_mpi_errors_return_win_handler; - ompi_mpi_errors_return.eh.eh_fort_fn = NULL; - opal_string_copy(ompi_mpi_errors_return.eh.eh_name, "MPI_ERRORS_RETURN", - sizeof(ompi_mpi_errors_return.eh.eh_name)); + ompi_mpi_errhandler_null.eh.eh_mpi_object_type = 
OMPI_ERRHANDLER_TYPE_PREDEFINED; + ompi_mpi_errhandler_null.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; + ompi_mpi_errhandler_null.eh.eh_comm_fn = NULL; + ompi_mpi_errhandler_null.eh.eh_file_fn = NULL; + ompi_mpi_errhandler_null.eh.eh_win_fn = NULL ; + ompi_mpi_errhandler_null.eh.eh_fort_fn = NULL; + opal_string_copy (ompi_mpi_errhandler_null.eh.eh_name, "MPI_ERRHANDLER_NULL", + sizeof(ompi_mpi_errhandler_null.eh.eh_name)); + + OBJ_CONSTRUCT( &ompi_mpi_errors_are_fatal.eh, ompi_errhandler_t ); + if( ompi_mpi_errors_are_fatal.eh.eh_f_to_c_index != OMPI_ERRORS_ARE_FATAL_FORTRAN ) + return OMPI_ERROR; + ompi_mpi_errors_are_fatal.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; + ompi_mpi_errors_are_fatal.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; + ompi_mpi_errors_are_fatal.eh.eh_comm_fn = ompi_mpi_errors_are_fatal_comm_handler; + ompi_mpi_errors_are_fatal.eh.eh_file_fn = ompi_mpi_errors_are_fatal_file_handler; + ompi_mpi_errors_are_fatal.eh.eh_win_fn = ompi_mpi_errors_are_fatal_win_handler; + ompi_mpi_errors_are_fatal.eh.eh_instance_fn = ompi_mpi_errors_are_fatal_instance_handler; + ompi_mpi_errors_are_fatal.eh.eh_fort_fn = NULL; + opal_string_copy(ompi_mpi_errors_are_fatal.eh.eh_name, + "MPI_ERRORS_ARE_FATAL", + sizeof(ompi_mpi_errors_are_fatal.eh.eh_name)); + + OBJ_CONSTRUCT( &ompi_mpi_errors_return.eh, ompi_errhandler_t ); + if( ompi_mpi_errors_return.eh.eh_f_to_c_index != OMPI_ERRORS_RETURN_FORTRAN ) + return OMPI_ERROR; + ompi_mpi_errors_return.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; + ompi_mpi_errors_return.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; + ompi_mpi_errors_return.eh.eh_comm_fn = ompi_mpi_errors_return_comm_handler; + ompi_mpi_errors_return.eh.eh_file_fn = ompi_mpi_errors_return_file_handler; + ompi_mpi_errors_return.eh.eh_win_fn = ompi_mpi_errors_return_win_handler; + ompi_mpi_errors_return.eh.eh_instance_fn = ompi_mpi_errors_return_instance_handler; + ompi_mpi_errors_return.eh.eh_fort_fn = NULL; + 
opal_string_copy(ompi_mpi_errors_return.eh.eh_name, "MPI_ERRORS_RETURN", + sizeof(ompi_mpi_errors_return.eh.eh_name)); OBJ_CONSTRUCT( &ompi_mpi_errors_abort.eh, ompi_errhandler_t ); if( ompi_mpi_errors_abort.eh.eh_f_to_c_index != OMPI_ERRORS_ABORT_FORTRAN ) @@ -206,14 +212,23 @@ int ompi_errhandler_init(void) if( NULL != env ) { ompi_process_info.initial_errhandler = strndup(env, MPI_MAX_INFO_VAL); } - return ompi_initial_errhandler_init(); + + ompi_initial_errhandler_init(); + ompi_mpi_instance_append_finalize (ompi_errhandler_finalize); + + return OMPI_SUCCESS; } -/* - * Clean up the errorhandler resources +/** + * Finalize the error handler interface. + * + * @returns OMPI_SUCCESS Always + * + * Invoked on instance teardown if ompi_errhandler_init() was called; tears down the error handler + * interface, and destroys the F2C translation table. */ -int ompi_errhandler_finalize(void) +static int ompi_errhandler_finalize (void) { OBJ_DESTRUCT(&ompi_mpi_errhandler_null.eh); OBJ_DESTRUCT(&ompi_mpi_errors_return.eh); @@ -232,46 +247,62 @@ int ompi_errhandler_finalize(void) return OMPI_SUCCESS; } +void ompi_errhandler_free (ompi_errhandler_t *errhandler) +{ + OBJ_RELEASE(errhandler); + ompi_mpi_instance_release (); +} ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type, - ompi_errhandler_generic_handler_fn_t *func, + ompi_errhandler_generic_handler_fn_t *func, ompi_errhandler_lang_t lang) { - ompi_errhandler_t *new_errhandler; - - /* Create a new object and ensure that it's valid */ - - new_errhandler = OBJ_NEW(ompi_errhandler_t); - if (NULL != new_errhandler) { - if (0 > new_errhandler->eh_f_to_c_index) { - OBJ_RELEASE(new_errhandler); - new_errhandler = NULL; - } else { - - /* We cast the user's callback function to any one of the - function pointer types in the union; it doesn't matter which. - It only matters that we dereference/use the right member when - invoking the callback. 
*/ - - new_errhandler->eh_mpi_object_type = object_type; - new_errhandler->eh_lang = lang; - switch (object_type ) { - case (OMPI_ERRHANDLER_TYPE_COMM): - new_errhandler->eh_comm_fn = (MPI_Comm_errhandler_function *)func; - break; - case (OMPI_ERRHANDLER_TYPE_FILE): - new_errhandler->eh_file_fn = (ompi_file_errhandler_function *)func; - break; - case (OMPI_ERRHANDLER_TYPE_WIN): - new_errhandler->eh_win_fn = (MPI_Win_errhandler_function *)func; - break; - default: - break; - } - - new_errhandler->eh_fort_fn = (ompi_errhandler_fortran_handler_fn_t *)func; + ompi_errhandler_t *new_errhandler; + int ret; + + /* make sure the infrastructure is initialized */ + ret = ompi_mpi_instance_retain (); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return NULL; + } + + /* Create a new object and ensure that it's valid */ + + new_errhandler = OBJ_NEW(ompi_errhandler_t); + if (NULL != new_errhandler) { + if (0 > new_errhandler->eh_f_to_c_index) { + OBJ_RELEASE(new_errhandler); + new_errhandler = NULL; + } else { + + /* We cast the user's callback function to any one of the + function pointer types in the union; it doesn't matter which. + It only matters that we dereference/use the right member when + invoking the callback. 
*/ + + new_errhandler->eh_mpi_object_type = object_type; + new_errhandler->eh_lang = lang; + switch (object_type ) { + case OMPI_ERRHANDLER_TYPE_COMM: + new_errhandler->eh_comm_fn = (MPI_Comm_errhandler_function *)func; + break; + case OMPI_ERRHANDLER_TYPE_FILE: + new_errhandler->eh_file_fn = (ompi_file_errhandler_function *)func; + break; + case OMPI_ERRHANDLER_TYPE_WIN: + new_errhandler->eh_win_fn = (MPI_Win_errhandler_function *)func; + break; + case OMPI_ERRHANDLER_TYPE_INSTANCE: + new_errhandler->eh_instance_fn = (MPI_Session_errhandler_function *)func; + break; + default: + break; + } + } + + new_errhandler->eh_fort_fn = (ompi_errhandler_fortran_handler_fn_t *)func; + } - } /* All done */ @@ -350,10 +381,10 @@ int ompi_errhandler_proc_failed_internal(ompi_proc_t* ompi_proc, int status, boo if(OPAL_UNLIKELY( OMPI_SUCCESS != rc )) goto cleanup; } OPAL_OUTPUT_VERBOSE((10, ompi_ftmpi_output_handle, - "%s ompi: Process %s is in comm (%d) with rank %d. [%s]", + "%s ompi: Process %s is in comm (%s) with rank %d. [%s]", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), OMPI_NAME_PRINT(&ompi_proc->super.proc_name), - comm->c_contextid, + ompi_comm_print_cid(comm), proc_rank, (OMPI_ERRHANDLER_TYPE_PREDEFINED == comm->errhandler_type ? "P" : (OMPI_ERRHANDLER_TYPE_COMM == comm->errhandler_type ? 
"C" : @@ -397,11 +428,12 @@ int ompi_errhandler_proc_failed_internal(ompi_proc_t* ompi_proc, int status, boo pmix_info_t pmix_info[1]; pmix_status_t prc; + assert(OPAL_ERR_PROC_ABORTED == status); OPAL_PMIX_CONVERT_NAME(&pmix_source, OMPI_PROC_MY_NAME); OPAL_PMIX_CONVERT_NAME(&pmix_proc, &ompi_proc->super.proc_name); PMIX_INFO_CONSTRUCT(&pmix_info[0]); PMIX_INFO_LOAD(&pmix_info[0], PMIX_EVENT_AFFECTED_PROC, &pmix_proc, PMIX_PROC); - prc = PMIx_Notify_event(status, &pmix_source, PMIX_RANGE_LOCAL, + prc = PMIx_Notify_event(PMIX_ERR_PROC_ABORTED, &pmix_source, PMIX_RANGE_LOCAL, pmix_info, 1, NULL, &active); if( PMIX_SUCCESS != prc && PMIX_OPERATION_SUCCEEDED != prc ) { @@ -450,7 +482,7 @@ static void *ompi_errhandler_event_cb(int fd, int flags, void *context) { continue; /* we are not 'MPI connected' with this proc. */ } assert( !ompi_proc_is_sentinel(proc) ); - ompi_errhandler_proc_failed_internal(proc, status, false); + ompi_errhandler_proc_failed_internal(proc, OPAL_ERR_PROC_ABORTED, false); } opal_event_del(&event->super); free(event); diff --git a/ompi/errhandler/errhandler.h b/ompi/errhandler/errhandler.h index 572deeb9bf3..97305dec541 100644 --- a/ompi/errhandler/errhandler.h +++ b/ompi/errhandler/errhandler.h @@ -17,6 +17,8 @@ * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -83,7 +85,8 @@ enum ompi_errhandler_type_t { OMPI_ERRHANDLER_TYPE_PREDEFINED, OMPI_ERRHANDLER_TYPE_COMM, OMPI_ERRHANDLER_TYPE_WIN, - OMPI_ERRHANDLER_TYPE_FILE + OMPI_ERRHANDLER_TYPE_FILE, + OMPI_ERRHANDLER_TYPE_INSTANCE, }; typedef enum ompi_errhandler_type_t ompi_errhandler_type_t; @@ -109,6 +112,7 @@ struct ompi_errhandler_t { MPI_Comm_errhandler_function *eh_comm_fn; ompi_file_errhandler_function *eh_file_fn; MPI_Win_errhandler_function *eh_win_fn; + MPI_Session_errhandler_function *eh_instance_fn; ompi_errhandler_fortran_handler_fn_t *eh_fort_fn; /* index in Fortran <-> C translation array */ @@ -188,6 +192,10 @@ OMPI_DECLSPEC extern void (*ompi_initial_error_handler)(struct ompi_communicator struct ompi_request_t; +/* declared here because we can't include instance.h from this header + * because it would create a circular dependency */ +extern opal_atomic_int32_t ompi_instance_count; + /** * This is the macro to check the state of MPI and determine whether * it was properly initialized and not yet finalized. @@ -203,15 +211,13 @@ struct ompi_request_t; * potentially-performance-critical code paths) before reading the * variable. */ -#define OMPI_ERR_INIT_FINALIZE(name) \ - { \ - int32_t state = ompi_mpi_state; \ - if (OPAL_UNLIKELY(state < OMPI_MPI_STATE_INIT_COMPLETED || \ - state > OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT)) { \ - ompi_errhandler_invoke(NULL, NULL, -1, \ +#define OMPI_ERR_INIT_FINALIZE(name) \ + { \ + if (OPAL_UNLIKELY(0 == ompi_instance_count)) { \ + ompi_errhandler_invoke(NULL, NULL, -1, \ ompi_errcode_get_mpi_code(MPI_ERR_ARG), \ - name); \ - } \ + name); \ + } \ } /** @@ -327,16 +333,6 @@ struct ompi_request_t; */ int ompi_errhandler_init(void); - /** - * Finalize the error handler interface. - * - * @returns OMPI_SUCCESS Always - * - * Invokes from ompi_mpi_finalize(); tears down the error handler - * interface, and destroys the F2C translation table. 
- */ - int ompi_errhandler_finalize(void); - /** * \internal * @@ -382,8 +378,9 @@ struct ompi_request_t; /** * Create a ompi_errhandler_t * - * @param object_type Enum of the type of MPI object - * @param func Function pointer of the error handler + * @param[in] object_type Enum of the type of MPI object + * @param[in] func Function pointer of the error handler + * @param[in] language Calling language * * @returns errhandler Pointer to the ompi_errorhandler_t that will be * created and returned @@ -402,9 +399,11 @@ struct ompi_request_t; * same as sizeof(void(*)). */ OMPI_DECLSPEC ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type, - ompi_errhandler_generic_handler_fn_t *func, + ompi_errhandler_generic_handler_fn_t *func, ompi_errhandler_lang_t language); + OMPI_DECLSPEC void ompi_errhandler_free (ompi_errhandler_t *errhandler); + /** * Callback function to alert the MPI layer of an error or notification * from the internal RTE and/or the resource manager. diff --git a/ompi/errhandler/errhandler_predefined.c b/ompi/errhandler/errhandler_predefined.c index 4c9353465f2..e317fe98b05 100644 --- a/ompi/errhandler/errhandler_predefined.c +++ b/ompi/errhandler/errhandler_predefined.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -17,6 +18,8 @@ * All rights reserved. * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -41,14 +44,24 @@ #include "ompi/communicator/communicator.h" #include "ompi/file/file.h" #include "ompi/win/win.h" +#include "ompi/instance/instance.h" #include "opal/util/printf.h" #include "opal/util/output.h" +#include "ompi/runtime/mpiruntime.h" /* * Local functions */ static void backend_abort(int fatal, char *type, struct ompi_communicator_t *comm, char *name, int *error_code, va_list arglist); +static void backend_abort_aggregate(int fatal, char *type, + struct ompi_communicator_t *comm, + char *name, int *error_code, + va_list arglist); +static void backend_abort_no_aggregate(int fatal, char *type, + struct ompi_communicator_t *comm, + char *name, int *error_code, + va_list arglist); static void out(char *str, char *arg); @@ -172,6 +185,36 @@ void ompi_mpi_errors_abort_win_handler(struct ompi_win_t **win, va_end(arglist); } +void ompi_mpi_errors_are_fatal_instance_handler (struct ompi_instance_t **instance, + int *error_code, ...) +{ + char *name; + va_list arglist; + int err = MPI_ERR_UNKNOWN; + + va_start(arglist, error_code); + + if (NULL != instance) { + name = (*instance)->i_name; + } else { + name = NULL; + } + + if (NULL != error_code) { + err = *error_code; + } + + /* We only want aggregation while the rte is initialized */ + if (ompi_rte_initialized) { + backend_abort_aggregate(true, "session", NULL, name, error_code, arglist); + } else { + backend_abort_no_aggregate(true, "session", NULL, name, error_code, arglist); + } + va_end(arglist); + + ompi_mpi_abort(NULL, err); +} + void ompi_mpi_errors_return_comm_handler(struct ompi_communicator_t **comm, int *error_code, ...) { @@ -208,6 +251,18 @@ void ompi_mpi_errors_return_win_handler(struct ompi_win_t **win, } +void ompi_mpi_errors_return_instance_handler (struct ompi_instance_t **instance, + int *error_code, ...) 
+{ + /* Don't need anything more -- just need this function to exist */ + /* Silence some compiler warnings */ + + va_list arglist; + va_start(arglist, error_code); + va_end(arglist); +} + + static void out(char *str, char *arg) { if (ompi_rte_initialized && @@ -403,11 +458,11 @@ static void backend_abort_no_aggregate(int fatal, char *type, len -= strlen(type); if (len > 0) { - strncat(str, " ", len); + strncat(str, " ", len - 1); --len; if (len > 0) { - strncat(str, name, len); + strncat(str, name, len - 1); } } out("*** on %s", str); diff --git a/ompi/errhandler/errhandler_predefined.h b/ompi/errhandler/errhandler_predefined.h index 07e306e9a08..c663d962220 100644 --- a/ompi/errhandler/errhandler_predefined.h +++ b/ompi/errhandler/errhandler_predefined.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -9,6 +10,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +27,7 @@ struct ompi_communicator_t; struct ompi_file_t; struct ompi_win_t; +struct ompi_instance_t; /** * Handler function for MPI_ERRORS_ARE_FATAL @@ -34,6 +38,8 @@ OMPI_DECLSPEC void ompi_mpi_errors_are_fatal_file_handler(struct ompi_file_t **f int *error_code, ...); OMPI_DECLSPEC void ompi_mpi_errors_are_fatal_win_handler(struct ompi_win_t **win, int *error_code, ...); +OMPI_DECLSPEC void ompi_mpi_errors_are_fatal_instance_handler(struct ompi_instance_t **win, + int *error_code, ...); /** * Handler function for MPI_ERRORS_ABORT @@ -54,6 +60,8 @@ OMPI_DECLSPEC void ompi_mpi_errors_return_file_handler(struct ompi_file_t **file int *error_code, ...); OMPI_DECLSPEC void ompi_mpi_errors_return_win_handler(struct ompi_win_t **win, int *error_code, ...); +OMPI_DECLSPEC void ompi_mpi_errors_return_instance_handler(struct ompi_instance_t **win, + int *error_code, ...); #endif /* OMPI_ERRHANDLER_PREDEFINED_H */ diff --git a/ompi/file/file.c b/ompi/file/file.c index bf546a55694..9d53c73e993 100644 --- a/ompi/file/file.c +++ b/ompi/file/file.c @@ -16,6 +16,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -54,6 +56,7 @@ ompi_predefined_file_t *ompi_mpi_file_null_addr = &ompi_mpi_file_null; */ static void file_constructor(ompi_file_t *obj); static void file_destructor(ompi_file_t *obj); +static int ompi_file_finalize (void); /* @@ -89,6 +92,7 @@ int ompi_file_init(void) &ompi_mpi_file_null.file); /* All done */ + ompi_mpi_instance_append_finalize (ompi_file_finalize); return OMPI_SUCCESS; } @@ -137,6 +141,9 @@ int ompi_file_open(struct ompi_communicator_t *comm, const char *filename, return ret; } + /* MPI-4 §14.2.8 requires us to remove all unknown keys from the info object */ + opal_info_remove_unreferenced(file->super.s_info); + /* All done */ *fh = file; @@ -160,10 +167,14 @@ int ompi_file_close(ompi_file_t **file) } -/* - * Shut down the MPI_File bookkeeping +/** + * Tear down MPI_File handling. + * + * @retval OMPI_SUCCESS Always. + * + * Invoked during instance teardown if ompi_file_init() is called. */ -int ompi_file_finalize(void) +static int ompi_file_finalize (void) { int i, max; size_t num_unnamed; diff --git a/ompi/file/file.h b/ompi/file/file.h index bb50903ae5d..cb90c56fa6c 100644 --- a/ompi/file/file.h +++ b/ompi/file/file.h @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -16,6 +16,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -185,15 +187,6 @@ int ompi_file_set_name(ompi_file_t *file, char *name); */ int ompi_file_close(ompi_file_t **file); -/** - * Tear down MPI_File handling. - * - * @retval OMPI_SUCCESS Always. 
- * - * Invoked during ompi_mpi_finalize(). - */ -int ompi_file_finalize(void); - /** * Check to see if an MPI_File handle is valid. * diff --git a/ompi/group/group.c b/ompi/group/group.c index ad60a0d7ea7..dff579aba43 100644 --- a/ompi/group/group.c +++ b/ompi/group/group.c @@ -18,6 +18,7 @@ * reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -606,3 +607,36 @@ int ompi_group_count_local_peers (ompi_group_t *group) return local_peers; } + +int ompi_group_to_proc_name_array (ompi_group_t *group, opal_process_name_t **name_array, size_t *name_array_size) +{ + opal_process_name_t *array = calloc (group->grp_proc_count, sizeof (array[0])); + + if (NULL == array) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (int i = 0 ; i < group->grp_proc_count ; ++i) { + array[i] = ompi_group_get_proc_name (group, i); + } + + *name_array = array; + *name_array_size = group->grp_proc_count; + + return OMPI_SUCCESS; +} + +bool ompi_group_overlap (const ompi_group_t *group1, const ompi_group_t *group2) +{ + for (int i = 0 ; i < group1->grp_proc_count ; ++i) { + opal_process_name_t proc1 = ompi_group_get_proc_name (group1, i); + for (int j = 0 ; j < group2->grp_proc_count ; ++j) { + opal_process_name_t proc2 = ompi_group_get_proc_name (group2, j); + if (0 == opal_compare_proc (proc1, proc2)) { + return true; + } + } + } + + return false; +} diff --git a/ompi/group/group.h b/ompi/group/group.h index 966ab5f8306..1e87ecd8556 100644 --- a/ompi/group/group.h +++ b/ompi/group/group.h @@ -14,10 +14,12 @@ * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Los Alamos National Security, LLC.
All rights + * Copyright (c) 2013-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,6 +41,7 @@ #include "opal/class/opal_pointer_array.h" #include "opal/mca/threads/threads.h" #include "opal/util/output.h" +#include "ompi/instance/instance.h" BEGIN_C_DECLS @@ -98,6 +101,8 @@ struct ompi_group_t { struct ompi_group_strided_data_t grp_strided; struct ompi_group_bitmap_data_t grp_bitmap; } sparse_data; + + ompi_instance_t *grp_instance; /**< instance this group was allocated within */ }; typedef struct ompi_group_t ompi_group_t; @@ -168,6 +173,19 @@ ompi_group_t *ompi_group_allocate_sporadic(int group_size); ompi_group_t *ompi_group_allocate_strided(void); ompi_group_t *ompi_group_allocate_bmap(int orig_group_size, int group_size); +/** + * @brief Allocate a dense group from a group + * + * @param[in] group group + * + * @returns new group pointer on success + * @returns NULL on error + * + * This function duplicates a group. The new group will have a dense process + * table. + */ +ompi_group_t *ompi_group_flatten (ompi_group_t *group, int max_procs); + /** * Increment the reference count of the proc structures. * @@ -193,14 +211,6 @@ OMPI_DECLSPEC void ompi_group_decrement_proc_count(ompi_group_t *group); int ompi_group_init(void); -/** - * Clean up OMPI group infrastructure. - * - * @return Error code - */ -int ompi_group_finalize(void); - - /** * Get group size. 
* @@ -384,15 +394,15 @@ static inline ompi_proc_t *ompi_group_get_proc_ptr (ompi_group_t *group, int ran #if OMPI_GROUP_SPARSE do { if (OMPI_GROUP_IS_DENSE(group)) { - return ompi_group_dense_lookup (group, rank, allocate); + break; } int ranks1 = rank; ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank); group = group->grp_parent_group_ptr; } while (1); -#else - return ompi_group_dense_lookup (group, rank, allocate); #endif + + return ompi_group_dense_lookup (group, rank, allocate); } /** @@ -402,9 +412,23 @@ static inline ompi_proc_t *ompi_group_get_proc_ptr (ompi_group_t *group, int ran * or cached in the proc hash table) or a sentinel value representing the proc. This * differs from ompi_group_get_proc_ptr() which returns the ompi_proc_t or NULL. */ -ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank); +static inline ompi_proc_t *ompi_group_get_proc_ptr_raw (const ompi_group_t *group, int rank) +{ +#if OMPI_GROUP_SPARSE + do { + if (OMPI_GROUP_IS_DENSE(group)) { + break; + } + int ranks1 = rank; + ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank); + group = group->grp_parent_group_ptr; + } while (1); +#endif -static inline opal_process_name_t ompi_group_get_proc_name (ompi_group_t *group, int rank) + return group->grp_proc_pointers[rank]; +} + +static inline opal_process_name_t ompi_group_get_proc_name (const ompi_group_t *group, int rank) { ompi_proc_t *proc = ompi_group_get_proc_ptr_raw (group, rank); if (ompi_proc_is_sentinel (proc)) { @@ -472,6 +496,17 @@ bool ompi_group_have_remote_peers (ompi_group_t *group); */ int ompi_group_count_local_peers (ompi_group_t *group); +/** + * @brief Check if groups overlap + * + * @param[in] group1 ompi group + * @param[in] group2 ompi group + * + * @returns true if any proc in group1 is also in group2 + * @returns false otherwise + */ +bool ompi_group_overlap (const ompi_group_t *group1, const ompi_group_t *group2); + /** * Function to 
print the group info */ @@ -482,5 +517,19 @@ int ompi_group_dump (ompi_group_t* group); */ int ompi_group_div_ceil (int num, int den); +/** + * Create a process name array from a group + */ +int ompi_group_to_proc_name_array (ompi_group_t *group, opal_process_name_t **name_array, size_t *name_array_size); + +/** + * Return instance from a group + */ + +static inline ompi_instance_t *ompi_group_get_instance(ompi_group_t *group) +{ + return group->grp_instance; +} + END_C_DECLS #endif /* OMPI_GROUP_H */ diff --git a/ompi/group/group_init.c b/ompi/group/group_init.c index fed47997218..3a92d888f2e 100644 --- a/ompi/group/group_init.c +++ b/ompi/group/group_init.c @@ -16,6 +16,8 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,6 +34,8 @@ static void ompi_group_construct(ompi_group_t *); static void ompi_group_destruct(ompi_group_t *); +static int ompi_group_finalize (void); + OBJ_CLASS_INSTANCE(ompi_group_t, opal_object_t, ompi_group_construct, @@ -227,6 +231,71 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size) return new_group; } +/** + * @brief Allocate a dense group from a group + * + * @param[in] group group + * + * @returns new group pointer on success + * @returns NULL on error + * + * This function duplicates a group. The new group will have a dense process + * table. + */ +ompi_group_t *ompi_group_flatten (ompi_group_t *group, int max_procs) +{ + int proc_count = (max_procs > group->grp_proc_count) ? 
group->grp_proc_count : max_procs; + size_t proc_pointer_array_size = proc_count * sizeof (group->grp_proc_pointers[0]); + ompi_group_t *new_group = OBJ_NEW(ompi_group_t);; + if (NULL == new_group) { + return NULL; + } + + if (0 > new_group->grp_f_to_c_index) { + OBJ_RELEASE (new_group); + return NULL; + } + + if (0 != proc_count) { + new_group->grp_proc_pointers = malloc (proc_pointer_array_size); + if (OPAL_UNLIKELY(NULL == new_group->grp_proc_pointers)) { + OBJ_RELEASE(new_group); + return NULL; + } + + /* + * Allocate array of (ompi_proc_t *)'s, one for each + * process in the group. + */ + if (!OMPI_GROUP_IS_DENSE(group)) { + for (int i = 0 ; i < proc_count ; i++) { + new_group->grp_proc_pointers[i] = ompi_group_peer_lookup (group, i); + } + } else { + memcpy (new_group->grp_proc_pointers, group->grp_proc_pointers, proc_pointer_array_size); + } + } + + /* set the group size */ + new_group->grp_proc_count = proc_count; + + if (group->grp_my_rank >= max_procs) { + /* initialize our rank to MPI_UNDEFINED */ + new_group->grp_my_rank = MPI_UNDEFINED; + } else { + /* rank is the same as in the old group */ + new_group->grp_my_rank = group->grp_my_rank; + } + + new_group->grp_instance = group->grp_instance; + + OMPI_GROUP_SET_DENSE(new_group); + + ompi_group_increment_proc_count (new_group); + + return new_group; +} + /* * increment the reference count of the proc structures */ @@ -363,6 +432,8 @@ int ompi_group_init(void) ompi_mpi_group_empty.group.grp_flags |= OMPI_GROUP_DENSE; ompi_mpi_group_empty.group.grp_flags |= OMPI_GROUP_INTRINSIC; + ompi_mpi_instance_append_finalize (ompi_group_finalize); + return OMPI_SUCCESS; } @@ -370,7 +441,7 @@ int ompi_group_init(void) /* * Clean up group infrastructure */ -int ompi_group_finalize(void) +static int ompi_group_finalize (void) { ompi_mpi_group_null.group.grp_flags = 0; OBJ_DESTRUCT(&ompi_mpi_group_null); diff --git a/ompi/group/group_plist.c b/ompi/group/group_plist.c index 16816a20659..771bd921efa 100644 --- 
a/ompi/group/group_plist.c +++ b/ompi/group/group_plist.c @@ -61,38 +61,6 @@ static int ompi_group_dense_overlap (ompi_group_t *group1, ompi_group_t *group2, return overlap_count; } -static struct ompi_proc_t *ompi_group_dense_lookup_raw (ompi_group_t *group, const int peer_id) -{ - if (OPAL_UNLIKELY(ompi_proc_is_sentinel (group->grp_proc_pointers[peer_id]))) { - ompi_proc_t *proc = - (ompi_proc_t *) ompi_proc_lookup (ompi_proc_sentinel_to_name ((uintptr_t) group->grp_proc_pointers[peer_id])); - if (NULL != proc) { - /* replace sentinel value with an actual ompi_proc_t */ - group->grp_proc_pointers[peer_id] = proc; - /* retain the proc */ - OBJ_RETAIN(group->grp_proc_pointers[peer_id]); - } - } - - return group->grp_proc_pointers[peer_id]; -} - -ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank) -{ -#if OMPI_GROUP_SPARSE - do { - if (OMPI_GROUP_IS_DENSE(group)) { - return ompi_group_dense_lookup_raw (group, rank); - } - int ranks1 = rank; - ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank); - group = group->grp_parent_group_ptr; - } while (1); -#else - return ompi_group_dense_lookup_raw (group, rank); -#endif -} - int ompi_group_calc_plist ( int n , const int *ranks ) { return sizeof(char *) * n ; } diff --git a/ompi/include/Makefile.am b/ompi/include/Makefile.am index dcb55c27730..222ec75bbbc 100644 --- a/ompi/include/Makefile.am +++ b/ompi/include/Makefile.am @@ -127,6 +127,6 @@ distclean-local: mpif-sizeof.h \ mpif-c-constants-decl.h mpif-c-constants.h mpif-f08-types.h -mpi_portable_platform.h: $(top_srcdir)/opal/include/opal/opal_portable_platform.h +mpi_portable_platform.h: $(top_srcdir)/opal/include/opal/opal_portable_platform_real.h -@rm -f mpi_portable_platform.h - $(OMPI_V_LN_S) $(LN_S) $(top_srcdir)/opal/include/opal/opal_portable_platform.h mpi_portable_platform.h + $(OMPI_V_LN_S) $(LN_S) $(top_srcdir)/opal/include/opal/opal_portable_platform_real.h mpi_portable_platform.h diff --git 
a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index 66e0c51e3fb..cf80693beae 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -17,11 +17,17 @@ * reserved. * Copyright (c) 2011-2013 INRIA. All rights reserved. * Copyright (c) 2015 University of Houston. All rights reserved. - * Copyright (c) 2015-2020 Research Organization for Information Science + * Copyright (c) 2015-2021 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017-2019 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. - * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2021-2022 Google, LLC. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. + * Copyright (c) 2021 Bull S.A.S. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -78,6 +84,9 @@ /* The size of a `int', as computed by sizeof. */ #undef OPAL_SIZEOF_INT +/* The size of a `void*', as computed by sizeof. */ +#undef OPAL_SIZEOF_VOID_P + /* Maximum length of datarep string (default is 128) */ #undef OPAL_MAX_DATAREP_STRING @@ -99,6 +108,12 @@ /* Maximum length of processor names (default is 256) */ #undef OPAL_MAX_PROCESSOR_NAME +/* Maximum length of process set (pset) names (default is 1024) */ +#undef OPAL_MAX_PSET_NAME_LEN + +/* Maximum length of the string tag passed to the comm-from-group functions (default is 256) */ +#undef OPAL_MAX_STRINGTAG_LEN + /* The number or Fortran INTEGER in MPI Status */ #undef OMPI_FORTRAN_STATUS_SIZE @@ -249,10 +264,29 @@ #if !OMPI_BUILDING /* * Figure out which compiler is being invoked (in order to compare if - * it was different than what OMPI was built with). + * it was different than what OMPI was built with).
Do some preprocessor + * hacks to eliminate warnings in the portable_platform.h file. */ +# ifndef SIZEOF_VOID_P +# define CLEANUP_SIZEOF_VOID_P 1 +# define SIZEOF_VOID_P OPAL_SIZEOF_VOID_P +# else +# define CLEANUP_SIZEOF_VOID_P 0 +# endif + +# ifndef _PORTABLE_PLATFORM_H +# define _PORTABLE_PLATFORM_H 0 +# endif +# ifndef PLATFORM_HEADER_VERSION +# define PLATFORM_HEADER_VERSION 0 +# endif + # include "mpi_portable_platform.h" +#if CLEANUP_SIZEOF_VOID_P +#undef SIZEOF_VOID_P +#endif + /* * If we're currently using the same compiler that was used to * build Open MPI, enable compile-time warning of user-level code @@ -418,6 +452,7 @@ typedef struct mca_base_var_enum_t *MPI_T_enum; typedef struct ompi_mpit_cvar_handle_t *MPI_T_cvar_handle; typedef struct mca_base_pvar_handle_t *MPI_T_pvar_handle; typedef struct mca_base_pvar_session_t *MPI_T_pvar_session; +typedef struct ompi_instance_t *MPI_Session; /* * MPI_Status @@ -457,6 +492,7 @@ typedef int (MPI_Datarep_extent_function)(MPI_Datatype, MPI_Aint *, void *); typedef int (MPI_Datarep_conversion_function)(void *, MPI_Datatype, int, void *, MPI_Offset, void *); typedef void (MPI_Comm_errhandler_function)(MPI_Comm *, int *, ...); +typedef void (MPI_Session_errhandler_function) (MPI_Session *, int *, ...); /* This is a little hackish, but errhandler.h needs space for a MPI_File_errhandler_function. 
While it could just be removed, this @@ -475,6 +511,7 @@ typedef int (MPI_Type_delete_attr_function)(MPI_Datatype, int, typedef int (MPI_Win_copy_attr_function)(MPI_Win, int, void *, void *, void *, int *); typedef int (MPI_Win_delete_attr_function)(MPI_Win, int, void *, void *); +typedef int (MPI_Session_delete_attr_function)(MPI_Session, int, void *, void *); typedef int (MPI_Grequest_query_function)(void *, MPI_Status *); typedef int (MPI_Grequest_free_function)(void *); typedef int (MPI_Grequest_cancel_function)(void *, int); @@ -529,6 +566,8 @@ typedef MPI_Win_errhandler_function MPI_Win_errhandler_fn #define MPI_DISTRIBUTE_CYCLIC 1 /* cyclic distribution */ #define MPI_DISTRIBUTE_NONE 2 /* not distributed */ #define MPI_DISTRIBUTE_DFLT_DARG (-1) /* default distribution arg */ +#define MPI_MAX_PSET_NAME_LEN OPAL_MAX_PSET_NAME_LEN /* max pset name len */ +#define MPI_MAX_STRINGTAG_LEN OPAL_MAX_STRINGTAG_LEN /* max length of string arg to comm from group funcs*/ /* * Since these values are arbitrary to Open MPI, we might as well make @@ -599,6 +638,7 @@ enum { /* MPI-4 */ MPI_FT, /* used by OPAL_ENABLE_FT_MPI */ + MPI_ATTR_PREDEFINED_KEY_MAX, }; /* @@ -827,6 +867,7 @@ enum { /* * NULL handles */ +#define MPI_SESSION_NULL OMPI_PREDEFINED_GLOBAL(MPI_Session, ompi_mpi_instance_null) #define MPI_GROUP_NULL OMPI_PREDEFINED_GLOBAL(MPI_Group, ompi_mpi_group_null) #define MPI_COMM_NULL OMPI_PREDEFINED_GLOBAL(MPI_Comm, ompi_mpi_comm_null) #define MPI_REQUEST_NULL OMPI_PREDEFINED_GLOBAL(MPI_Request, ompi_request_null) @@ -960,6 +1001,8 @@ OMPI_DECLSPEC extern struct ompi_predefined_communicator_t ompi_mpi_comm_null; OMPI_DECLSPEC extern struct ompi_predefined_group_t ompi_mpi_group_empty; OMPI_DECLSPEC extern struct ompi_predefined_group_t ompi_mpi_group_null; +OMPI_DECLSPEC extern struct ompi_predefined_instance_t ompi_mpi_instance_null; + OMPI_DECLSPEC extern struct ompi_predefined_request_t ompi_request_null; OMPI_DECLSPEC extern struct ompi_predefined_message_t 
ompi_message_null; @@ -1086,6 +1129,9 @@ OMPI_DECLSPEC extern struct ompi_predefined_info_t ompi_mpi_info_env; OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUS_IGNORE; OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUSES_IGNORE; +OMPI_DECLSPEC extern MPI_Fint *MPI_F08_STATUS_IGNORE; +OMPI_DECLSPEC extern MPI_Fint *MPI_F08_STATUSES_IGNORE; + /* * Removed datatypes. These datatypes are only available if Open MPI * was configured with --enable-mpi1-compatibility. @@ -1285,6 +1331,11 @@ OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_ub; #define PMPI_Aint_add(base, disp) MPI_Aint_add(base, disp) #define PMPI_Aint_diff(addr1, addr2) MPI_Aint_diff(addr1, addr2) +/* + * Predefined info keys + */ +#define MPI_INFO_KEY_SESSION_PSET_SIZE "size" + /* * MPI API */ @@ -1300,47 +1351,66 @@ OMPI_DECLSPEC int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatyp void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); OMPI_DECLSPEC int MPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, - MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); + void *recvbuf, int recvcount, + MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int recvcounts[], - const int displs[], MPI_Datatype recvtype, MPI_Comm comm); + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + MPI_Comm comm); OMPI_DECLSPEC int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int recvcounts[], - const int displs[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); + void *recvbuf, const int recvcounts[], const int 
displs[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr); -OMPI_DECLSPEC int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -OMPI_DECLSPEC int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Allreduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); OMPI_DECLSPEC int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, - MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); -OMPI_DECLSPEC int MPI_Alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], - MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], - const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm); -OMPI_DECLSPEC int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], - MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], - const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); + void *recvbuf, int recvcount, MPI_Datatype 
recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int MPI_Alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm); +OMPI_DECLSPEC int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Alltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm); OMPI_DECLSPEC int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[], - void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], - MPI_Comm comm, MPI_Request *request); + void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Alltoallw_init(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[], + void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Barrier(MPI_Comm comm); OMPI_DECLSPEC int 
MPI_Ibarrier(MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Barrier_init(MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm); +OMPI_DECLSPEC int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, + int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Bcast_init(void *buffer, int count, MPI_Datatype datatype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Bsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); -OMPI_DECLSPEC int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, - int root, MPI_Comm comm, - MPI_Request *request); OMPI_DECLSPEC int MPI_Bsend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); OMPI_DECLSPEC int MPI_Buffer_attach(void *buffer, int size); @@ -1372,12 +1442,16 @@ OMPI_DECLSPEC int MPI_Comm_create_keyval(MPI_Comm_copy_attr_function *comm_copy MPI_Comm_delete_attr_function *comm_delete_attr_fn, int *comm_keyval, void *extra_state); OMPI_DECLSPEC int MPI_Comm_create_group(MPI_Comm comm, MPI_Group group, int tag, MPI_Comm *newcomm); +OMPI_DECLSPEC int MPI_Comm_create_from_group(MPI_Group group, const char *tag, MPI_Info info, + MPI_Errhandler errhandler, MPI_Comm *newcomm); OMPI_DECLSPEC int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm); OMPI_DECLSPEC int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval); OMPI_DECLSPEC int MPI_Comm_disconnect(MPI_Comm *comm); OMPI_DECLSPEC int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm); OMPI_DECLSPEC int MPI_Comm_idup(MPI_Comm comm, MPI_Comm *newcomm, MPI_Request *request); OMPI_DECLSPEC int MPI_Comm_dup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm); +OMPI_DECLSPEC int MPI_Comm_idup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm, + MPI_Request *request); OMPI_DECLSPEC 
MPI_Comm MPI_Comm_f2c(MPI_Fint comm); OMPI_DECLSPEC int MPI_Comm_free_keyval(int *comm_keyval); OMPI_DECLSPEC int MPI_Comm_free(MPI_Comm *comm); @@ -1437,12 +1511,14 @@ OMPI_DECLSPEC MPI_Errhandler MPI_Errhandler_f2c(MPI_Fint errhandler); OMPI_DECLSPEC int MPI_Errhandler_free(MPI_Errhandler *errhandler); OMPI_DECLSPEC int MPI_Error_class(int errorcode, int *errorclass); OMPI_DECLSPEC int MPI_Error_string(int errorcode, char *string, int *resultlen); -OMPI_DECLSPEC int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Exscan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Fetch_and_op(const void *origin_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win); -OMPI_DECLSPEC int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); OMPI_DECLSPEC MPI_Fint MPI_File_c2f(MPI_File file); OMPI_DECLSPEC MPI_File MPI_File_f2c(MPI_Fint file); OMPI_DECLSPEC int MPI_File_call_errhandler(MPI_File fh, int errorcode); @@ -1505,7 +1581,7 @@ OMPI_DECLSPEC int MPI_File_get_byte_offset(MPI_File fh, MPI_Offset offset, OMPI_DECLSPEC int MPI_File_read_shared(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status); OMPI_DECLSPEC int MPI_File_write_shared(MPI_File fh, const void *buf, int count, - MPI_Datatype datatype, MPI_Status *status); + MPI_Datatype datatype, MPI_Status *status); OMPI_DECLSPEC int MPI_File_iread_shared(MPI_File fh, void *buf, int count, 
MPI_Datatype datatype, MPI_Request *request); OMPI_DECLSPEC int MPI_File_iwrite_shared(MPI_File fh, const void *buf, int count, @@ -1546,14 +1622,20 @@ OMPI_DECLSPEC int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype s void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); OMPI_DECLSPEC int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - int root, MPI_Comm comm, MPI_Request *request); + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Gather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root, MPI_Comm comm); OMPI_DECLSPEC int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int recvcounts[], const int displs[], - MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Gatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Get_address(const void *location, MPI_Aint *address); OMPI_DECLSPEC int MPI_Get_count(const MPI_Status *status, MPI_Datatype datatype, int *count); OMPI_DECLSPEC int MPI_Get_elements(const MPI_Status *status, MPI_Datatype datatype, int *count); @@ -1592,6 +1674,7 @@ OMPI_DECLSPEC int MPI_Group_excl(MPI_Group group, int n, const int ranks[], MPI_Group 
*newgroup); OMPI_DECLSPEC MPI_Group MPI_Group_f2c(MPI_Fint group); OMPI_DECLSPEC int MPI_Group_free(MPI_Group *group); +OMPI_DECLSPEC int MPI_Group_from_session_pset (MPI_Session session, const char *pset_name, MPI_Group *newgroup); OMPI_DECLSPEC int MPI_Group_incl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup); OMPI_DECLSPEC int MPI_Group_intersection(MPI_Group group1, MPI_Group group2, @@ -1625,6 +1708,8 @@ OMPI_DECLSPEC int MPI_Info_get_nkeys(MPI_Info info, int *nkeys); OMPI_DECLSPEC int MPI_Info_get_nthkey(MPI_Info info, int n, char *key); OMPI_DECLSPEC int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, int *flag); +OMPI_DECLSPEC int MPI_Info_get_string(MPI_Info info, const char *key, int *buflen, + char *value, int *flag); OMPI_DECLSPEC int MPI_Info_set(MPI_Info info, const char *key, const char *value); OMPI_DECLSPEC int MPI_Init(int *argc, char ***argv); OMPI_DECLSPEC int MPI_Initialized(int *flag); @@ -1633,6 +1718,9 @@ OMPI_DECLSPEC int MPI_Init_thread(int *argc, char ***argv, int required, OMPI_DECLSPEC int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm bridge_comm, int remote_leader, int tag, MPI_Comm *newintercomm); +OMPI_DECLSPEC int MPI_Intercomm_create_from_groups (MPI_Group local_group, int local_leader, MPI_Group remote_group, + int remote_leader, const char *tag, MPI_Info info, MPI_Errhandler errhandler, + MPI_Comm *newintercomm); OMPI_DECLSPEC int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintercomm); OMPI_DECLSPEC int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, @@ -1643,6 +1731,13 @@ OMPI_DECLSPEC int MPI_Irsend(const void *buf, int count, MPI_Datatype datatype, int tag, MPI_Comm comm, MPI_Request *request); OMPI_DECLSPEC int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Isendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + int dest, int 
sendtag, void *recvbuf, int recvcount, + MPI_Datatype recvtype, int source, int recvtag, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Isendrecv_replace(void * buf, int count, MPI_Datatype datatype, + int dest, int sendtag, int source, int recvtag, + MPI_Comm comm, MPI_Request *request); OMPI_DECLSPEC int MPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); OMPI_DECLSPEC int MPI_Is_thread_main(int *flag); @@ -1663,27 +1758,42 @@ OMPI_DECLSPEC int MPI_Ineighbor_allgather(const void *sendbuf, int sendcount, M OMPI_DECLSPEC int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm); +OMPI_DECLSPEC int MPI_Neighbor_allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int recvcounts[], const int displs[], - MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Neighbor_allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Neighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); OMPI_DECLSPEC int MPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int 
MPI_Neighbor_alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm); OMPI_DECLSPEC int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Neighbor_alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm); OMPI_DECLSPEC int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Neighbor_alltoallw_init(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], + void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC MPI_Fint MPI_Op_c2f(MPI_Op op); OMPI_DECLSPEC int MPI_Op_commutative(MPI_Op op, int *commute); OMPI_DECLSPEC int MPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op); @@ 
-1707,11 +1817,11 @@ OMPI_DECLSPEC int MPI_Pready_range(int partition_low, int partition_high, OMPI_DECLSPEC int MPI_Pready_list(int length, int partition_list[], MPI_Request request); OMPI_DECLSPEC int MPI_Precv_init(void* buf, int partitions, MPI_Count count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, - MPI_Request *request); + MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status); OMPI_DECLSPEC int MPI_Psend_init(const void* buf, int partitions, MPI_Count count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, - MPI_Request *request); + MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Publish_name(const char *service_name, MPI_Info info, const char *port_name); OMPI_DECLSPEC int MPI_Put(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, @@ -1725,20 +1835,26 @@ OMPI_DECLSPEC int MPI_Recv_init(void *buf, int count, MPI_Datatype datatype, in int tag, MPI_Comm comm, MPI_Request *request); OMPI_DECLSPEC int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status); -OMPI_DECLSPEC int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); -OMPI_DECLSPEC int MPI_Ireduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int root, MPI_Comm comm); +OMPI_DECLSPEC int MPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Reduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Reduce_local(const void *inbuf, void 
*inoutbuf, int count, MPI_Datatype datatype, MPI_Op op); -OMPI_DECLSPEC int MPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -OMPI_DECLSPEC int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); -OMPI_DECLSPEC int MPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -OMPI_DECLSPEC int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Reduce_scatter_init(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int MPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Register_datarep(const char *datarep, MPI_Datarep_conversion_function *read_conversion_fn, MPI_Datarep_conversion_function *write_conversion_fn, @@ -1765,22 +1881,30 @@ OMPI_DECLSPEC int MPI_Rsend(const void *ibuf, int count, MPI_Datatype datatype, 
OMPI_DECLSPEC int MPI_Rsend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); -OMPI_DECLSPEC int MPI_Scan(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -OMPI_DECLSPEC int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Scan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); OMPI_DECLSPEC int MPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - int root, MPI_Comm comm, MPI_Request *request); -OMPI_DECLSPEC int MPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], - MPI_Datatype sendtype, void *recvbuf, int recvcount, - MPI_Datatype recvtype, int root, MPI_Comm comm); -OMPI_DECLSPEC int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[], - MPI_Datatype sendtype, void *recvbuf, int recvcount, - MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Scatter_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int 
MPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm); +OMPI_DECLSPEC int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int MPI_Scatterv_init(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int MPI_Send_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); @@ -1793,6 +1917,18 @@ OMPI_DECLSPEC int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype OMPI_DECLSPEC int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, int dest, int sendtag, int source, int recvtag, MPI_Comm comm, MPI_Status *status); +OMPI_DECLSPEC MPI_Fint MPI_Session_c2f (const MPI_Session session); +OMPI_DECLSPEC int MPI_Session_create_errhandler (MPI_Session_errhandler_function *session_errhandler_fn, + MPI_Errhandler *errhandler); +OMPI_DECLSPEC int MPI_Session_finalize (MPI_Session *session); +OMPI_DECLSPEC int MPI_Session_get_info (MPI_Session session, MPI_Info *info_used); +OMPI_DECLSPEC int MPI_Session_get_num_psets (MPI_Session session, MPI_Info info, int *npset_names); +OMPI_DECLSPEC int MPI_Session_get_nth_pset (MPI_Session session, MPI_Info info, int n, int *len, char *pset_name); +OMPI_DECLSPEC int MPI_Session_get_pset_info (MPI_Session session, const char *pset_name, MPI_Info *info_used); +OMPI_DECLSPEC int MPI_Session_init (MPI_Info info, MPI_Errhandler errhandler, + MPI_Session *session); +OMPI_DECLSPEC MPI_Session MPI_Session_f2c (MPI_Fint session); +OMPI_DECLSPEC int MPI_Session_set_info (MPI_Session session, MPI_Info info); 
OMPI_DECLSPEC int MPI_Ssend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); @@ -1974,38 +2110,55 @@ OMPI_DECLSPEC int PMPI_Allgather(const void *sendbuf, int sendcount, MPI_Dataty void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); OMPI_DECLSPEC int PMPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, - MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); + void *recvbuf, int recvcount, + MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm); OMPI_DECLSPEC int PMPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int recvcounts[], - const int displs[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr); OMPI_DECLSPEC int PMPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -OMPI_DECLSPEC int PMPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Iallreduce(const void *sendbuf, 
void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Allreduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); OMPI_DECLSPEC int PMPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, - MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); -OMPI_DECLSPEC int PMPI_Alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], - MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], - const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm); -OMPI_DECLSPEC int PMPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], - MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], - const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype 
sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Alltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm); OMPI_DECLSPEC int PMPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[], - void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], - MPI_Comm comm, MPI_Request *request); + void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Alltoallw_init(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[], + void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Dist_graph_create(MPI_Comm comm_old, int n, const int nodes[], const int degrees[], const int targets[], const int weights[], MPI_Info info, @@ -2029,11 +2182,13 @@ OMPI_DECLSPEC int PMPI_Dist_graph_neighbors_count(MPI_Comm comm, int *weighted); OMPI_DECLSPEC int PMPI_Barrier(MPI_Comm comm); OMPI_DECLSPEC int PMPI_Ibarrier(MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Barrier_init(MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm); OMPI_DECLSPEC int PMPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, - int root, MPI_Comm comm, - MPI_Request *request); + int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Bcast_init(void *buffer, int count, MPI_Datatype datatype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int 
PMPI_Bsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); OMPI_DECLSPEC int PMPI_Bsend_init(const void *buf, int count, MPI_Datatype datatype, @@ -2067,12 +2222,16 @@ OMPI_DECLSPEC int PMPI_Comm_create_keyval(MPI_Comm_copy_attr_function *comm_cop MPI_Comm_delete_attr_function *comm_delete_attr_fn, int *comm_keyval, void *extra_state); OMPI_DECLSPEC int PMPI_Comm_create_group(MPI_Comm comm, MPI_Group group, int tag, MPI_Comm *newcomm); +OMPI_DECLSPEC int PMPI_Comm_create_from_group(MPI_Group group, const char *tag, MPI_Info info, + MPI_Errhandler errhandler, MPI_Comm *newcomm); OMPI_DECLSPEC int PMPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm); OMPI_DECLSPEC int PMPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval); OMPI_DECLSPEC int PMPI_Comm_disconnect(MPI_Comm *comm); OMPI_DECLSPEC int PMPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm); OMPI_DECLSPEC int PMPI_Comm_idup(MPI_Comm comm, MPI_Comm *newcomm, MPI_Request *request); OMPI_DECLSPEC int PMPI_Comm_dup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm); +OMPI_DECLSPEC int PMPI_Comm_idup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm, + MPI_Request *request); OMPI_DECLSPEC MPI_Comm PMPI_Comm_f2c(MPI_Fint comm); OMPI_DECLSPEC int PMPI_Comm_free_keyval(int *comm_keyval); OMPI_DECLSPEC int PMPI_Comm_free(MPI_Comm *comm); @@ -2111,12 +2270,14 @@ OMPI_DECLSPEC MPI_Errhandler PMPI_Errhandler_f2c(MPI_Fint errhandler); OMPI_DECLSPEC int PMPI_Errhandler_free(MPI_Errhandler *errhandler); OMPI_DECLSPEC int PMPI_Error_class(int errorcode, int *errorclass); OMPI_DECLSPEC int PMPI_Error_string(int errorcode, char *string, int *resultlen); -OMPI_DECLSPEC int PMPI_Exscan(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Iexscan(const void *sendbuf, void 
*recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Exscan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Fetch_and_op(const void *origin_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win); -OMPI_DECLSPEC int PMPI_Iexscan(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); OMPI_DECLSPEC MPI_Fint PMPI_File_c2f(MPI_File file); OMPI_DECLSPEC MPI_File PMPI_File_f2c(MPI_Fint file); OMPI_DECLSPEC int PMPI_File_call_errhandler(MPI_File fh, int errorcode); @@ -2220,14 +2381,20 @@ OMPI_DECLSPEC int PMPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); OMPI_DECLSPEC int PMPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - int root, MPI_Comm comm, MPI_Request *request); + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Gather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root, MPI_Comm comm); OMPI_DECLSPEC int PMPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int recvcounts[], const int displs[], - MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + int root, MPI_Comm comm, 
MPI_Request *request); +OMPI_DECLSPEC int PMPI_Gatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Get_address(const void *location, MPI_Aint *address); OMPI_DECLSPEC int PMPI_Get_count(const MPI_Status *status, MPI_Datatype datatype, int *count); OMPI_DECLSPEC int PMPI_Get_elements(const MPI_Status *status, MPI_Datatype datatype, @@ -2268,6 +2435,7 @@ OMPI_DECLSPEC int PMPI_Group_excl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup); OMPI_DECLSPEC MPI_Group PMPI_Group_f2c(MPI_Fint group); OMPI_DECLSPEC int PMPI_Group_free(MPI_Group *group); +OMPI_DECLSPEC int PMPI_Group_from_session_pset (MPI_Session session, const char *pset_name, MPI_Group *newgroup); OMPI_DECLSPEC int PMPI_Group_incl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup); OMPI_DECLSPEC int PMPI_Group_intersection(MPI_Group group1, MPI_Group group2, @@ -2299,6 +2467,8 @@ OMPI_DECLSPEC int PMPI_Info_get(MPI_Info info, const char *key, int valuelen, char *value, int *flag); OMPI_DECLSPEC int PMPI_Info_get_nkeys(MPI_Info info, int *nkeys); OMPI_DECLSPEC int PMPI_Info_get_nthkey(MPI_Info info, int n, char *key); +OMPI_DECLSPEC int PMPI_Info_get_string(MPI_Info info, const char *key, int *buflen, + char *value, int *flag); OMPI_DECLSPEC int PMPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, int *flag); OMPI_DECLSPEC int PMPI_Info_set(MPI_Info info, const char *key, const char *value); @@ -2309,6 +2479,9 @@ OMPI_DECLSPEC int PMPI_Init_thread(int *argc, char ***argv, int required, OMPI_DECLSPEC int PMPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm bridge_comm, int remote_leader, int tag, MPI_Comm *newintercomm); +OMPI_DECLSPEC int PMPI_Intercomm_create_from_groups (MPI_Group local_group, int local_leader, MPI_Group remote_group, + int remote_leader, 
const char *tag, MPI_Info info, MPI_Errhandler errhandler, + MPI_Comm *newintercomm); OMPI_DECLSPEC int PMPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintercomm); OMPI_DECLSPEC int PMPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, @@ -2319,14 +2492,21 @@ OMPI_DECLSPEC int PMPI_Irsend(const void *buf, int count, MPI_Datatype datatype int tag, MPI_Comm comm, MPI_Request *request); OMPI_DECLSPEC int PMPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Isendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + int dest, int sendtag, void *recvbuf, int recvcount, + MPI_Datatype recvtype, int source, int recvtag, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Isendrecv_replace(void * buf, int count, MPI_Datatype datatype, + int dest, int sendtag, int source, int recvtag, + MPI_Comm comm, MPI_Request *request); OMPI_DECLSPEC int PMPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); OMPI_DECLSPEC int PMPI_Precv_init(void* buf, int partitions, MPI_Count count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, - MPI_Request *request); + MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Psend_init(const void* buf, int partitions, MPI_Count count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, - MPI_Request *request); + MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Pready(int partitions, MPI_Request request); OMPI_DECLSPEC int PMPI_Pready_range(int partition_low, int partition_high, MPI_Request request); @@ -2347,30 +2527,45 @@ OMPI_DECLSPEC int PMPI_Neighbor_allgather(const void *sendbuf, int sendcount, M OMPI_DECLSPEC int PMPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int 
PMPI_Neighbor_allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm); OMPI_DECLSPEC int PMPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int recvcounts[], const int displs[], - MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Neighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); OMPI_DECLSPEC int PMPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm); OMPI_DECLSPEC int PMPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], 
MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm); OMPI_DECLSPEC int PMPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_alltoallw_init(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], + void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC MPI_Fint PMPI_Op_c2f(MPI_Op op); OMPI_DECLSPEC int PMPI_Op_commutative(MPI_Op op, int *commute); OMPI_DECLSPEC int PMPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op); @@ -2401,20 +2596,26 @@ OMPI_DECLSPEC int PMPI_Recv_init(void *buf, int count, MPI_Datatype datatype, i int tag, MPI_Comm comm, MPI_Request *request); OMPI_DECLSPEC int PMPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status); -OMPI_DECLSPEC int PMPI_Reduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); -OMPI_DECLSPEC int PMPI_Ireduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int 
PMPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int root, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Reduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Reduce_local(const void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op); -OMPI_DECLSPEC int PMPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -OMPI_DECLSPEC int PMPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); -OMPI_DECLSPEC int PMPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -OMPI_DECLSPEC int PMPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Reduce_scatter_init(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, 
MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Register_datarep(const char *datarep, MPI_Datarep_conversion_function *read_conversion_fn, MPI_Datarep_conversion_function *write_conversion_fn, @@ -2441,22 +2642,30 @@ OMPI_DECLSPEC int PMPI_Rsend(const void *ibuf, int count, MPI_Datatype datatype OMPI_DECLSPEC int PMPI_Rsend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); -OMPI_DECLSPEC int PMPI_Scan(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -OMPI_DECLSPEC int PMPI_Iscan(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Scan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); OMPI_DECLSPEC int PMPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - int root, MPI_Comm comm, MPI_Request *request); -OMPI_DECLSPEC int PMPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], - MPI_Datatype sendtype, void *recvbuf, int recvcount, - MPI_Datatype recvtype, int root, MPI_Comm comm); -OMPI_DECLSPEC int 
PMPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[], - MPI_Datatype sendtype, void *recvbuf, int recvcount, - MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Scatter_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Scatterv_init(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); OMPI_DECLSPEC int PMPI_Send_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); @@ -2469,6 +2678,18 @@ OMPI_DECLSPEC int PMPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatyp OMPI_DECLSPEC int PMPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, int dest, int sendtag, int source, int recvtag, MPI_Comm comm, MPI_Status *status); +OMPI_DECLSPEC MPI_Fint PMPI_Session_c2f (const MPI_Session session); +OMPI_DECLSPEC int PMPI_Session_create_errhandler (MPI_Session_errhandler_function *session_errhandler_fn, + MPI_Errhandler *errhandler); +OMPI_DECLSPEC int PMPI_Session_finalize (MPI_Session *session); +OMPI_DECLSPEC int PMPI_Session_get_info (MPI_Session session, MPI_Info *info_used); +OMPI_DECLSPEC int 
PMPI_Session_get_num_psets (MPI_Session session, MPI_Info info, int *npset_names); +OMPI_DECLSPEC int PMPI_Session_get_nth_pset (MPI_Session session, MPI_Info info, int n, int *len, char *pset_name); +OMPI_DECLSPEC int PMPI_Session_get_pset_info (MPI_Session session, const char *pset_name, MPI_Info *info_used); +OMPI_DECLSPEC int PMPI_Session_init (MPI_Info info, MPI_Errhandler errhandler, + MPI_Session *session); +OMPI_DECLSPEC MPI_Session PMPI_Session_f2c (MPI_Fint session); +OMPI_DECLSPEC int PMPI_Session_set_info (MPI_Session session, MPI_Info info); OMPI_DECLSPEC int PMPI_Ssend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); diff --git a/ompi/include/mpif-config.h.in b/ompi/include/mpif-config.h.in index a3a6d7b0c1e..fc2054df637 100644 --- a/ompi/include/mpif-config.h.in +++ b/ompi/include/mpif-config.h.in @@ -13,6 +13,8 @@ ! Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2013 Los Alamos National Security, LLC. All rights ! reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -60,6 +62,8 @@ integer MPI_MAX_INFO_VAL integer MPI_MAX_PORT_NAME integer MPI_MAX_DATAREP_STRING + integer MPI_MAX_PSET_NAME_LEN + integer MPI_MAX_STRINGTAG_LEN parameter (MPI_MAX_PROCESSOR_NAME=@OPAL_MAX_PROCESSOR_NAME@-1) parameter (MPI_MAX_ERROR_STRING=@OPAL_MAX_ERROR_STRING@-1) parameter (MPI_MAX_OBJECT_NAME=@OPAL_MAX_OBJECT_NAME@-1) @@ -68,6 +72,8 @@ parameter (MPI_MAX_INFO_VAL=@OPAL_MAX_INFO_VAL@-1) parameter (MPI_MAX_PORT_NAME=@OPAL_MAX_PORT_NAME@-1) parameter (MPI_MAX_DATAREP_STRING=@OPAL_MAX_DATAREP_STRING@-1) + parameter (MPI_MAX_PSET_NAME_LEN=@OPAL_MAX_PSET_NAME_LEN@-1) + parameter (MPI_MAX_STRINGTAG_LEN=@OPAL_MAX_STRINGTAG_LEN@-1) ! ! 
MPI F08 conformance diff --git a/ompi/include/ompi/constants.h b/ompi/include/ompi/constants.h index 41254141f79..c545ae7a596 100644 --- a/ompi/include/ompi/constants.h +++ b/ompi/include/ompi/constants.h @@ -62,6 +62,7 @@ enum { OMPI_ERR_BUFFER = OPAL_ERR_BUFFER, OMPI_ERR_SILENT = OPAL_ERR_SILENT, OMPI_ERR_HANDLERS_COMPLETE = OPAL_ERR_HANDLERS_COMPLETE, + OMPI_ERR_NOT_BOUND = OPAL_ERR_NOT_BOUND, OMPI_ERR_REQUEST = OMPI_ERR_BASE - 1, OMPI_ERR_RMA_SYNC = OMPI_ERR_BASE - 2, diff --git a/ompi/info/info.c b/ompi/info/info.c index 6785fde5dfa..4116142a8c8 100644 --- a/ompi/info/info.c +++ b/ompi/info/info.c @@ -17,7 +17,7 @@ * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. - * Copyright (c) 2019 Triad National Security, LLC. All rights + * Copyright (c) 2019-2021 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2020 Intel, Inc. All rights reserved. * $COPYRIGHT$ @@ -54,6 +54,7 @@ #include "ompi/runtime/mpiruntime.h" #include "ompi/runtime/params.h" #include "ompi/runtime/ompi_rte.h" +#include "ompi/instance/instance.h" /* * Global variables @@ -86,9 +87,9 @@ opal_pointer_array_t ompi_info_f_to_c_table = {{0}}; * fortran to C translation table. It also fills in the values * for the MPI_INFO_GET_ENV object */ + int ompi_mpiinfo_init(void) { - char *cptr, **tmp; /* initialize table */ @@ -102,10 +103,26 @@ int ompi_mpiinfo_init(void) OBJ_CONSTRUCT(&ompi_mpi_info_null.info, ompi_info_t); assert(ompi_mpi_info_null.info.i_f_to_c_index == 0); - /* Create MPI_INFO_ENV */ + /* Create MPI_INFO_ENV - we create here for the f_to_c. Can't fill in + here because most info needed is only available after a call to + ompi_rte_init. 
*/ OBJ_CONSTRUCT(&ompi_mpi_info_env.info, ompi_info_t); assert(ompi_mpi_info_env.info.i_f_to_c_index == 1); + ompi_mpi_instance_append_finalize (ompi_mpiinfo_finalize); + + /* All done */ + + return OMPI_SUCCESS; +} + +/* + * Fill in the MPI_INFO_ENV if using MPI3 initialization + */ +int ompi_mpiinfo_init_mpi3(void) +{ + char *cptr, **tmp; + /* fill the env info object */ /* command for this app_context */ @@ -213,9 +230,6 @@ int ompi_mpiinfo_init(void) int ompi_info_dup (ompi_info_t *info, ompi_info_t **newinfo) { return opal_info_dup (&(info->super), (opal_info_t **)newinfo); } -int ompi_info_dup_mpistandard (ompi_info_t *info, ompi_info_t **newinfo) { - return opal_info_dup_mpistandard (&(info->super), (opal_info_t **)newinfo); -} int ompi_info_set (ompi_info_t *info, const char *key, const char *value) { return opal_info_set (&(info->super), key, value); } @@ -368,6 +382,31 @@ static void info_destructor(ompi_info_t *info) } +ompi_info_t *ompi_info_allocate (void) +{ + ompi_info_t *new_info; + int rc; + + rc = ompi_mpi_instance_retain (); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + /* NTH: seriously, what can we do other than abort () or return? we failed to + * set up the most basic infrastructure! */ + return NULL; + } + + /* + * Call the object create function. This function not only + * allocates the space for MPI_Info, but also calls all the + * relevant init functions. Should I check if the fortran + * handle is valid + */ + new_info = OBJ_NEW(ompi_info_t); + if (NULL == new_info) { + return NULL; + } + + return new_info; +} /* * Free an info handle and all of its keys and values. 
@@ -377,5 +416,9 @@ int ompi_info_free (ompi_info_t **info) (*info)->i_freed = true; OBJ_RELEASE(*info); *info = MPI_INFO_NULL; + + /* release the retain() from info create/dup */ + ompi_mpi_instance_release (); + return MPI_SUCCESS; } diff --git a/ompi/info/info.h b/ompi/info/info.h index 4fffe6df14c..87df44c03e2 100644 --- a/ompi/info/info.h +++ b/ompi/info/info.h @@ -16,6 +16,9 @@ * reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2020 Intel, Inc. All rights reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -83,11 +86,17 @@ OMPI_DECLSPEC extern ompi_predefined_info_t *ompi_mpi_info_null_addr; OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_info_t); /** - * This function is invoked during ompi_mpi_init() and sets up + * This function is invoked during ompi_instance_retain() and sets up * MPI_Info handling. */ int ompi_mpiinfo_init(void); +/** + * This function is invoked during ompi_mpi_init() and sets up + * the MPI_INFO_ENV object + */ +int ompi_mpiinfo_init_mpi3(void); + /** * This function is used to free a ompi level info */ @@ -155,6 +164,15 @@ OMPI_DECLSPEC int ompi_info_value_to_bool(char *value, bool *interp); OMPI_DECLSPEC int ompi_info_get_nkeys(ompi_info_t *info, int *nkeys); +/** + * @brief Allocate a new info object + * + * This helper function ensures that the minimum infrastructure is initialized + * for creation/modification/destruction of an info object. Do not call + * OBJ_NEW(opal_info_t) directly. 
+ */ +OMPI_DECLSPEC ompi_info_t *ompi_info_allocate (void); + END_C_DECLS /** diff --git a/opal/include/opal/sys/arm64/update.sh b/ompi/instance/Makefile.am similarity index 67% rename from opal/include/opal/sys/arm64/update.sh rename to ompi/instance/Makefile.am index 94d8ed2714b..2ee7f5d59a3 100644 --- a/opal/include/opal/sys/arm64/update.sh +++ b/ompi/instance/Makefile.am @@ -1,4 +1,3 @@ -#!/bin/sh # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology @@ -10,6 +9,9 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. +# Copyright (c) 2018 Triad National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -17,20 +19,8 @@ # $HEADER$ # -CFILE=/tmp/opal_atomic_$$.c - -trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 - -echo Updating atomic.s from atomic.h using gcc +# This makefile.am does not stand on its own - it is included from ompi/Makefile.am -cat > $CFILE< -#include -#define static -#define inline -#define OPAL_GCC_INLINE_ASSEMBLY 1 -#include "../architecture.h" -#include "atomic.h" -EOF +headers += instance/instance.h -gcc -O1 -I. -S $CFILE -o atomic.s +lib@OMPI_LIBMPI_NAME@_la_SOURCES += instance/instance.c diff --git a/ompi/instance/instance.c b/ompi/instance/instance.c new file mode 100644 index 00000000000..8d9fd339fcd --- /dev/null +++ b/ompi/instance/instance.c @@ -0,0 +1,1323 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "instance.h" + +#include "opal/util/arch.h" + +#include "opal/util/show_help.h" +#include "opal/util/argv.h" +#include "opal/runtime/opal_params.h" + +#include "ompi/mca/pml/pml.h" +#include "ompi/runtime/params.h" + +#include "ompi/interlib/interlib.h" +#include "ompi/communicator/communicator.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/errhandler/errcode.h" +#include "ompi/message/message.h" +#include "ompi/info/info.h" +#include "ompi/attribute/attribute.h" +#include "ompi/op/op.h" +#include "ompi/dpm/dpm.h" +#include "ompi/file/file.h" +#include "ompi/mpiext/mpiext.h" + +#include "ompi/mca/hook/base/base.h" +#include "ompi/mca/op/base/base.h" +#include "opal/mca/allocator/base/base.h" +#include "opal/mca/rcache/base/base.h" +#include "opal/mca/mpool/base/base.h" +#include "ompi/mca/bml/base/base.h" +#include "ompi/mca/pml/base/base.h" +#include "ompi/mca/coll/base/base.h" +#include "ompi/mca/osc/base/base.h" +#include "ompi/mca/io/base/base.h" +#include "ompi/mca/topo/base/base.h" +#include "opal/mca/pmix/base/base.h" + +#include "opal/mca/mpool/base/mpool_base_tree.h" +#include "ompi/mca/pml/base/pml_base_bsend.h" +#include "ompi/util/timings.h" +#include "opal/mca/pmix/pmix-internal.h" + +ompi_predefined_instance_t ompi_mpi_instance_null = {{{{0}}}}; + +static opal_recursive_mutex_t instance_lock = OPAL_RECURSIVE_MUTEX_STATIC_INIT; + +/** MPI_Init instance */ +ompi_instance_t *ompi_mpi_instance_default = NULL; + +enum { + OMPI_INSTANCE_INITIALIZING = -1, + OMPI_INSTANCE_FINALIZING = -2, +}; + +opal_atomic_int32_t ompi_instance_count = 0; + +static const char *ompi_instance_builtin_psets[] = { + "mpi://WORLD", + "mpi://SELF", + "mpix://SHARED", +}; + +static const int32_t ompi_instance_builtin_count = 3; + +/** finalization functions that need to be called on teardown */ +static opal_finalize_domain_t ompi_instance_basic_domain; 
+static opal_finalize_domain_t ompi_instance_common_domain;
+
+/**
+ * Constructor for ompi_instance_t objects.
+ *
+ * Registers the new instance in the Fortran-to-C translation table and
+ * resets the name, flags and attribute keyhash to their empty state.
+ */
+static void ompi_instance_construct (ompi_instance_t *instance)
+{
+    instance->i_f_to_c_index = opal_pointer_array_add (&ompi_instance_f_to_c_table, instance);
+    instance->i_name[0] = '\0';
+    instance->i_flags = 0;
+    instance->i_keyhash = NULL;
+    instance->errhandler_type = OMPI_ERRHANDLER_TYPE_INSTANCE;
+}
+
+OBJ_CLASS_INSTANCE(ompi_instance_t, opal_infosubscriber_t, ompi_instance_construct, NULL);
+
+/* NTH: frameworks needed by MPI (NULL-terminated) */
+static mca_base_framework_t *ompi_framework_dependencies[] = {
+    &ompi_hook_base_framework, &ompi_op_base_framework,
+    &opal_allocator_base_framework, &opal_rcache_base_framework, &opal_mpool_base_framework,
+    &ompi_bml_base_framework, &ompi_pml_base_framework, &ompi_coll_base_framework,
+    &ompi_osc_base_framework, NULL,
+};
+
+/* frameworks kept separate from the dependency list above (NULL-terminated) */
+static mca_base_framework_t *ompi_lazy_frameworks[] = {
+    &ompi_io_base_framework, &ompi_topo_base_framework, NULL,
+};
+
+
+static int ompi_mpi_instance_finalize_common (void);
+
+/*
+ * Hash tables for MPI_Type_create_f90* functions
+ */
+opal_hash_table_t ompi_mpi_f90_integer_hashtable = {{0}};
+opal_hash_table_t ompi_mpi_f90_real_hashtable = {{0}};
+opal_hash_table_t ompi_mpi_f90_complex_hashtable = {{0}};
+
+static size_t ompi_mpi_instance_num_pmix_psets;
+static char **ompi_mpi_instance_pmix_psets;
+/*
+ * Per MPI-2:9.5.3, MPI_REGISTER_DATAREP is a memory leak. There is
+ * no way to *de*register datareps once they've been registered. So
+ * we have to track all registrations here so that they can be
+ * de-registered during MPI_FINALIZE so that memory-tracking debuggers
+ * don't show Open MPI as leaking memory.
+ */
+opal_list_t ompi_registered_datareps = {{0}};
+
+opal_pointer_array_t ompi_instance_f_to_c_table = {{0}};
+
+/*
+ * PMIx event handlers
+ */
+
+static size_t ompi_default_pmix_err_handler = 0;
+static size_t ompi_ulfm_pmix_err_handler = 0;
+
+/**
+ * Report an initialization failure through show_help and hand back the code.
+ *
+ * @param error  Short description of the step that failed; NULL suppresses
+ *               the message.
+ * @param ret    Error code of the failing step; OMPI_ERR_SILENT suppresses
+ *               the message.
+ *
+ * @return ret, unchanged, so callers can write "return print_error (...)".
+ */
+static int ompi_instance_print_error (const char *error, int ret)
+{
+    /* Only print a message if one was not already printed */
+    if (NULL != error && OMPI_ERR_SILENT != ret) {
+        const char *err_msg = opal_strerror(ret);
+        opal_show_help("help-mpi-runtime.txt",
+                       "mpi_init:startup:internal-failure", true,
+                       "MPI_INIT", "MPI_INIT", error, err_msg, ret);
+    }
+
+    return ret;
+}
+
+/**
+ * Hand every allocated proc to the PML's del_procs hook.
+ *
+ * @return OMPI_SUCCESS always.
+ */
+static int ompi_mpi_instance_cleanup_pml (void)
+{
+    /* call del_procs on all allocated procs even though some may not be known
+     * to the pml layer. the pml layer is expected to be resilient and ignore
+     * any unknown procs. */
+    size_t nprocs = 0;
+    ompi_proc_t **procs;
+
+    procs = ompi_proc_get_allocated (&nprocs);
+    MCA_PML_CALL(del_procs(procs, nprocs));
+    free(procs);
+
+    return OMPI_SUCCESS;
+}
+
+/**
+ * Static functions used to configure the interactions between the OPAL and
+ * the runtime.
+ */
+static char *_process_name_print_for_opal (const opal_process_name_t procname)
+{
+    ompi_process_name_t *rte_name = (ompi_process_name_t*)&procname;
+    return OMPI_NAME_PRINT(rte_name);
+}
+
+static int _process_name_compare (const opal_process_name_t p1, const opal_process_name_t p2)
+{
+    ompi_process_name_t *o1 = (ompi_process_name_t *) &p1;
+    ompi_process_name_t *o2 = (ompi_process_name_t *) &p2;
+    return ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, o1, o2);
+}
+
+static int _convert_string_to_process_name (opal_process_name_t *name, const char* name_string)
+{
+    return ompi_rte_convert_string_to_process_name(name, name_string);
+}
+
+static int _convert_process_name_to_string (char **name_string, const opal_process_name_t *name)
+{
+    return ompi_rte_convert_process_name_to_string(name_string, name);
+}
+
+/* number of outstanding ompi_mpi_instance_retain() calls; guarded by instance_lock */
+static int32_t ompi_mpi_instance_init_basic_count;
+/* true while ompi_mpi_instance_retain() is running its first-caller setup */
+static bool ompi_instance_basic_init;
+
+/**
+ * Drop one reference on the basic instance infrastructure.
+ *
+ * When the reference count reaches zero this frees the cached PMIx pset
+ * names, runs and destroys the basic finalize domain and calls
+ * opal_finalize_util(). Otherwise it only decrements the count.
+ */
+void ompi_mpi_instance_release (void)
+{
+    opal_mutex_lock (&instance_lock);
+
+    if (0 != --ompi_mpi_instance_init_basic_count) {
+        opal_mutex_unlock (&instance_lock);
+        return;
+    }
+
+    opal_argv_free (ompi_mpi_instance_pmix_psets);
+    ompi_mpi_instance_pmix_psets = NULL;
+
+    opal_finalize_cleanup_domain (&ompi_instance_basic_domain);
+    OBJ_DESTRUCT(&ompi_instance_basic_domain);
+
+    opal_finalize_util ();
+
+    opal_mutex_unlock (&instance_lock);
+}
+
+/**
+ * Take one reference on the basic instance infrastructure.
+ *
+ * The first caller initializes OPAL utilities, the basic finalize domain,
+ * the instance Fortran-to-C table, the null instance, the OPAL naming
+ * hooks, MCA parameters, error handlers, error codes and info support.
+ * Later callers only bump the reference count.
+ *
+ * instance_lock is held for the whole call and is released on every
+ * return path, successful or not.
+ *
+ * @return OMPI_SUCCESS, or the error code of the initialization step that
+ *         failed.
+ */
+int ompi_mpi_instance_retain (void)
+{
+    int ret;
+
+    opal_mutex_lock (&instance_lock);
+
+    if (0 < ompi_mpi_instance_init_basic_count++) {
+        opal_mutex_unlock (&instance_lock);
+        return OMPI_SUCCESS;
+    }
+
+    /* Setup enough to check get/set MCA params */
+    if (OPAL_SUCCESS != (ret = opal_init_util (NULL, NULL))) {
+        opal_mutex_unlock (&instance_lock);
+        return ompi_instance_print_error ("ompi_mpi_instance_init: opal_init_util failed", ret);
+    }
+
+    ompi_instance_basic_init = true;
+
+    OBJ_CONSTRUCT(&ompi_instance_basic_domain, opal_finalize_domain_t);
+    opal_finalize_domain_init (&ompi_instance_basic_domain, "ompi_mpi_instance_retain");
+    opal_finalize_set_domain (&ompi_instance_basic_domain);
+
+    /* Setup f to c table */
+    OBJ_CONSTRUCT(&ompi_instance_f_to_c_table, opal_pointer_array_t);
+    if (OPAL_SUCCESS != opal_pointer_array_init (&ompi_instance_f_to_c_table, 8,
+                                                 OMPI_FORTRAN_HANDLE_MAX, 32)) {
+        /* do not return with instance_lock held: other threads calling
+         * retain/release would block forever */
+        opal_mutex_unlock (&instance_lock);
+        return OMPI_ERROR;
+    }
+
+    /* setup the default error handler on instance_null */
+    OBJ_CONSTRUCT(&ompi_mpi_instance_null, ompi_instance_t);
+    ompi_mpi_instance_null.instance.error_handler = &ompi_mpi_errors_return.eh;
+
+    /* Convince OPAL to use our naming scheme */
+    opal_process_name_print = _process_name_print_for_opal;
+    opal_compare_proc = _process_name_compare;
+    opal_convert_string_to_process_name = _convert_string_to_process_name;
+    opal_convert_process_name_to_string = _convert_process_name_to_string;
+    opal_proc_for_name = ompi_proc_for_name;
+
+    /* Register MCA variables */
+    if (OPAL_SUCCESS != (ret = ompi_mpi_register_params ())) {
+        opal_mutex_unlock (&instance_lock);
+        return ompi_instance_print_error ("ompi_mpi_init: ompi_register_mca_variables failed", ret);
+    }
+
+    /* initialize error handlers */
+    if (OMPI_SUCCESS != (ret = ompi_errhandler_init ())) {
+        opal_mutex_unlock (&instance_lock);
+        return ompi_instance_print_error ("ompi_errhandler_init() failed", ret);
+    }
+
+    /* initialize error codes */
+    if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_init ())) {
+        opal_mutex_unlock (&instance_lock);
+        return ompi_instance_print_error ("ompi_mpi_errcode_init() failed", ret);
+    }
+
+    /* initialize internal error codes */
+    if (OMPI_SUCCESS != (ret = ompi_errcode_intern_init ())) {
+        opal_mutex_unlock (&instance_lock);
+        return ompi_instance_print_error ("ompi_errcode_intern_init() failed", ret);
+    }
+
+    /* initialize info */
+    if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init ())) {
+        /* same as above: release the lock before reporting the failure */
+        opal_mutex_unlock (&instance_lock);
+        return ompi_instance_print_error ("ompi_info_init() failed", ret);
+    }
+
+    ompi_instance_basic_init = false;
+
+    opal_mutex_unlock (&instance_lock);
+
+    return OMPI_SUCCESS;
+}
+
+/**
+ * @brief Completion callback for the non-blocking PMIx fences issued below.
+ *
+ * Clears the flag the initiating thread is spinning on.
+ *
+ * @param[in] status  completion status passed by PMIx (not inspected)
+ * @param[in] cbdata  address of the volatile bool owned by the initiator
+ */
+static void fence_release(pmix_status_t status, void *cbdata)
+{
+    volatile bool *active = (volatile bool*)cbdata;
+    OPAL_ACQUIRE_OBJECT(active);
+    *active = false;
+    OPAL_POST_OBJECT(active);
+}
+
+/**
+ * @brief Callback invoked when an event-handler registration completes.
+ *
+ * Stores the registration status and the handler reference in the lock
+ * object supplied by the registering thread, then wakes that thread.
+ *
+ * @param[in] status         result of the registration
+ * @param[in] evhandler_ref  reference assigned to the registered handler
+ * @param[in] cbdata         opal_pmix_lock_t the registering thread waits on
+ */
+static void evhandler_reg_callbk(pmix_status_t status,
+                                 size_t evhandler_ref,
+                                 void *cbdata)
+{
+    opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
+
+    lock->status = status;
+    lock->errhandler_ref = evhandler_ref;
+
+    OPAL_PMIX_WAKEUP_THREAD(lock);
+}
+
+/**
+ * @brief Callback invoked when an event-handler deregistration completes.
+ *
+ * @param[in] status  result of the deregistration
+ * @param[in] cbdata  opal_pmix_lock_t the deregistering thread waits on
+ */
+static void evhandler_dereg_callbk(pmix_status_t status,
+                                   void *cbdata)
+{
+    opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
+
+    lock->status = status;
+
+    OPAL_PMIX_WAKEUP_THREAD(lock);
+}
+
+
+
+/**
+ * @brief Function that starts up the common components needed by all instances
+ *
+ * Called by ompi_mpi_instance_init() for the first instance only, with
+ * instance_lock held by the caller. This function must therefore never
+ * release instance_lock itself.
+ *
+ * @returns OMPI_SUCCESS, or the error code of the first step that failed.
+ *          Nothing that was already brought up is torn down on failure.
+ */
+static int ompi_mpi_instance_init_common (void)
+{
+    int ret;
+    ompi_proc_t **procs;
+    size_t nprocs;
+    volatile bool active;
+    bool background_fence = false;
+    pmix_info_t info[2];
+    pmix_status_t rc;
+    opal_pmix_lock_t mylock;
+    OMPI_TIMING_INIT(64);
+
+    ret = ompi_mpi_instance_retain ();
+    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+        return ret;
+    }
+
+    OBJ_CONSTRUCT(&ompi_instance_common_domain, opal_finalize_domain_t);
+    opal_finalize_domain_init (&ompi_instance_common_domain, "ompi_mpi_instance_init_common");
+    opal_finalize_set_domain (&ompi_instance_common_domain);
+
+    if (OPAL_SUCCESS != (ret = opal_arch_set_fortran_logical_size(sizeof(ompi_fortran_logical_t)))) {
+        return ompi_instance_print_error ("ompi_mpi_init: opal_arch_set_fortran_logical_size failed", ret);
+    }
+
+    /* _After_ opal_init_util() but _before_ orte_init(), we need to
+       set an MCA param that tells libevent that it's ok to use any
+       mechanism in libevent that is available on this platform (e.g.,
+       epoll and friends). Per opal/event/event.s, we default to
+       select/poll -- but we know that MPI processes won't be using
+       pty's with the event engine, so it's ok to relax this
+       constraint and let any fd-monitoring mechanism be used. */
+
+    ret = mca_base_var_find("opal", "event", "*", "event_include");
+    if (ret >= 0) {
+        char *allvalue = "all";
+        /* We have to explicitly "set" the MCA param value here
+           because libevent initialization will re-register the MCA
+           param and therefore override the default. Setting the value
+           here puts the desired value ("all") in different storage
+           that is not overwritten if/when the MCA param is
+           re-registered. This is unless the user has specified a different
+           value for this MCA parameter. Make sure we check to see if the
+           default is specified before forcing "all" in case that is not what
+           the user desires. Note that we do *NOT* set this value as an
+           environment variable, just so that it won't be inherited by
+           any spawned processes and potentially cause unintended
+           side-effects with launching RTE tools... */
+        mca_base_var_set_value(ret, allvalue, 4, MCA_BASE_VAR_SOURCE_DEFAULT, NULL);
+    }
+
+    OMPI_TIMING_NEXT("initialization");
+
+    /* Setup RTE */
+    if (OMPI_SUCCESS != (ret = ompi_rte_init (NULL, NULL))) {
+        return ompi_instance_print_error ("ompi_mpi_init: ompi_rte_init failed", ret);
+    }
+
+    /* open the ompi hook framework */
+    for (int i = 0 ; ompi_framework_dependencies[i] ; ++i) {
+        ret = mca_base_framework_open (ompi_framework_dependencies[i], 0);
+        if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
+            char error_msg[256];
+            snprintf (error_msg, sizeof(error_msg), "mca_base_framework_open on %s_%s failed",
+                      ompi_framework_dependencies[i]->framework_project,
+                      ompi_framework_dependencies[i]->framework_name);
+            return ompi_instance_print_error (error_msg, ret);
+        }
+    }
+
+    OMPI_TIMING_NEXT("rte_init");
+    OMPI_TIMING_IMPORT_OPAL("orte_ess_base_app_setup");
+    OMPI_TIMING_IMPORT_OPAL("rte_init");
+
+    ompi_rte_initialized = true;
+
+    /* Register the default errhandler callback */
+    /* give it a name so we can distinguish it */
+    PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "MPI-Default", PMIX_STRING);
+    OPAL_PMIX_CONSTRUCT_LOCK(&mylock);
+    PMIx_Register_event_handler(NULL, 0, info, 1, ompi_errhandler_callback, evhandler_reg_callbk, (void*)&mylock);
+    OPAL_PMIX_WAIT_THREAD(&mylock);
+    rc = mylock.status;
+    ompi_default_pmix_err_handler = mylock.errhandler_ref;
+    OPAL_PMIX_DESTRUCT_LOCK(&mylock);
+    PMIX_INFO_DESTRUCT(&info[0]);
+    if (PMIX_SUCCESS != rc) {
+        ompi_default_pmix_err_handler = 0;
+        ret = opal_pmix_convert_status(rc);
+        return ret;
+    }
+
+    /* Register the ULFM errhandler callback */
+    /* we want to go first */
+    PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_PREPEND, NULL, PMIX_BOOL);
+    /* give it a name so we can distinguish it */
+    PMIX_INFO_LOAD(&info[1], PMIX_EVENT_HDLR_NAME, "ULFM-Event-handler", PMIX_STRING);
+    OPAL_PMIX_CONSTRUCT_LOCK(&mylock);
+    pmix_status_t codes[2] = { PMIX_ERR_PROC_ABORTED, PMIX_ERR_LOST_CONNECTION };
+    /* NOTE(review): codes[] holds two entries but a count of 1 is passed, so
+     * only PMIX_ERR_PROC_ABORTED is registered -- confirm this is intended */
+    PMIx_Register_event_handler(codes, 1, info, 2, ompi_errhandler_callback, evhandler_reg_callbk, (void*)&mylock);
+    OPAL_PMIX_WAIT_THREAD(&mylock);
+    rc = mylock.status;
+    ompi_ulfm_pmix_err_handler = mylock.errhandler_ref;
+    OPAL_PMIX_DESTRUCT_LOCK(&mylock);
+    PMIX_INFO_DESTRUCT(&info[0]);
+    PMIX_INFO_DESTRUCT(&info[1]);
+    if (PMIX_SUCCESS != rc) {
+        ompi_ulfm_pmix_err_handler = 0;
+        ret = opal_pmix_convert_status(rc);
+        return ret;
+    }
+
+    /* initialize info */
+    if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init_mpi3())) {
+        return ompi_instance_print_error ("ompi_info_init_mpi3() failed", ret);
+    }
+
+    /* declare our presence for interlib coordination, and
+     * register for callbacks when other libs declare. XXXXXX -- TODO -- figure out how
+     * to specify the thread level when different instances may request different levels. */
+    if (OMPI_SUCCESS != (ret = ompi_interlib_declare(MPI_THREAD_MULTIPLE, OMPI_IDENT_STRING))) {
+        return ompi_instance_print_error ("ompi_interlib_declare", ret);
+    }
+
+    /* initialize datatypes. This step should be done early as it will
+     * create the local convertor and local arch used in the proc
+     * init.
+     */
+    if (OMPI_SUCCESS != (ret = ompi_datatype_init())) {
+        return ompi_instance_print_error ("ompi_datatype_init() failed", ret);
+    }
+
+    /* Initialize OMPI procs */
+    if (OMPI_SUCCESS != (ret = ompi_proc_init())) {
+        return ompi_instance_print_error ("mca_proc_init() failed", ret);
+    }
+
+    /* Initialize the op framework. This has to be done *after*
+       ddt_init, but before mca_coll_base_open, since some collective
+       modules (e.g., the hierarchical coll component) may need ops in
+       their query function. */
+    if (OMPI_SUCCESS != (ret = ompi_op_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
+        return ompi_instance_print_error ("ompi_op_base_find_available() failed", ret);
+    }
+
+    if (OMPI_SUCCESS != (ret = ompi_op_init())) {
+        return ompi_instance_print_error ("ompi_op_init() failed", ret);
+    }
+
+    /* In order to reduce the common case for MPI apps (where they
+       don't use MPI-2 IO or MPI-1/3 topology functions), the io and
+       topo frameworks are initialized lazily, at the first use of
+       relevant functions (e.g., MPI_FILE_*, MPI_CART_*, MPI_GRAPH_*),
+       so they are not opened here. */
+
+    /* Select which MPI components to use */
+
+    if (OMPI_SUCCESS != (ret = mca_pml_base_select (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
+        return ompi_instance_print_error ("mca_pml_base_select() failed", ret);
+    }
+
+    OMPI_TIMING_IMPORT_OPAL("orte_init");
+    OMPI_TIMING_NEXT("rte_init-commit");
+
+    /* exchange connection info - this function may also act as a barrier
+     * if data exchange is required. The modex occurs solely across procs
+     * in our job. If a barrier is required, the "modex" function will
+     * perform it internally */
+    rc = PMIx_Commit();
+    if (PMIX_SUCCESS != rc) {
+        ret = opal_pmix_convert_status(rc);
+        return ret; /* TODO: need to fix this */
+    }
+
+    OMPI_TIMING_NEXT("commit");
+#if (OPAL_ENABLE_TIMING)
+    if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex &&
+        opal_pmix_collect_all_data && !ompi_singleton) {
+        if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) {
+            ret = opal_pmix_convert_status(rc);
+            return ompi_instance_print_error ("timing: pmix-barrier-1 failed", ret);
+        }
+        OMPI_TIMING_NEXT("pmix-barrier-1");
+        if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) {
+            /* convert the status of *this* fence; ret would otherwise still
+             * hold the (successful) result of an earlier step */
+            ret = opal_pmix_convert_status(rc);
+            return ompi_instance_print_error ("timing: pmix-barrier-2 failed", ret);
+        }
+        OMPI_TIMING_NEXT("pmix-barrier-2");
+    }
+#endif
+
+    if (!ompi_singleton) {
+        if (opal_pmix_base_async_modex) {
+            /* if we are doing an async modex, but we are collecting all
+             * data, then execute the non-blocking modex in the background.
+             * All calls to modex_recv will be cached until the background
+             * modex completes. If collect_all_data is false, then we skip
+             * the fence completely and retrieve data on-demand from the
+             * source node.
+             */
+            if (opal_pmix_collect_all_data) {
+                /* execute the fence_nb in the background to collect
+                 * the data */
+                background_fence = true;
+                active = true;
+                OPAL_POST_OBJECT(&active);
+                PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
+                /* NOTE(review): info[0] is loaded above but the fence is
+                 * issued with (NULL, 0), unlike the blocking branch below
+                 * which passes (info, 1) -- confirm which is intended */
+                if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0,
+                                                        fence_release,
+                                                        (void*)&active))) {
+                    ret = opal_pmix_convert_status(rc);
+                    return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret);
+                }
+            }
+        } else {
+            /* we want to do the modex - we block at this point, but we must
+             * do so in a manner that allows us to call opal_progress so our
+             * event library can be cycled as we have tied PMIx to that
+             * event base */
+            active = true;
+            OPAL_POST_OBJECT(&active);
+            PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
+            rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active);
+            if( PMIX_SUCCESS != rc) {
+                ret = opal_pmix_convert_status(rc);
+                return ompi_instance_print_error ("PMIx_Fence() failed", ret);
+            }
+            /* cannot just wait on thread as we need to call opal_progress */
+            OMPI_LAZY_WAIT_FOR_COMPLETION(active);
+        }
+    }
+
+    OMPI_TIMING_NEXT("modex");
+
+    /* select buffered send allocator component to be used */
+    if (OMPI_SUCCESS != (ret = mca_pml_base_bsend_init ())) {
+        return ompi_instance_print_error ("mca_pml_base_bsend_init() failed", ret);
+    }
+
+    if (OMPI_SUCCESS != (ret = mca_coll_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
+        return ompi_instance_print_error ("mca_coll_base_find_available() failed", ret);
+    }
+
+    if (OMPI_SUCCESS != (ret = ompi_osc_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
+        return ompi_instance_print_error ("ompi_osc_base_find_available() failed", ret);
+    }
+
+    /* io and topo components are not selected here -- see comment
+       above about the io and topo frameworks being loaded lazily */
+
+    /* Initialize each MPI handle subsystem */
+    /* initialize requests */
+    if (OMPI_SUCCESS != (ret = ompi_request_init ())) {
+        return ompi_instance_print_error ("ompi_request_init() failed", ret);
+    }
+
+    if (OMPI_SUCCESS != (ret = ompi_message_init ())) {
+        return ompi_instance_print_error ("ompi_message_init() failed", ret);
+    }
+
+    /* initialize groups */
+    if (OMPI_SUCCESS != (ret = ompi_group_init ())) {
+        return ompi_instance_print_error ("ompi_group_init() failed", ret);
+    }
+
+    ompi_mpi_instance_append_finalize (ompi_mpi_instance_cleanup_pml);
+
+    /* initialize communicator subsystem */
+    if (OMPI_SUCCESS != (ret = ompi_comm_init ())) {
+        /* instance_lock belongs to the caller, which releases it on every
+         * failure return; unlocking here as well would unlock it twice */
+        return ompi_instance_print_error ("ompi_comm_init() failed", ret);
+    }
+
+    if (mca_pml_base_requires_world ()) {
+        /* need to set up comm world for this instance -- XXX -- FIXME -- probably won't always
+         * be the case. */
+        if (OMPI_SUCCESS != (ret = ompi_comm_init_mpi3 ())) {
+            return ompi_instance_print_error ("ompi_comm_init_mpi3 () failed", ret);
+        }
+    }
+
+    /* initialize file handles */
+    if (OMPI_SUCCESS != (ret = ompi_file_init ())) {
+        return ompi_instance_print_error ("ompi_file_init() failed", ret);
+    }
+
+    /* initialize windows */
+    if (OMPI_SUCCESS != (ret = ompi_win_init ())) {
+        return ompi_instance_print_error ("ompi_win_init() failed", ret);
+    }
+
+    /* Setup the dynamic process management (DPM) subsystem */
+    if (OMPI_SUCCESS != (ret = ompi_dpm_init ())) {
+        return ompi_instance_print_error ("ompi_dpm_init() failed", ret);
+    }
+
+
+    /* identify the architectures of remote procs and setup
+     * their datatype convertors, if required
+     */
+    if (OMPI_SUCCESS != (ret = ompi_proc_complete_init())) {
+        return ompi_instance_print_error ("ompi_proc_complete_init failed", ret);
+    }
+
+    /* start PML/BTL's */
+    ret = MCA_PML_CALL(enable(true));
+    if( OMPI_SUCCESS != ret ) {
+        return ompi_instance_print_error ("PML control failed", ret);
+    }
+
+    /* some btls/mtls require we call add_procs with all procs in the job.
+     * since the btls/mtls have no visibility here it is up to the pml to
+     * convey this requirement */
+    if (mca_pml_base_requires_world ()) {
+        if (NULL == (procs = ompi_proc_world (&nprocs))) {
+            /* ret still holds OMPI_SUCCESS from enable() above; report a
+             * real error so the caller does not see a successful init */
+            ret = OMPI_ERROR;
+            return ompi_instance_print_error ("ompi_proc_world () failed", ret);
+        }
+    } else {
+        /* add all allocated ompi_proc_t's to PML (below the add_procs limit this
+         * behaves identically to ompi_proc_world ()) */
+        if (NULL == (procs = ompi_proc_get_allocated (&nprocs))) {
+            /* same stale-ret hazard as the branch above */
+            ret = OMPI_ERROR;
+            return ompi_instance_print_error ("ompi_proc_get_allocated () failed", ret);
+        }
+    }
+
+    ret = MCA_PML_CALL(add_procs(procs, nprocs));
+    free(procs);
+    /* If we got "unreachable", then print a specific error message.
+       Otherwise, if we got some other failure, fall through to print
+       a generic message. */
+    if (OMPI_ERR_UNREACH == ret) {
+        opal_show_help("help-mpi-runtime.txt",
+                       "mpi_init:startup:pml-add-procs-fail", true);
+        return ret;
+    } else if (OMPI_SUCCESS != ret) {
+        return ompi_instance_print_error ("PML add procs failed", ret);
+    }
+
+    /* Determine the overall threadlevel support of all processes
+       in MPI_COMM_WORLD. This has to be done before calling
+       coll_base_comm_select, since some of the collective components
+       e.g. hierarch, might create subcommunicators. The threadlevel
+       requested by all processes is required in order to know
+       which cid allocation algorithm can be used. */
+    if (OMPI_SUCCESS != ( ret = ompi_comm_cid_init ())) {
+        return ompi_instance_print_error ("ompi_mpi_init: ompi_comm_cid_init failed", ret);
+    }
+
+    /* Do we need to wait for a debugger? */
+    ompi_rte_wait_for_debugger();
+
+    /* Next timing measurement */
+    OMPI_TIMING_NEXT("modex-barrier");
+
+    if (!ompi_singleton) {
+        /* if we executed the above fence in the background, then
+         * we have to wait here for it to complete. However, there
+         * is no reason to do two barriers! */
+        if (background_fence) {
+            OMPI_LAZY_WAIT_FOR_COMPLETION(active);
+        } else if (!ompi_async_mpi_init) {
+            /* wait for everyone to reach this point - this is a hard
+             * barrier requirement at this time, though we hope to relax
+             * it at a later point */
+            bool flag = false;
+            active = true;
+            OPAL_POST_OBJECT(&active);
+            PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &flag, PMIX_BOOL);
+            if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1,
+                                                    fence_release, (void*)&active))) {
+                ret = opal_pmix_convert_status(rc);
+                return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret);
+            }
+            OMPI_LAZY_WAIT_FOR_COMPLETION(active);
+        }
+    }
+
+    /* check for timing request - get stop time and report elapsed
+       time if so, then start the clock again */
+    OMPI_TIMING_NEXT("barrier");
+
+#if OPAL_ENABLE_PROGRESS_THREADS == 0
+    /* Start setting up the event engine for MPI operations. Don't
+       block in the event library, so that communications don't take
+       forever between procs in the dynamic code. This will increase
+       CPU utilization for the remainder of MPI_INIT when we are
+       blocking on RTE-level events, but may greatly reduce non-TCP
+       latency. */
+    opal_progress_set_event_flag(OPAL_EVLOOP_NONBLOCK);
+#endif
+
+    /* Undo OPAL calling opal_progress_event_users_increment() during
+       opal_init, to get better latency when not using TCP. Do
+       this *after* dyn_init, as dyn init uses lots of RTE
+       communication and we don't want to hinder the performance of
+       that code. */
+    opal_progress_event_users_decrement();
+
+    /* see if yield_when_idle was specified - if so, use it */
+    opal_progress_set_yield_when_idle (ompi_mpi_yield_when_idle);
+
+    /* negative value means use default - just don't do anything */
+    if (ompi_mpi_event_tick_rate >= 0) {
+        opal_progress_set_event_poll_rate(ompi_mpi_event_tick_rate);
+    }
+
+    /* At this point, we are fully configured and in MPI mode. Any
+       communication calls here will work exactly like they would in
+       the user's code. Setup the connections between procs and warm
+       them up with simple sends, if requested */
+
+    if (OMPI_SUCCESS != (ret = ompi_mpiext_init())) {
+        return ompi_instance_print_error ("ompi_mpiext_init", ret);
+    }
+
+    /* Initialize the registered datarep list to be empty */
+    OBJ_CONSTRUCT(&ompi_registered_datareps, opal_list_t);
+
+    /* Initialize the arrays used to store the F90 types returned by the
+     * MPI_Type_create_f90_XXX functions.
+     */
+    OBJ_CONSTRUCT( &ompi_mpi_f90_integer_hashtable, opal_hash_table_t);
+    opal_hash_table_init(&ompi_mpi_f90_integer_hashtable, 16 /* why not? */);
+
+    OBJ_CONSTRUCT( &ompi_mpi_f90_real_hashtable, opal_hash_table_t);
+    opal_hash_table_init(&ompi_mpi_f90_real_hashtable, FLT_MAX_10_EXP);
+
+    OBJ_CONSTRUCT( &ompi_mpi_f90_complex_hashtable, opal_hash_table_t);
+    opal_hash_table_init(&ompi_mpi_f90_complex_hashtable, FLT_MAX_10_EXP);
+
+    return OMPI_SUCCESS;
+}
+
+int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance)
+{
+    ompi_instance_t *new_instance;
+    int ret;
+
+    *instance = &ompi_mpi_instance_null.instance;
+
+    /* If thread support was enabled, then setup OPAL to allow for them by default. This must be done
+     * early to prevent a race condition that can occur with orte_init().
*/
+    if (ts_level == MPI_THREAD_MULTIPLE) {
+        opal_set_using_threads(true);
+    }
+
+    opal_mutex_lock (&instance_lock);
+    if (0 == opal_atomic_fetch_add_32 (&ompi_instance_count, 1)) {
+        /* first instance: bring up the shared infrastructure */
+        ret = ompi_mpi_instance_init_common ();
+        if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
+            /* NOTE(review): ompi_instance_count stays incremented on this
+             * path, so a later call will skip the common init -- confirm */
+            opal_mutex_unlock (&instance_lock);
+            return ret;
+        }
+    }
+
+    new_instance = OBJ_NEW(ompi_instance_t);
+    if (OPAL_UNLIKELY(NULL == new_instance)) {
+        if (0 == opal_atomic_add_fetch_32 (&ompi_instance_count, -1)) {
+            /* Best-effort teardown: the out-of-resource error below is what
+             * the caller sees either way. The lock is released exactly once,
+             * after this block, whether or not the teardown succeeds. */
+            (void) ompi_mpi_instance_finalize_common ();
+        }
+        opal_mutex_unlock (&instance_lock);
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+    new_instance->error_handler = errhandler;
+    OBJ_RETAIN(new_instance->error_handler);
+
+    /* Copy info if there is one. */
+    if (OPAL_UNLIKELY(NULL != info)) {
+        new_instance->super.s_info = OBJ_NEW(opal_info_t);
+        opal_info_dup(info, &new_instance->super.s_info);
+    }
+
+    *instance = new_instance;
+    opal_mutex_unlock (&instance_lock);
+
+    return OMPI_SUCCESS;
+}
+
+/**
+ * @brief Tear down the components started by ompi_mpi_instance_init_common().
+ *
+ * Invoked when the instance count drops to zero; both callers in this file
+ * hold instance_lock across the call.
+ *
+ * @returns OMPI_SUCCESS, or the first error returned while leaving the RTE
+ *          or closing a framework (the remaining steps are then skipped).
+ */
+static int ompi_mpi_instance_finalize_common (void)
+{
+    uint32_t key;
+    ompi_datatype_t *datatype;
+    int ret;
+    opal_pmix_lock_t mylock;
+
+    /* As finalize is the last legal MPI call, we are allowed to force the release
+     * of the user buffer used for bsend, before going anywhere further.
+     */
+    (void) mca_pml_base_bsend_detach (NULL, NULL);
+
+    /* Shut down any bindings-specific issues: C++, F77, F90 */
+
+    /* Remove all memory associated by MPI_REGISTER_DATAREP (per
+       MPI-2:9.5.3, there is no way for an MPI application to
+       *un*register datareps, but we don't want the OMPI layer causing
+       memory leaks). */
+    OPAL_LIST_DESTRUCT(&ompi_registered_datareps);
+
+    /* Remove all F90 types from the hash tables */
+    OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_integer_hashtable)
+        OBJ_RELEASE(datatype);
+    OBJ_DESTRUCT(&ompi_mpi_f90_integer_hashtable);
+    OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_real_hashtable)
+        OBJ_RELEASE(datatype);
+    OBJ_DESTRUCT(&ompi_mpi_f90_real_hashtable);
+    OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_complex_hashtable)
+        OBJ_RELEASE(datatype);
+    OBJ_DESTRUCT(&ompi_mpi_f90_complex_hashtable);
+
+    /* If requested, print out a list of memory allocated by ALLOC_MEM
+       but not freed by FREE_MEM */
+    if (0 != ompi_debug_show_mpi_alloc_mem_leaks) {
+        mca_mpool_base_tree_print (ompi_debug_show_mpi_alloc_mem_leaks);
+    }
+
+    opal_finalize_cleanup_domain (&ompi_instance_common_domain);
+
+    if (NULL != ompi_mpi_main_thread) {
+        OBJ_RELEASE(ompi_mpi_main_thread);
+        ompi_mpi_main_thread = NULL;
+    }
+
+    /* deregister the two PMIx event handlers, waiting for each to complete */
+    if (0 != ompi_default_pmix_err_handler) {
+        OPAL_PMIX_CONSTRUCT_LOCK(&mylock);
+        PMIx_Deregister_event_handler(ompi_default_pmix_err_handler, evhandler_dereg_callbk, &mylock);
+        OPAL_PMIX_WAIT_THREAD(&mylock);
+        OPAL_PMIX_DESTRUCT_LOCK(&mylock);
+        ompi_default_pmix_err_handler = 0;
+    }
+
+    if (0 != ompi_ulfm_pmix_err_handler) {
+        OPAL_PMIX_CONSTRUCT_LOCK(&mylock);
+        PMIx_Deregister_event_handler(ompi_ulfm_pmix_err_handler, evhandler_dereg_callbk, &mylock);
+        OPAL_PMIX_WAIT_THREAD(&mylock);
+        OPAL_PMIX_DESTRUCT_LOCK(&mylock);
+        ompi_ulfm_pmix_err_handler = 0;
+    }
+
+    /* Leave the RTE */
+    if (OMPI_SUCCESS != (ret = ompi_rte_finalize())) {
+        return ret;
+    }
+
+    ompi_rte_initialized = false;
+
+    for (int i = 0 ; ompi_lazy_frameworks[i] ; ++i) {
+        if (0 < ompi_lazy_frameworks[i]->framework_refcnt) {
+            /* May have been "opened" multiple times. We want it closed now! */
+            ompi_lazy_frameworks[i]->framework_refcnt = 1;
+
+            ret = mca_base_framework_close (ompi_lazy_frameworks[i]);
+            if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
+                return ret;
+            }
+        }
+    }
+
+    /* close the framework dependencies in the reverse of the order in which
+     * ompi_mpi_instance_init_common() opened them */
+    int last_framework = 0;
+    for (int i = 0 ; ompi_framework_dependencies[i] ; ++i) {
+        last_framework = i;
+    }
+
+    for (int j = last_framework ; j >= 0; --j) {
+        ret = mca_base_framework_close (ompi_framework_dependencies[j]);
+        if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
+            return ret;
+        }
+    }
+
+    ompi_proc_finalize();
+
+    OBJ_DESTRUCT(&ompi_mpi_instance_null);
+
+    ompi_mpi_instance_release ();
+
+    if (0 == opal_initialized) {
+        /* if there is no MPI_T_init_thread that has been MPI_T_finalize'd,
+         * then be gentle to the app and release all the memory now (instead
+         * of the opal library destructor) */
+        opal_class_finalize ();
+    }
+
+    return OMPI_SUCCESS;
+}
+
+/**
+ * @brief Release an instance and, if it was the last one, tear down the
+ *        shared infrastructure.
+ *
+ * @param[in,out] instance  instance to release; set to the null instance on return
+ *
+ * @returns OMPI_SUCCESS, or the error from ompi_mpi_instance_finalize_common().
+ */
+int ompi_mpi_instance_finalize (ompi_instance_t **instance)
+{
+    int ret = OMPI_SUCCESS;
+
+    OBJ_RELEASE(*instance);
+
+    opal_mutex_lock (&instance_lock);
+    if (0 == opal_atomic_add_fetch_32 (&ompi_instance_count, -1)) {
+        /* the lock is released once, below, regardless of the outcome */
+        ret = ompi_mpi_instance_finalize_common ();
+    }
+    opal_mutex_unlock (&instance_lock);
+
+    *instance = &ompi_mpi_instance_null.instance;
+
+    return ret;
+}
+
+static void ompi_instance_get_num_psets_complete (pmix_status_t status,
+                                                  pmix_info_t *info,
+                                                  size_t ninfo,
+                                                  void *cbdata,
+                                                  pmix_release_cbfunc_t release_fn,
+                                                  void *release_cbdata)
+{
+    size_t n;
+    pmix_status_t rc;
+    size_t sz;
+    size_t num_pmix_psets = 0;
+    char *pset_names = NULL;
+
+    opal_pmix_lock_t *lock = (opal_pmix_lock_t *) cbdata;
+
+    for (n=0; n < ninfo; n++) {
+        if (0 == strcmp(info[n].key,PMIX_QUERY_NUM_PSETS)) {
+            PMIX_VALUE_UNLOAD(rc,
+                              &info[n].value,
+                              (void **)&num_pmix_psets,
+                              &sz);
+            if (num_pmix_psets != ompi_mpi_instance_num_pmix_psets) {
+                opal_argv_free (ompi_mpi_instance_pmix_psets);
+                ompi_mpi_instance_pmix_psets = NULL;
+            }
+
ompi_mpi_instance_num_pmix_psets = num_pmix_psets;
+        } else if (0 == strcmp (info[n].key, PMIX_QUERY_PSET_NAMES)) {
+            if (ompi_mpi_instance_pmix_psets) {
+                opal_argv_free (ompi_mpi_instance_pmix_psets);
+            }
+            PMIX_VALUE_UNLOAD(rc,
+                              &info[n].value,
+                              (void **)&pset_names,
+                              &sz);
+            ompi_mpi_instance_pmix_psets = opal_argv_split (pset_names, ',');
+            ompi_mpi_instance_num_pmix_psets = opal_argv_count (ompi_mpi_instance_pmix_psets);
+            free(pset_names);
+        }
+    }
+
+    if (NULL != release_fn) {
+        release_fn(release_cbdata);
+    }
+    OPAL_PMIX_WAKEUP_THREAD(lock);
+}
+
+/**
+ * @brief Query PMIx for @p key and block until the cached pset data is refreshed.
+ *
+ * The result is stored by ompi_instance_get_num_psets_complete() into
+ * ompi_mpi_instance_num_pmix_psets / ompi_mpi_instance_pmix_psets.
+ * instance_lock is held for the duration of the call.
+ *
+ * @param[in] key  PMIx query key (PMIX_QUERY_NUM_PSETS or PMIX_QUERY_PSET_NAMES
+ *                 are the ones used in this file)
+ */
+static void ompi_instance_refresh_pmix_psets (const char *key)
+{
+    pmix_status_t rc;
+    pmix_query_t query;
+    opal_pmix_lock_t lock;
+    bool refresh = true;
+
+    opal_mutex_lock (&instance_lock);
+
+    PMIX_QUERY_CONSTRUCT(&query);
+    PMIX_ARGV_APPEND(rc, query.keys, key);
+    PMIX_INFO_CREATE(query.qualifiers, 1);
+    query.nqual = 1;
+    PMIX_INFO_LOAD(&query.qualifiers[0], PMIX_QUERY_REFRESH_CACHE, &refresh, PMIX_BOOL);
+
+    OPAL_PMIX_CONSTRUCT_LOCK(&lock);
+
+    /*
+     * TODO: need to handle this better
+     */
+    rc = PMIx_Query_info_nb(&query, 1,
+                            ompi_instance_get_num_psets_complete,
+                            (void*)&lock);
+    if (PMIX_SUCCESS == rc) {
+        /* Only wait when the request was accepted: on failure the callback
+         * is never invoked, so waiting would block forever. The cached pset
+         * data is simply left as it was. */
+        OPAL_PMIX_WAIT_THREAD(&lock);
+    }
+    OPAL_PMIX_DESTRUCT_LOCK(&lock);
+
+    /* release the key array and qualifiers allocated above */
+    PMIX_QUERY_DESTRUCT(&query);
+
+    /* single unlock for both the success and the failure path */
+    opal_mutex_unlock (&instance_lock);
+}
+
+
+/**
+ * @brief Return the number of process sets (built-in plus PMIx-provided).
+ *
+ * @param[in]  instance     instance (not used by the lookup)
+ * @param[out] npset_names  total number of process sets
+ */
+int ompi_instance_get_num_psets (ompi_instance_t *instance, int *npset_names)
+{
+    ompi_instance_refresh_pmix_psets (PMIX_QUERY_NUM_PSETS);
+    *npset_names = ompi_instance_builtin_count + ompi_mpi_instance_num_pmix_psets;
+
+    return OMPI_SUCCESS;
+}
+
+/**
+ * @brief Return the name of the n-th process set.
+ *
+ * Built-in psets come first, followed by the PMIx-provided ones.
+ *
+ * @param[in]     instance   instance (not used by the lookup)
+ * @param[in]     n          zero-based pset index
+ * @param[in,out] len        size of @p pset_name in bytes; if 0 on entry it is
+ *                           set to the size needed (name length + 1) and
+ *                           nothing is copied
+ * @param[out]    pset_name  receives the name, truncated to *len - 1
+ *                           characters and always NUL-terminated
+ *
+ * @returns OMPI_SUCCESS, OMPI_ERR_BAD_PARAM for an out-of-range @p n or a
+ *          negative *len, OMPI_ERROR if the PMIx pset names are unavailable.
+ */
+int ompi_instance_get_nth_pset (ompi_instance_t *instance, int n, int *len, char *pset_name)
+{
+    const char *name;
+
+    if (NULL == ompi_mpi_instance_pmix_psets && n >= ompi_instance_builtin_count) {
+        ompi_instance_refresh_pmix_psets (PMIX_QUERY_PSET_NAMES);
+    }
+
+    if (n < 0 || (size_t) n >= (ompi_instance_builtin_count + ompi_mpi_instance_num_pmix_psets)) {
+        return OMPI_ERR_BAD_PARAM;
+    }
+
+    if (n < ompi_instance_builtin_count) {
+        name = ompi_instance_builtin_psets[n];
+    } else {
+        /* the refresh above may have failed, leaving a stale count but no names */
+        if (NULL == ompi_mpi_instance_pmix_psets) {
+            return OMPI_ERROR;
+        }
+        name = ompi_mpi_instance_pmix_psets[n - ompi_instance_builtin_count];
+    }
+
+    if (0 == *len) {
+        *len = (int) strlen (name) + 1;
+        return OMPI_SUCCESS;
+    }
+
+    if (*len < 0) {
+        /* a negative length would become a huge size_t in strncpy */
+        return OMPI_ERR_BAD_PARAM;
+    }
+
+    /* strncpy does not terminate the destination when the source is at
+     * least *len characters long, so terminate explicitly */
+    strncpy (pset_name, name, *len);
+    pset_name[*len - 1] = '\0';
+
+    return OMPI_SUCCESS;
+}
+
+/**
+ * @brief Build a group containing every process of this job.
+ *
+ * Procs without an existing ompi_proc_t are stored as sentinel values.
+ *
+ * @param[in]  instance   instance the group is attached to
+ * @param[out] group_out  newly allocated group
+ */
+static int ompi_instance_group_world (ompi_instance_t *instance, ompi_group_t **group_out)
+{
+    ompi_group_t *group;
+    size_t size;
+
+    size = ompi_process_info.num_procs;
+
+    group = ompi_group_allocate (size);
+    if (OPAL_UNLIKELY(NULL == group)) {
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+    for (size_t i = 0 ; i < size ; ++i) {
+        opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
+        /* look for existing ompi_proc_t that matches this name */
+        group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name);
+        if (NULL == group->grp_proc_pointers[i]) {
+            /* set sentinel value */
+            group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name);
+        } else {
+            OBJ_RETAIN (group->grp_proc_pointers[i]);
+        }
+    }
+
+    ompi_set_group_rank (group, ompi_proc_local());
+
+    group->grp_instance = instance;
+
+    *group_out = group;
+    return OMPI_SUCCESS;
+}
+
+static int ompi_instance_group_shared (ompi_instance_t *instance, ompi_group_t **group_out)
+{
+    ompi_group_t *group;
+    opal_process_name_t wildcard_rank;
+    int ret;
+    size_t size;
+    char **peers;
+    char *val;
+
+    /* Find out which processes are local */
+    wildcard_rank.jobid = OMPI_PROC_MY_NAME->jobid;
+    wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid;
+
+    OPAL_MODEX_RECV_VALUE(ret, PMIX_LOCAL_PEERS, &wildcard_rank, &val, PMIX_STRING);
+    if (OPAL_SUCCESS != ret || NULL == val) {
+        return OMPI_ERROR;
+    }
+
+    peers = opal_argv_split(val, ',');
+    free (val);
+    if (OPAL_UNLIKELY(NULL == peers)) {
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+
size = opal_argv_count (peers);
+
+    group = ompi_group_allocate (size);
+    if (OPAL_UNLIKELY(NULL == group)) {
+        opal_argv_free (peers);
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+    for (size_t i = 0 ; NULL != peers[i] ; ++i) {
+        opal_process_name_t name = {.vpid = strtoul(peers[i], NULL, 10), .jobid = OMPI_PROC_MY_NAME->jobid};
+        /* look for existing ompi_proc_t that matches this name */
+        group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name);
+        if (NULL == group->grp_proc_pointers[i]) {
+            /* set sentinel value */
+            group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name);
+        } else {
+            OBJ_RETAIN (group->grp_proc_pointers[i]);
+        }
+    }
+
+    opal_argv_free (peers);
+
+    /* group is dense */
+    ompi_set_group_rank (group, ompi_proc_local());
+
+    group->grp_instance = instance;
+
+    *group_out = group;
+    return OMPI_SUCCESS;
+}
+
+/**
+ * @brief Build a group containing only the calling process.
+ *
+ * @param[in]  instance   instance the group is attached to
+ * @param[out] group_out  newly allocated group
+ */
+static int ompi_instance_group_self (ompi_instance_t *instance, ompi_group_t **group_out)
+{
+    ompi_group_t *group;
+    size_t size;
+
+    group = OBJ_NEW(ompi_group_t);
+    if (OPAL_UNLIKELY(NULL == group)) {
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+    group->grp_proc_pointers = ompi_proc_self(&size);
+    group->grp_my_rank = 0;
+    group->grp_proc_count = size;
+
+    /* group is dense */
+    OMPI_GROUP_SET_DENSE (group);
+
+    group->grp_instance = instance;
+
+    *group_out = group;
+    return OMPI_SUCCESS;
+}
+
+/**
+ * @brief Build a group from the processes whose PMIx pset name equals @p pset_name.
+ *
+ * Every rank of the job is asked for its PMIX_PSET_NAME; matching ranks are
+ * appended to the group in rank order.
+ *
+ * @param[in]  instance   instance the group is attached to
+ * @param[in]  pset_name  PMIx process-set name to match
+ * @param[out] group_out  newly allocated group
+ *
+ * @returns OMPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE, or the converted PMIx
+ *          status if PMIx_Get() fails for any rank.
+ */
+static int ompi_instance_group_pmix_pset (ompi_instance_t *instance, const char *pset_name, ompi_group_t **group_out)
+{
+    pmix_status_t rc;
+    pmix_proc_t p;
+    ompi_group_t *group;
+    pmix_value_t *pval = NULL;
+    char *stmp = NULL;
+    /* number of members found so far; also the next free slot in the group */
+    size_t size = 0;
+    /* Byte count reported by PMIX_VALUE_UNLOAD. It must not share storage
+     * with the member counter, or every unload would overwrite the count. */
+    size_t stmp_len = 0;
+
+    /* make the group large enough to hold world */
+    group = ompi_group_allocate (ompi_process_info.num_procs);
+    if (OPAL_UNLIKELY(NULL == group)) {
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+
+    for (size_t i = 0 ; i < ompi_process_info.num_procs ; ++i) {
+        opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
+        bool is_member;
+
+        OPAL_PMIX_CONVERT_NAME(&p, &name);
+        rc = PMIx_Get(&p, PMIX_PSET_NAME, NULL, 0, &pval);
+        if (OPAL_UNLIKELY(PMIX_SUCCESS != rc)) {
+            OBJ_RELEASE(group);
+            return opal_pmix_convert_status(rc);
+        }
+
+        stmp = NULL;
+        PMIX_VALUE_UNLOAD(rc,
+                          pval,
+                          (void **)&stmp,
+                          &stmp_len);
+        /* a value that cannot be unloaded as a string is treated as "not a member" */
+        is_member = (PMIX_SUCCESS == rc && NULL != stmp && 0 == strcmp (pset_name, stmp));
+        PMIX_VALUE_RELEASE(pval);
+        free(stmp);
+        if (!is_member) {
+            continue;
+        }
+
+        /* look for existing ompi_proc_t that matches this name */
+        group->grp_proc_pointers[size] = (ompi_proc_t *) ompi_proc_lookup (name);
+        if (NULL == group->grp_proc_pointers[size]) {
+            /* set sentinel value */
+            group->grp_proc_pointers[size] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name);
+        } else {
+            OBJ_RETAIN (group->grp_proc_pointers[size]);
+        }
+        ++size;
+    }
+
+    /* shrink the proc array if needed */
+    if (size < (size_t) group->grp_proc_count) {
+        /* realloc(ptr, 0) may free ptr and return NULL, which would be
+         * mistaken for an allocation failure; keep the array when empty */
+        if (size > 0) {
+            void *tmp = realloc (group->grp_proc_pointers, size * sizeof (group->grp_proc_pointers[0]));
+            if (OPAL_UNLIKELY(NULL == tmp)) {
+                OBJ_RELEASE(group);
+                return OMPI_ERR_OUT_OF_RESOURCE;
+            }
+
+            group->grp_proc_pointers = (ompi_proc_t **) tmp;
+        }
+        group->grp_proc_count = (int) size;
+    }
+
+    ompi_set_group_rank (group, ompi_proc_local());
+
+    group->grp_instance = instance;
+
+    *group_out = group;
+    return OMPI_SUCCESS;
+}
+
+/**
+ * @brief Count the processes of this job whose PMIx pset name equals @p pset_name.
+ *
+ * @param[in]  instance   instance (not used by the lookup)
+ * @param[in]  pset_name  PMIx process-set name to match
+ * @param[out] size_out   number of matching processes; written only on success
+ *
+ * @returns OMPI_SUCCESS, or the converted PMIx status if PMIx_Get() fails.
+ */
+static int ompi_instance_get_pmix_pset_size (ompi_instance_t *instance, const char *pset_name, size_t *size_out)
+{
+    pmix_status_t rc;
+    pmix_proc_t p;
+    pmix_value_t *pval = NULL;
+    size_t size = 0;
+    /* length out-parameter for PMIX_VALUE_UNLOAD, kept apart from the counter */
+    size_t stmp_len = 0;
+    char *stmp = NULL;
+
+    for (size_t i = 0 ; i < ompi_process_info.num_procs ; ++i) {
+        opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
+
+        OPAL_PMIX_CONVERT_NAME(&p, &name);
+        rc = PMIx_Get(&p, PMIX_PSET_NAME, NULL, 0, &pval);
+        if (OPAL_UNLIKELY(PMIX_SUCCESS != rc)) {
+            return opal_pmix_convert_status(rc);
+        }
+
+        stmp = NULL;
+        PMIX_VALUE_UNLOAD(rc,
+                          pval,
+                          (void **)&stmp,
+                          &stmp_len);
+
+        /* count matching processes only */
+        if (PMIX_SUCCESS == rc && NULL != stmp && 0 == strcmp (pset_name, stmp)) {
+            ++size;
+        }
+        PMIX_VALUE_RELEASE(pval);
+        free(stmp);
+    }
+
+    *size_out = size;
+
+    return OMPI_SUCCESS;
+}
+
+/**
+ * @brief Create a group for a named process set.
+ *
+ * "mpi://WORLD", "mpi://SELF" and "mpix://SHARED" (suffix compared without
+ * regard to case) are handled locally; any other name is looked up as a
+ * PMIx process set.
+ *
+ * @param[in]  instance   instance the group is attached to
+ * @param[in]  pset_name  process-set name
+ * @param[out] group_out  newly allocated group
+ *
+ * @returns OMPI_SUCCESS, OMPI_ERR_BAD_PARAM for NULL arguments, or the error
+ *          from the helper that built the group.
+ */
+int ompi_group_from_pset (ompi_instance_t *instance, const char *pset_name, ompi_group_t **group_out)
+{
+    /* group_out is the address to store into, so the check that protects the
+     * helpers below is against NULL, not against the MPI_GROUP_NULL handle */
+    if (NULL == group_out || NULL == pset_name) {
+        return OMPI_ERR_BAD_PARAM;
+    }
+
+    if (0 == strncmp (pset_name, "mpi://", 6)) {
+        pset_name += 6;
+        if (0 == strcasecmp (pset_name, "WORLD")) {
+            return ompi_instance_group_world (instance, group_out);
+        }
+        if (0 == strcasecmp (pset_name, "SELF")) {
+            return ompi_instance_group_self (instance, group_out);
+        }
+    }
+
+    if (0 == strncmp (pset_name, "mpix://", 7)) {
+        pset_name += 7;
+        if (0 == strcasecmp (pset_name, "SHARED")) {
+            return ompi_instance_group_shared (instance, group_out);
+        }
+    }
+
+    /* anything else (including an unrecognised suffix left after stripping a
+     * prefix above) is looked up as a PMIx process set */
+    return ompi_instance_group_pmix_pset (instance, pset_name, group_out);
+}
+
+/**
+ * @brief Return an info object describing a process set.
+ *
+ * The only key set is MPI_INFO_KEY_SESSION_PSET_SIZE. The built-in names are
+ * matched the same way ompi_group_from_pset() matches them (case-insensitive
+ * suffix), so both functions agree on what "mpi://WORLD" means.
+ *
+ * @param[in]  instance   instance (passed through to the PMIx lookup)
+ * @param[in]  pset_name  process-set name
+ * @param[out] info_used  new info object, or MPI_INFO_NULL on failure
+ */
+int ompi_instance_get_pset_info (ompi_instance_t *instance, const char *pset_name, opal_info_t **info_used)
+{
+    ompi_info_t *info = ompi_info_allocate ();
+    char tmp[32];
+    size_t size = 0UL;
+    int ret;
+
+    *info_used = (opal_info_t *) MPI_INFO_NULL;
+
+    if (OPAL_UNLIKELY(NULL == info)) {
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+    if (0 == strncmp (pset_name, "mpi://", 6)) {
+        pset_name += 6;
+        if (0 == strcasecmp (pset_name, "WORLD")) {
+            size = ompi_process_info.num_procs;
+        } else if (0 == strcasecmp (pset_name, "SELF")) {
+            size = 1;
+        } else if (0 == strcasecmp (pset_name, "SHARED")) {
+            /* kept for callers that used the old "mpi://shared" spelling */
+            size = ompi_process_info.num_local_peers + 1;
+        }
+    } else if (0 == strncmp (pset_name, "mpix://", 7) &&
+               0 == strcasecmp (pset_name + 7, "SHARED")) {
+        size = ompi_process_info.num_local_peers + 1;
+    } else {
+        /* best effort, as before: a failed lookup leaves the size at 0 */
+        (void) ompi_instance_get_pmix_pset_size (instance, pset_name, &size);
+    }
+
+    snprintf (tmp, sizeof(tmp), "%" PRIsize_t, size);
+    ret = opal_info_set (&info->super, MPI_INFO_KEY_SESSION_PSET_SIZE, tmp);
+    if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
+        ompi_info_free (&info);
+        return ret;
+    }
+
+    *info_used = &info->super;
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/instance/instance.h b/ompi/instance/instance.h
new file mode 100644
index 00000000000..13945a92362
--- /dev/null
+++ b/ompi/instance/instance.h
@@ -0,0 +1,157 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2018 Triad National
Security, LLC. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OMPI_INSTANCE_H) +#define OMPI_INSTANCE_H + +#include "opal/class/opal_object.h" +#include "opal/class/opal_hash_table.h" +#include "opal/util/info_subscriber.h" +#include "ompi/errhandler/errhandler.h" +#include "opal/mca/threads/mutex.h" +#include "ompi/communicator/comm_request.h" + +#include "mpi.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/info/info.h" +#include "ompi/proc/proc.h" + +struct ompi_group_t; + +struct ompi_instance_t { + opal_infosubscriber_t super; + int i_thread_level; + char i_name[MPI_MAX_OBJECT_NAME]; + uint32_t i_flags; + + /* Attributes */ + opal_hash_table_t *i_keyhash; + + /* index in Fortran <-> C translation array (for when I get around + * to implementing fortran support-- UGH) */ + int i_f_to_c_index; + + ompi_errhandler_t *error_handler; + ompi_errhandler_type_t errhandler_type; +}; + +typedef struct ompi_instance_t ompi_instance_t; + +OBJ_CLASS_DECLARATION(ompi_instance_t); + +/* Define for the preallocated size of the predefined handle. + * Note that we are using a pointer type as the base memory chunk + * size so when the bitness changes the size of the handle changes. + * This is done so we don't end up needing a structure that is + * incredibly larger than necessary because of the bitness. + * + * This padding mechanism works as a (likely) compile time check for when the + * size of the ompi_communicator_t exceeds the predetermined size of the + * ompi_predefined_communicator_t. It also allows us to change the size of + * the ompi_communicator_t without impacting the size of the + * ompi_predefined_communicator_t structure for some number of additions. + * + * Note: we used to define the PAD as a multiple of sizeof(void*). + * However, this makes a different size PAD, depending on + * sizeof(void*). 
In some cases + * (https://github.com/open-mpi/ompi/issues/3610), 32 bit builds can + * run out of space when 64 bit builds are still ok. So we changed to + * use just a naked byte size. As a rule of thumb, however, the size + * should probably still be a multiple of 8 so that it has the + * possibility of being nicely aligned. + * + * As an example: + * If the size of ompi_communicator_t is less than the size of the _PAD then + * the _PAD ensures that the size of the ompi_predefined_communicator_t is + * whatever size is defined below in the _PAD macro. + * However, if the size of the ompi_communicator_t grows larger than the _PAD + * (say by adding a few more function pointers to the structure) then the + * 'padding' variable will be initialized to a large number often triggering + * a 'array is too large' compile time error. This signals two things: + * 1) That the _PAD should be increased. + * 2) That users need to be made aware of the size change for the + * ompi_predefined_communicator_t structure. + * + * Q: So you just made a change to communicator structure, do you need to adjust + * the PREDEFINED_COMMUNICATOR_PAD macro? + * A: Most likely not, but it would be good to check. + */ +#define PREDEFINED_INSTANCE_PAD 512 + +struct ompi_predefined_instance_t { + ompi_instance_t instance; + char padding[PREDEFINED_INSTANCE_PAD - sizeof(ompi_instance_t)]; +}; +typedef struct ompi_predefined_instance_t ompi_predefined_instance_t; + +/** + * @brief NULL instance + */ +OMPI_DECLSPEC extern ompi_predefined_instance_t ompi_mpi_instance_null; + +OMPI_DECLSPEC extern opal_pointer_array_t ompi_instance_f_to_c_table; + +extern ompi_instance_t *ompi_mpi_instance_default; + +/** + * @brief Bring up the bare minimum infrastructure to support pre-session_init functions. 
+ * + * List of subsystems initialized: + * - OPAL (including class system) + * - Error handlers + * - MPI Info + */ +int ompi_mpi_instance_retain (void); + +/** + * @brief Release (and possibly teardown) pre-session_init infrastructure. + */ +void ompi_mpi_instance_release (void); + +/** + * @brief Create a new MPI instance + * + * @param[in] ts_level thread support level (see mpi.h) + * @param[in] info info object + * @param[in] errhandler errhandler to set on the instance + */ +OMPI_DECLSPEC int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance); + +/** + * @brief Destroy an MPI instance and set it to MPI_SESSION_NULL + */ +OMPI_DECLSPEC int ompi_mpi_instance_finalize (ompi_instance_t **instance); + + +/** + * @brief Add a function to the finalize chain. Note this function will be called + * when the last instance has been destroyed. + */ +#define ompi_mpi_instance_append_finalize opal_finalize_register_cleanup + +/** + * @brief Get an MPI group object for a named process set.
+ * + * @param[in] instance MPI instance (session) + * @param[in] pset_name Name of process set (includes mpi://world, mpi://self) + * @param[out] group_out New MPI group + */ +OMPI_DECLSPEC int ompi_group_from_pset (ompi_instance_t *instance, const char *pset_name, struct ompi_group_t **group_out); + +OMPI_DECLSPEC int ompi_instance_get_num_psets (ompi_instance_t *instance, int *npset_names); +OMPI_DECLSPEC int ompi_instance_get_nth_pset (ompi_instance_t *instance, int n, int *len, char *pset_name); +OMPI_DECLSPEC int ompi_instance_get_pset_info (ompi_instance_t *instance, const char *pset_name, opal_info_t **info_used); + +/** + * @brief current number of active instances + */ +extern opal_atomic_int32_t ompi_instance_count; + +#endif /* !defined(OMPI_INSTANCE_H) */ diff --git a/ompi/mca/bml/base/base.h b/ompi/mca/bml/base/base.h index b7a226ac6ec..723f905cc8c 100644 --- a/ompi/mca/bml/base/base.h +++ b/ompi/mca/bml/base/base.h @@ -61,6 +61,7 @@ OMPI_DECLSPEC extern mca_bml_base_component_t mca_bml_component; OMPI_DECLSPEC extern mca_bml_base_module_t mca_bml; OMPI_DECLSPEC extern mca_base_framework_t ompi_bml_base_framework; OMPI_DECLSPEC extern opal_mutex_t mca_bml_lock; +OMPI_DECLSPEC extern bool mca_bml_component_init_called; static inline struct mca_bml_base_endpoint_t *mca_bml_base_get_endpoint (struct ompi_proc_t *proc) { if (OPAL_UNLIKELY(NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML])) { diff --git a/ompi/mca/bml/base/bml_base_frame.c b/ompi/mca/bml/base/bml_base_frame.c index b5a63dd9a2e..a76a891e49d 100644 --- a/ompi/mca/bml/base/bml_base_frame.c +++ b/ompi/mca/bml/base/bml_base_frame.c @@ -127,5 +127,11 @@ static int mca_bml_base_close( void ) return ret; } - return mca_base_framework_close(&opal_btl_base_framework); + ret = mca_base_framework_close(&opal_btl_base_framework); + if (OMPI_SUCCESS != ret) { + return ret; + } + + mca_bml_component_init_called = false; + return OMPI_SUCCESS; } diff --git a/ompi/mca/bml/base/bml_base_init.c
b/ompi/mca/bml/base/bml_base_init.c index 9a2efec8ccc..6d1060d4690 100644 --- a/ompi/mca/bml/base/bml_base_init.c +++ b/ompi/mca/bml/base/bml_base_init.c @@ -42,12 +42,12 @@ mca_bml_base_module_t mca_bml = { }; mca_bml_base_component_t mca_bml_component = {{0}}; -static bool init_called = false; +bool mca_bml_component_init_called = false; bool mca_bml_base_inited(void) { - return init_called; + return mca_bml_component_init_called; } int mca_bml_base_init( bool enable_progress_threads, @@ -57,11 +57,11 @@ int mca_bml_base_init( bool enable_progress_threads, int priority = 0, best_priority = -1; mca_base_component_list_item_t *cli = NULL; - if (init_called) { + if (true == mca_bml_component_init_called) { return OPAL_SUCCESS; } - init_called = true; + mca_bml_component_init_called = true; OPAL_LIST_FOREACH(cli, &ompi_bml_base_framework.framework_components, mca_base_component_list_item_t) { component = (mca_bml_base_component_t*) cli->cli_component; diff --git a/ompi/mca/coll/adapt/coll_adapt_module.c b/ompi/mca/coll/adapt/coll_adapt_module.c index 8b5fda9bf60..554944e002c 100644 --- a/ompi/mca/coll/adapt/coll_adapt_module.c +++ b/ompi/mca/coll/adapt/coll_adapt_module.c @@ -2,6 +2,9 @@ * Copyright (c) 2014-2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
+ * * $COPYRIGHT$ * * Additional copyrights may follow @@ -91,8 +94,8 @@ OBJ_CLASS_INSTANCE(mca_coll_adapt_module_t, adapt_module->previous_ ## __api ## _module = comm->c_coll->coll_ ## __api ## _module; \ if (!comm->c_coll->coll_ ## __api || !comm->c_coll->coll_ ## __api ## _module) { \ opal_output_verbose(1, ompi_coll_base_framework.framework_output, \ - "(%d/%s): no underlying " # __api"; disqualifying myself", \ - comm->c_contextid, comm->c_name); \ + "(%s/%s): no underlying " # __api"; disqualifying myself", \ + ompi_comm_print_cid(comm), comm->c_name); \ return OMPI_ERROR; \ } \ OBJ_RETAIN(adapt_module->previous_ ## __api ## _module); \ @@ -137,9 +140,9 @@ mca_coll_base_module_t *ompi_coll_adapt_comm_query(struct ompi_communicator_t * /* If we're intercomm, or if there's only one process in the communicator */ if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm)) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:adapt:comm_query (%d/%s): intercomm, " + "coll:adapt:comm_query (%s/%s): intercomm, " "comm is too small; disqualifying myself", - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); return NULL; } @@ -148,9 +151,9 @@ mca_coll_base_module_t *ompi_coll_adapt_comm_query(struct ompi_communicator_t * *priority = mca_coll_adapt_component.adapt_priority; if (mca_coll_adapt_component.adapt_priority < 0) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:adapt:comm_query (%d/%s): priority too low; " + "coll:adapt:comm_query (%s/%s): priority too low; " "disqualifying myself", - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); return NULL; } @@ -181,8 +184,8 @@ mca_coll_base_module_t *ompi_coll_adapt_comm_query(struct ompi_communicator_t * adapt_module->super.coll_iallreduce = NULL; opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:adapt:comm_query (%d/%s): pick me! 
pick me!", - comm->c_contextid, comm->c_name); + "coll:adapt:comm_query (%s/%s): pick me! pick me!", + ompi_comm_print_cid(comm), comm->c_name); return &(adapt_module->super); } diff --git a/ompi/mca/coll/base/coll_base_allgather.c b/ompi/mca/coll/base/coll_base_allgather.c index a6d0794b6aa..eed2857bdee 100644 --- a/ompi/mca/coll/base/coll_base_allgather.c +++ b/ompi/mca/coll/base/coll_base_allgather.c @@ -23,6 +23,8 @@ #include "ompi_config.h" +#include "math.h" + #include "mpi.h" #include "opal/util/bit_ops.h" #include "ompi/constants.h" @@ -338,7 +340,145 @@ ompi_coll_base_allgather_intra_recursivedoubling(const void *sbuf, int scount, return err; } +/* + * ompi_coll_base_allgather_intra_sparbit + * + * Function: allgather using O(log(N)) steps. + * Accepts: Same arguments as MPI_Allgather + * Returns: MPI_SUCCESS or error code + * + * Description: Proposal of an allgather algorithm similar to Bruck but with inverted distances + * and non-decreasing exchanged data sizes. Described in "Sparbit: a new + * logarithmic-cost and data locality-aware MPI Allgather algorithm". + * + * Memory requirements: + * Additional memory for N requests. + * + * Example on 6 nodes, with l representing the highest power of two smaller than N, in this case l = + * 4 (more details can be found in the paper): + * Initial state + * # 0 1 2 3 4 5 + * [0] [ ] [ ] [ ] [ ] [ ] + * [ ] [1] [ ] [ ] [ ] [ ] + * [ ] [ ] [2] [ ] [ ] [ ] + * [ ] [ ] [ ] [3] [ ] [ ] + * [ ] [ ] [ ] [ ] [4] [ ] + * [ ] [ ] [ ] [ ] [ ] [5] + * Step 0: Each process sends its own block to process r + l and receives another from r - l. + * # 0 1 2 3 4 5 + * [0] [ ] [ ] [ ] [0] [ ] + * [ ] [1] [ ] [ ] [ ] [1] + * [2] [ ] [2] [ ] [ ] [ ] + * [ ] [3] [ ] [3] [ ] [ ] + * [ ] [ ] [4] [ ] [4] [ ] + * [ ] [ ] [ ] [5] [ ] [5] + * Step 1: Each process sends its own block to process r + l/2 and receives another from r - l/2. + * The block received on the previous step is ignored to avoid a future double-write.
+ * # 0 1 2 3 4 5 + * [0] [ ] [0] [ ] [0] [ ] + * [ ] [1] [ ] [1] [ ] [1] + * [2] [ ] [2] [ ] [2] [ ] + * [ ] [3] [ ] [3] [ ] [3] + * [4] [ ] [4] [ ] [4] [ ] + * [ ] [5] [ ] [5] [ ] [5] + * Step 2: Each process sends all the data it has (3 blocks) to process r + l/4 and similarly + * receives all the data from process r - l/4. + * # 0 1 2 3 4 5 + * [0] [0] [0] [0] [0] [0] + * [1] [1] [1] [1] [1] [1] + * [2] [2] [2] [2] [2] [2] + * [3] [3] [3] [3] [3] [3] + * [4] [4] [4] [4] [4] [4] + * [5] [5] [5] [5] [5] [5] + */ +int ompi_coll_base_allgather_intra_sparbit(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + /* ################# VARIABLE DECLARATION, BUFFER CREATION AND PREPARATION FOR THE ALGORITHM ######################## */ + + /* local variable declarations */ + int rank = 0, comm_size = 0, comm_log = 0, exclusion = 0, data_expected = 1, transfer_count = 0; + int sendto, recvfrom, send_disp, recv_disp; + uint32_t last_ignore, ignore_steps, distance = 1; + + int err = 0; + int line = -1; + + ptrdiff_t rlb, rext; + + char *tmpsend = NULL, *tmprecv = NULL; + + MPI_Request *requests = NULL; + + /* algorithm choice information printing. NOTE(review): rank still holds its initializer 0 here, it is only assigned from ompi_comm_rank() below, so every process logs rank 0 */ + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allgather_intra_sparbit rank %d", rank)); + + comm_size = ompi_comm_size(comm); + rank = ompi_comm_rank(comm); + + err = ompi_datatype_get_extent(rdtype, &rlb, &rext); + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } + + /* if the MPI_IN_PLACE condition is not set, copy the send buffer to the receive buffer to perform the sends (all the data is extracted and forwarded from the recv buffer)*/ + /* tmprecv and tmpsend are used as abstract pointers to simplify send and receive buffer choice */ + tmprecv = (char *) rbuf; + if(MPI_IN_PLACE != sbuf){ + tmpsend = (char *) sbuf; + err =
ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv + (ptrdiff_t) rank * rcount * rext, rcount, rdtype); + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } + } + tmpsend = tmprecv; + + requests = (MPI_Request *) malloc(comm_size * sizeof(MPI_Request)); /* NOTE(review): the malloc result is used below without a NULL check */ + + /* ################# ALGORITHM LOGIC ######################## */ + + /* calculate ceil(log2()) of the total process count. NOTE(review): with comm_size == 1 comm_log is 0 and the shift count on the next line is -1; confirm this algorithm is never selected for a single process */ + comm_log = ceil(log(comm_size)/log(2)); + distance <<= comm_log - 1; + + last_ignore = __builtin_ctz(comm_size); + ignore_steps = (~((uint32_t) comm_size >> last_ignore) | 1) << last_ignore; + + /* perform the parallel binomial tree distribution steps */ + for (int i = 0; i < comm_log; ++i) { + sendto = (rank + distance) % comm_size; + recvfrom = (rank - distance + comm_size) % comm_size; + exclusion = (distance & ignore_steps) == distance; + + for (transfer_count = 0; transfer_count < data_expected - exclusion; transfer_count++) { + send_disp = (rank - 2 * transfer_count * distance + comm_size) % comm_size; + recv_disp = (rank - (2 * transfer_count + 1) * distance + comm_size) % comm_size; + + /* Since each process sends several non-contiguous blocks of data, each block sent (and therefore each send and recv call) needs a different tag.
*/ + /* As base OpenMPI only provides one tag for allgather, we are forced to use a tag space from other components in the send and recv calls. NOTE(review): the isend below computes its offset and count from scount although the data is read from rbuf, which is laid out in units of rcount and rdtype; scount is presumably not meaningful when sbuf is MPI_IN_PLACE, so rcount looks intended - confirm. The return codes of isend, irecv and the wait below are not checked. */ + MCA_PML_CALL(isend(tmpsend + (ptrdiff_t) send_disp * scount * rext, scount, rdtype, sendto, MCA_COLL_BASE_TAG_HCOLL_BASE - send_disp, MCA_PML_BASE_SEND_STANDARD, comm, requests + transfer_count)); + MCA_PML_CALL(irecv(tmprecv + (ptrdiff_t) recv_disp * rcount * rext, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_HCOLL_BASE - recv_disp, comm, requests + data_expected - exclusion + transfer_count)); + } + ompi_request_wait_all(transfer_count * 2, requests, MPI_STATUSES_IGNORE); + + distance >>= 1; + /* calculates the data expected for the next step, based on the current number of blocks and possible exclusions */ + data_expected = (data_expected << 1) - exclusion; + exclusion = 0; + } + + free(requests); + + return OMPI_SUCCESS; + +err_hndl: + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", + __FILE__, line, err, rank)); + (void)line; // silence compiler warning + return err; +} /* * ompi_coll_base_allgather_intra_ring diff --git a/ompi/mca/coll/base/coll_base_allgatherv.c b/ompi/mca/coll/base/coll_base_allgatherv.c index 3f09d91d6cc..faa6fde0245 100644 --- a/ompi/mca/coll/base/coll_base_allgatherv.c +++ b/ompi/mca/coll/base/coll_base_allgatherv.c @@ -25,6 +25,8 @@ #include "ompi_config.h" +#include "math.h" + #include "mpi.h" #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" @@ -202,6 +204,154 @@ int ompi_coll_base_allgatherv_intra_bruck(const void *sbuf, int scount, return err; } +/* + * ompi_coll_base_allgatherv_intra_sparbit + * + * Function: allgatherv using O(log(N)) steps. + * Accepts: Same arguments as MPI_Allgatherv + * Returns: MPI_SUCCESS or error code + * + * Description: Proposal of an allgather algorithm similar to Bruck but with inverted distances + * and non-decreasing exchanged data sizes.
Described in "Sparbit: a new + * logarithmic-cost and data locality-aware MPI Allgather algorithm". + * + * Memory requirements: + * Additional memory for N requests. + * + * Example on 6 nodes, with l representing the highest power of two smaller than N, in this case l = + * 4 (more details can be found in the paper): + * Initial state + * # 0 1 2 3 4 5 + * [0] [ ] [ ] [ ] [ ] [ ] + * [ ] [1] [ ] [ ] [ ] [ ] + * [ ] [ ] [2] [ ] [ ] [ ] + * [ ] [ ] [ ] [3] [ ] [ ] + * [ ] [ ] [ ] [ ] [4] [ ] + * [ ] [ ] [ ] [ ] [ ] [5] + * Step 0: Each process sends its own block to process r + l and receives another from r - l. + * # 0 1 2 3 4 5 + * [0] [ ] [ ] [ ] [0] [ ] + * [ ] [1] [ ] [ ] [ ] [1] + * [2] [ ] [2] [ ] [ ] [ ] + * [ ] [3] [ ] [3] [ ] [ ] + * [ ] [ ] [4] [ ] [4] [ ] + * [ ] [ ] [ ] [5] [ ] [5] + * Step 1: Each process sends its own block to process r + l/2 and receives another from r - l/2. + * The block received on the previous step is ignored to avoid a future double-write. + * # 0 1 2 3 4 5 + * [0] [ ] [0] [ ] [0] [ ] + * [ ] [1] [ ] [1] [ ] [1] + * [2] [ ] [2] [ ] [2] [ ] + * [ ] [3] [ ] [3] [ ] [3] + * [4] [ ] [4] [ ] [4] [ ] + * [ ] [5] [ ] [5] [ ] [5] + * Step 2: Each process sends all the data it has (3 blocks) to process r + l/4 and similarly + * receives all the data from process r - l/4.
+ * # 0 1 2 3 4 5 + * [0] [0] [0] [0] [0] [0] + * [1] [1] [1] [1] [1] [1] + * [2] [2] [2] [2] [2] [2] + * [3] [3] [3] [3] [3] [3] + * [4] [4] [4] [4] [4] [4] + * [5] [5] [5] [5] [5] [5] + */ + +int ompi_coll_base_allgatherv_intra_sparbit(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void* rbuf, const int *rcounts, + const int *rdispls, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + /* ################# VARIABLE DECLARATION, BUFFER CREATION AND PREPARATION FOR THE ALGORITHM ######################## */ + + /* local variable declarations */ + int rank = 0, comm_size = 0, comm_log = 0, exclusion = 0; + int data_expected = 1, transfer_count = 0, step_requests = 0; + int sendto, recvfrom, send_disp, recv_disp; + uint32_t last_ignore, ignore_steps, distance = 1; + + int err = 0; + int line = -1; + + ptrdiff_t rlb, rext; + + char *tmpsend = NULL, *tmprecv = NULL; + + MPI_Request *requests = NULL; + + /* debug leftover: printf("using the new allgatherv!!\n"); */ + + /* algorithm choice information printing. NOTE(review): rank still holds its initializer 0 here, it is only assigned from ompi_comm_rank() below, so every process logs rank 0 */ + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:sparbit:allgather_sync_intra rank %d", rank)); + + comm_size = ompi_comm_size(comm); + rank = ompi_comm_rank(comm); + + err = ompi_datatype_get_extent(rdtype, &rlb, &rext); + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } + + /* if the MPI_IN_PLACE condition is not set, copy the send buffer to the receive buffer to perform the sends (all the data is extracted and forwarded from the recv buffer)*/ + /* tmprecv and tmpsend are used as abstract pointers to simplify send and receive buffer choice. NOTE(review): the local copy below passes scount as the receive count together with rdtype; rcounts[rank] looks intended - confirm */ + tmprecv = (char *) rbuf; + if(MPI_IN_PLACE != sbuf){ + tmpsend = (char *) sbuf; + err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv + (ptrdiff_t) rdispls[rank] * rext, scount, rdtype); + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } + } + tmpsend = tmprecv; + + requests = (MPI_Request *) malloc(comm_size
* sizeof(MPI_Request)); /* NOTE(review): the malloc result is used below without a NULL check */ + + /* ################# ALGORITHM LOGIC ######################## */ + + /* calculate ceil(log2()) of the total process count. NOTE(review): with comm_size == 1 comm_log is 0 and the shift count on the next line is -1; confirm this algorithm is never selected for a single process */ + comm_log = ceil(log(comm_size)/log(2)); + distance <<= comm_log - 1; + + last_ignore = __builtin_ctz(comm_size); + ignore_steps = (~((uint32_t) comm_size >> last_ignore) | 1) << last_ignore; + + /* perform the parallel binomial tree distribution steps */ + for (int i = 0; i < comm_log; ++i) { + sendto = (rank + distance) % comm_size; + recvfrom = (rank - distance + comm_size) % comm_size; + exclusion = (distance & ignore_steps) == distance; + + for (transfer_count = 0; transfer_count < data_expected - exclusion; transfer_count++) { + send_disp = (rank - 2 * transfer_count * distance + comm_size) % comm_size; + recv_disp = (rank - (2 * transfer_count + 1) * distance + comm_size) % comm_size; + + /* Since each process sends several non-contiguous blocks of data to the same destination, + * each block sent (and therefore each send and recv call) needs a different tag.
*/ + /* As base OpenMPI only provides one tag for allgather, we are forced to use a tag space + * from other components in the send and recv calls. NOTE(review): the return codes of isend, irecv and the wait below are not checked. */ + if(rcounts[send_disp] > 0) + MCA_PML_CALL(isend(tmpsend + (ptrdiff_t) rdispls[send_disp] * rext, rcounts[send_disp], rdtype, sendto, MCA_COLL_BASE_TAG_HCOLL_BASE - send_disp, MCA_PML_BASE_SEND_STANDARD, comm, requests + step_requests++)); + if(rcounts[recv_disp] > 0) + MCA_PML_CALL(irecv(tmprecv + (ptrdiff_t) rdispls[recv_disp] * rext, rcounts[recv_disp], rdtype, recvfrom, MCA_COLL_BASE_TAG_HCOLL_BASE - recv_disp, comm, requests + step_requests++)); + } + ompi_request_wait_all(step_requests, requests, MPI_STATUSES_IGNORE); + + distance >>= 1; + /* calculates the data expected for the next step, based on the current number of blocks and possible exclusions */ + data_expected = (data_expected << 1) - exclusion; + exclusion = step_requests = 0; + } + + free(requests); + + return OMPI_SUCCESS; + +err_hndl: + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", + __FILE__, line, err, rank)); + (void)line; // silence compiler warning + return err; + +} /* * ompi_coll_base_allgatherv_intra_ring diff --git a/ompi/mca/coll/base/coll_base_alltoall.c b/ompi/mca/coll/base/coll_base_alltoall.c index 9446b8a414d..b06ba3f238b 100644 --- a/ompi/mca/coll/base/coll_base_alltoall.c +++ b/ompi/mca/coll/base/coll_base_alltoall.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -15,6 +15,8 @@ * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,6 +29,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" +#include "opal/datatype/opal_convertor_internal.h" #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" @@ -35,88 +38,112 @@ #include "coll_base_topo.h" #include "coll_base_util.h" -/* MPI_IN_PLACE all to all algorithm. TODO: implement a better one. */ +/* + * We want to minimize the amount of temporary memory needed while allowing as many ranks + * to exchange data simultaneously. We use a variation of the ring algorithm, where in a + * single step a process exchanges data with both neighbors at distance k (on the left + * and the right on a logical ring topology). With this approach we need to pack the data + * for only one of the two neighbors, as we can then use the original buffer (and datatype + * and count) to send the data to the other. + */ int mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int i, j, size, rank, err = MPI_SUCCESS, line; - ptrdiff_t ext, gap = 0; + int i, size, rank, left, right, err = MPI_SUCCESS, line; + ptrdiff_t extent; ompi_request_t *req; - char *allocated_buffer = NULL, *tmp_buffer; - size_t max_size; + char *tmp_buffer; + size_t packed_size = 0, max_size; + opal_convertor_t convertor; /* Initialize. */ size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - /* If only one process, we're done.
*/ - if (1 == size) { + ompi_datatype_type_size(rdtype, &max_size); + + /* Easy way out */ + if ((1 == size) || (0 == rcount) || (0 == max_size) ) { return MPI_SUCCESS; } - /* Find the largest receive amount */ - ompi_datatype_type_extent (rdtype, &ext); - max_size = opal_datatype_span(&rdtype->super, rcount, &gap); + /* Find the largest amount of packed send/recv data among all peers where + * we need to pack before the send. + */ +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT + for (i = 1 ; i <= (size >> 1) ; ++i) { + right = (rank + i) % size; + ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, right); + + if( OPAL_UNLIKELY(opal_local_arch != ompi_proc->super.proc_convertor->master->remote_arch)) { + packed_size = opal_datatype_compute_remote_size(&rdtype->super, + ompi_proc->super.proc_convertor->master->remote_sizes); + max_size = packed_size > max_size ? packed_size : max_size; + } + } +#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ + max_size *= rcount; - /* Initiate all send/recv to/from others. 
*/ + ompi_datatype_type_extent(rdtype, &extent); /* Allocate a temporary buffer */ - allocated_buffer = calloc (max_size, 1); - if( NULL == allocated_buffer) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto error_hndl; } - tmp_buffer = allocated_buffer - gap; - max_size = ext * rcount; - - /* in-place alltoall slow algorithm (but works) */ - for (i = 0 ; i < size ; ++i) { - for (j = i+1 ; j < size ; ++j) { - if (i == rank) { - /* Copy the data into the temporary buffer */ - err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer, - (char *) rbuf + j * max_size); - if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } - - /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * j, rcount, rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALL, comm, &req)); - if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } - - err = MCA_PML_CALL(send ((char *) tmp_buffer, rcount, rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm)); - if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } - } else if (j == rank) { - /* Copy the data into the temporary buffer */ - err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer, - (char *) rbuf + i * max_size); - if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } - - /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * i, rcount, rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALL, comm, &req)); - if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } - - err = MCA_PML_CALL(send ((char *) tmp_buffer, rcount, rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm)); - if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } - } else { - continue; - } - - /* Wait for the requests to complete */ - err = ompi_request_wait ( &req, MPI_STATUSES_IGNORE); - if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } + tmp_buffer = calloc (max_size, 1); + if( NULL == tmp_buffer) 
{ err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto error_hndl; } + + for (i = 1 ; i <= (size >> 1) ; ++i) { + struct iovec iov = {.iov_base = tmp_buffer, .iov_len = max_size}; + uint32_t iov_count = 1; + + right = (rank + i) % size; + left = (rank + size - i) % size; + + ompi_proc_t *right_proc = ompi_comm_peer_lookup(comm, right); + opal_convertor_clone(right_proc->super.proc_convertor, &convertor, 0); + opal_convertor_prepare_for_send(&convertor, &rdtype->super, rcount, + (char *) rbuf + right * rcount * extent); + packed_size = max_size; + err = opal_convertor_pack(&convertor, &iov, &iov_count, &packed_size); + if (1 != err) { goto error_hndl; } + + /* Receive data from the right */ + err = MCA_PML_CALL(irecv ((char *) rbuf + right * rcount * extent, rcount, rdtype, + right, MCA_COLL_BASE_TAG_ALLTOALL, comm, &req)); + if (MPI_SUCCESS != err) { goto error_hndl; } + + if( left != right ) { + /* Send data to the left */ + err = MCA_PML_CALL(send ((char *) rbuf + left * rcount * extent, rcount, rdtype, + left, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, + comm)); + if (MPI_SUCCESS != err) { goto error_hndl; } + + err = ompi_request_wait (&req, MPI_STATUSES_IGNORE); + if (MPI_SUCCESS != err) { goto error_hndl; } + + /* Receive data from the left */ + err = MCA_PML_CALL(irecv ((char *) rbuf + left * rcount * extent, rcount, rdtype, + left, MCA_COLL_BASE_TAG_ALLTOALL, comm, &req)); + if (MPI_SUCCESS != err) { goto error_hndl; } } + + /* Send data to the right */ + err = MCA_PML_CALL(send ((char *) tmp_buffer, packed_size, MPI_PACKED, + right, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, + comm)); + if (MPI_SUCCESS != err) { goto error_hndl; } + + err = ompi_request_wait (&req, MPI_STATUSES_IGNORE); + if (MPI_SUCCESS != err) { goto error_hndl; } } error_hndl: /* Free the temporary buffer */ - if( NULL != allocated_buffer ) - free (allocated_buffer); + if( NULL != tmp_buffer ) + free (tmp_buffer); if( MPI_SUCCESS != err ) { 
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index 5274de89a42..ddb82a1cf35 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -17,6 +17,8 @@ * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" +#include "opal/datatype/opal_convertor_internal.h" #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" @@ -37,85 +40,124 @@ #include "coll_base_topo.h" #include "coll_base_util.h" +/* + * We want to minimize the amount of temporary memory needed while allowing as many ranks + * to exchange data simultaneously. We use a variation of the ring algorithm, where in a + * single step a process exchanges data with both neighbors at distance k (on the left + * and the right on a logical ring topology). With this approach we need to pack the data + * for only one of the two neighbors, as we can then use the original buffer (and datatype + * and count) to send the data to the other.
+ */ int mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int i, j, size, rank, err=MPI_SUCCESS; - char *allocated_buffer, *tmp_buffer; - size_t max_size; - ptrdiff_t ext, gap = 0; + int i, size, rank, left, right, err = MPI_SUCCESS, line; + ptrdiff_t extent; + ompi_request_t *req = MPI_REQUEST_NULL; + char *tmp_buffer; + size_t packed_size = 0, max_size; + opal_convertor_t convertor; /* Initialize. */ size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - /* If only one process, we're done. */ - if (1 == size) { + ompi_datatype_type_size(rdtype, &max_size); + max_size *= rcounts[rank]; + + /* Easy way out */ + if ((1 == size) || (0 == max_size) ) { return MPI_SUCCESS; } - /* Find the largest receive amount */ - ompi_datatype_type_extent (rdtype, &ext); - for (i = 0, max_size = 0 ; i < size ; ++i) { - if (i == rank) { - continue; + + /* Find the largest amount of packed send/recv data among all peers where + * we need to pack before the send. + */ +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT + for (i = 1 ; i <= (size >> 1) ; ++i) { + right = (rank + i) % size; + ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, right); + + if( OPAL_UNLIKELY(opal_local_arch != ompi_proc->super.proc_convertor->master->remote_arch)) { + packed_size = opal_datatype_compute_remote_size(&rdtype->super, + ompi_proc->super.proc_convertor->master->remote_sizes); + packed_size *= rcounts[right]; + max_size = packed_size > max_size ? packed_size : max_size; } - size_t cur_size = opal_datatype_span(&rdtype->super, rcounts[i], &gap); - max_size = cur_size > max_size ? 
cur_size : max_size; } - /* The gap will always be the same as we are working on the same datatype */ +#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ - if (OPAL_UNLIKELY(0 == max_size)) { - return MPI_SUCCESS; - } + ompi_datatype_type_extent(rdtype, &extent); /* Allocate a temporary buffer */ - allocated_buffer = calloc (max_size, 1); - if (NULL == allocated_buffer) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - tmp_buffer = allocated_buffer - gap; - - /* Initiate all send/recv to/from others. */ - /* in-place alltoallv slow algorithm (but works) */ - for (i = 0 ; i < size ; ++i) { - for (j = i+1 ; j < size ; ++j) { - if (i == rank && 0 != rcounts[j]) { - /* Copy the data into the temporary buffer */ - err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j], - tmp_buffer, (char *) rbuf + rdisps[j] * ext); - if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Exchange data with the peer */ - err = ompi_coll_base_sendrecv_actual((void *) tmp_buffer, rcounts[j], rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALLV, - (char *)rbuf + rdisps[j] * ext, rcounts[j], rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALLV, - comm, MPI_STATUS_IGNORE); - if (MPI_SUCCESS != err) { goto error_hndl; } - } else if (j == rank && 0 != rcounts[i]) { - /* Copy the data into the temporary buffer */ - err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i], - tmp_buffer, (char *) rbuf + rdisps[i] * ext); - if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Exchange data with the peer */ - err = ompi_coll_base_sendrecv_actual((void *) tmp_buffer, rcounts[i], rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALLV, - (char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALLV, - comm, MPI_STATUS_IGNORE); - if (MPI_SUCCESS != err) { goto error_hndl; } - } + tmp_buffer = calloc (max_size, 1); + if( NULL == tmp_buffer) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto error_hndl; } + + for (i = 1 ; i <= (size >> 1) ; ++i) { + struct iovec iov = {.iov_base = tmp_buffer, .iov_len = max_size}; + 
uint32_t iov_count = 1; + + right = (rank + i) % size; + left = (rank + size - i) % size; + + if( 0 != rcounts[right] ) { /* nothing to exchange with the peer on the right */ + ompi_proc_t *right_proc = ompi_comm_peer_lookup(comm, right); + opal_convertor_clone(right_proc->super.proc_convertor, &convertor, 0); + opal_convertor_prepare_for_send(&convertor, &rdtype->super, rcounts[right], + (char *) rbuf + rdisps[right] * extent); + packed_size = max_size; + err = opal_convertor_pack(&convertor, &iov, &iov_count, &packed_size); + if (1 != err) { goto error_hndl; } + + /* Receive data from the right */ + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[right] * extent, rcounts[right], rdtype, + right, MCA_COLL_BASE_TAG_ALLTOALLV, comm, &req)); + if (MPI_SUCCESS != err) { goto error_hndl; } + } + + if( (left != right) && (0 != rcounts[left]) ) { + /* Send data to the left */ + err = MCA_PML_CALL(send ((char *) rbuf + rdisps[left] * extent, rcounts[left], rdtype, + left, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD, + comm)); + if (MPI_SUCCESS != err) { goto error_hndl; } + + err = ompi_request_wait (&req, MPI_STATUSES_IGNORE); + if (MPI_SUCCESS != err) { goto error_hndl; } + + /* Receive data from the left */ + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[left] * extent, rcounts[left], rdtype, + left, MCA_COLL_BASE_TAG_ALLTOALLV, comm, &req)); + if (MPI_SUCCESS != err) { goto error_hndl; } + } + + if( 0 != rcounts[right] ) { /* nothing to exchange with the peer on the right */ + /* Send data to the right */ + err = MCA_PML_CALL(send ((char *) tmp_buffer, packed_size, MPI_PACKED, + right, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD, + comm)); + if (MPI_SUCCESS != err) { goto error_hndl; } } + + err = ompi_request_wait (&req, MPI_STATUSES_IGNORE); + if (MPI_SUCCESS != err) { goto error_hndl; } } error_hndl: /* Free the temporary buffer */ - free (allocated_buffer); + if( NULL != tmp_buffer ) + free (tmp_buffer); + + if( MPI_SUCCESS != err ) { + 
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, + rank)); + (void)line; // silence compiler warning + } /* All done */ return err; diff --git a/ompi/mca/coll/base/coll_base_barrier.c b/ompi/mca/coll/base/coll_base_barrier.c index 94a24e6a572..516dcd10c83 100644 --- a/ompi/mca/coll/base/coll_base_barrier.c +++ b/ompi/mca/coll/base/coll_base_barrier.c @@ -125,7 +125,7 @@ int ompi_coll_base_barrier_intra_doublering(struct ompi_communicator_t *comm, OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_barrier_intra_doublering rank %d", rank)); - left = ((rank-1)%size); + left = ((size+rank-1)%size); right = ((rank+1)%size); if (rank > 0) { /* receive message from the left */ diff --git a/ompi/mca/coll/base/coll_base_comm_select.c b/ompi/mca/coll/base/coll_base_comm_select.c index fcdb8649eba..d5c3a0dbfe5 100644 --- a/ompi/mca/coll/base/coll_base_comm_select.c +++ b/ompi/mca/coll/base/coll_base_comm_select.c @@ -102,8 +102,8 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm) /* Announce */ opal_output_verbose(9, ompi_coll_base_framework.framework_output, - "coll:base:comm_select: new communicator: %s (cid %d)", - comm->c_name, comm->c_contextid); + "coll:base:comm_select: new communicator: %s (cid %s)", + comm->c_name, ompi_comm_print_cid (comm)); /* Initialize all the relevant pointers, since they're used as * sentinel values */ diff --git a/ompi/mca/coll/base/coll_base_functions.h b/ompi/mca/coll/base/coll_base_functions.h index acaf9dcdd38..6c3f8ebc812 100644 --- a/ompi/mca/coll/base/coll_base_functions.h +++ b/ompi/mca/coll/base/coll_base_functions.h @@ -189,6 +189,7 @@ BEGIN_C_DECLS /* All Gather */ int ompi_coll_base_allgather_intra_bruck(ALLGATHER_ARGS); int ompi_coll_base_allgather_intra_recursivedoubling(ALLGATHER_ARGS); +int ompi_coll_base_allgather_intra_sparbit(ALLGATHER_ARGS); int ompi_coll_base_allgather_intra_ring(ALLGATHER_ARGS); int 
ompi_coll_base_allgather_intra_neighborexchange(ALLGATHER_ARGS); int ompi_coll_base_allgather_intra_basic_linear(ALLGATHER_ARGS); @@ -196,6 +197,7 @@ int ompi_coll_base_allgather_intra_two_procs(ALLGATHER_ARGS); /* All GatherV */ int ompi_coll_base_allgatherv_intra_bruck(ALLGATHERV_ARGS); +int ompi_coll_base_allgatherv_intra_sparbit(ALLGATHERV_ARGS); int ompi_coll_base_allgatherv_intra_ring(ALLGATHERV_ARGS); int ompi_coll_base_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS); int ompi_coll_base_allgatherv_intra_basic_default(ALLGATHERV_ARGS); diff --git a/ompi/mca/coll/basic/coll_basic_alltoallw.c b/ompi/mca/coll/basic/coll_basic_alltoallw.c index 93fa880fc2d..0f8a2dae144 100644 --- a/ompi/mca/coll/basic/coll_basic_alltoallw.c +++ b/ompi/mca/coll/basic/coll_basic_alltoallw.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -14,8 +14,8 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ @@ -31,100 +31,123 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" +#include "opal/datatype/opal_convertor_internal.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/pml/pml.h" - +/* + * We want to minimize the amount of temporary memory needed while allowing as many ranks + * to exchange data simultaneously. We use a variation of the ring algorithm, where in a + * single step a process exchanges the data with both neighbors at distance k (on the left + * and the right on a logical ring topology). With this approach we need to pack the data + * for only one of the two neighbors, as we can then use the original buffer (and datatype + * and count) to send the data to the other. + */ static int mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int i, j, size, rank, err = MPI_SUCCESS, max_size; - ompi_request_t *req; - char *tmp_buffer, *save_buffer = NULL; - ptrdiff_t ext, gap = 0; - - /* Initialize. */ + int i, size, rank, left, right, err = MPI_SUCCESS; + ompi_request_t *req = MPI_REQUEST_NULL; + char *tmp_buffer = NULL; + size_t max_size = 0, packed_size, msg_size_left, msg_size_right; + opal_convertor_t convertor; size = ompi_comm_size(comm); - rank = ompi_comm_rank(comm); - - /* If only one process, we're done. */ - if (1 == size) { + if (1 == size) { /* If only one process, we're done. */ return MPI_SUCCESS; } + rank = ompi_comm_rank(comm); - /* Find the largest receive amount */ - for (i = 0, max_size = 0 ; i < size ; ++i) { - ext = opal_datatype_span(&rdtypes[i]->super, rcounts[i], &gap); - - max_size = ext > max_size ? ext : max_size; + /* Find the largest amount of packed send/recv data among all peers where + * we need to pack before the send. 
+ */ + for (i = 1 ; i <= (size >> 1) ; ++i) { + right = (rank + i) % size; +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT + ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, right); + + if( OPAL_LIKELY(opal_local_arch == ompi_proc->super.proc_convertor->master->remote_arch)) { + opal_datatype_type_size(&rdtypes[right]->super, &packed_size); + } else { + packed_size = opal_datatype_compute_remote_size(&rdtypes[right]->super, + ompi_proc->super.proc_convertor->master->remote_sizes); + } +#else + opal_datatype_type_size(&rdtypes[right]->super, &packed_size); +#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ + packed_size *= rcounts[right]; + max_size = packed_size > max_size ? packed_size : max_size; } /* Allocate a temporary buffer */ - tmp_buffer = save_buffer = calloc (max_size, 1); + tmp_buffer = calloc (max_size, 1); if (NULL == tmp_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - tmp_buffer -= gap; - - /* in-place alltoallw slow algorithm (but works) */ - for (i = 0 ; i < size ; ++i) { - size_t msg_size_i; - ompi_datatype_type_size(rdtypes[i], &msg_size_i); - msg_size_i *= rcounts[i]; - for (j = i+1 ; j < size ; ++j) { - size_t msg_size_j; - ompi_datatype_type_size(rdtypes[j], &msg_size_j); - msg_size_j *= rcounts[j]; - - /* Initiate all send/recv to/from others. 
*/ - if (i == rank && msg_size_j != 0) { - /* Copy the data into the temporary buffer */ - err = ompi_datatype_copy_content_same_ddt (rdtypes[j], rcounts[j], - tmp_buffer, (char *) rbuf + rdisps[j]); - if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtypes[j], - j, MCA_COLL_BASE_TAG_ALLTOALLW, comm, &req)); - if (MPI_SUCCESS != err) { goto error_hndl; } - - err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[j], rdtypes[j], - j, MCA_COLL_BASE_TAG_ALLTOALLW, MCA_PML_BASE_SEND_STANDARD, - comm)); - if (MPI_SUCCESS != err) { goto error_hndl; } - } else if (j == rank && msg_size_i != 0) { - /* Copy the data into the temporary buffer */ - err = ompi_datatype_copy_content_same_ddt (rdtypes[i], rcounts[i], - tmp_buffer, (char *) rbuf + rdisps[i]); - if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtypes[i], - i, MCA_COLL_BASE_TAG_ALLTOALLW, comm, &req)); - if (MPI_SUCCESS != err) { goto error_hndl; } - - err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[i], rdtypes[i], - i, MCA_COLL_BASE_TAG_ALLTOALLW, MCA_PML_BASE_SEND_STANDARD, - comm)); - if (MPI_SUCCESS != err) { goto error_hndl; } - } else { - continue; - } - - /* Wait for the requests to complete */ + + for (i = 1 ; i <= (size >> 1) ; ++i) { + struct iovec iov = {.iov_base = tmp_buffer, .iov_len = max_size}; + uint32_t iov_count = 1; + + right = (rank + i) % size; + left = (rank + size - i) % size; + + ompi_datatype_type_size(rdtypes[right], &msg_size_right); + msg_size_right *= rcounts[right]; + + ompi_datatype_type_size(rdtypes[left], &msg_size_left); + msg_size_left *= rcounts[left]; + + if( 0 != msg_size_right ) { /* nothing to exchange with the peer on the right */ + ompi_proc_t *right_proc = ompi_comm_peer_lookup(comm, right); + opal_convertor_clone(right_proc->super.proc_convertor, &convertor, 0); + 
opal_convertor_prepare_for_send(&convertor, &rdtypes[right]->super, rcounts[right], + (char *) rbuf + rdisps[right]); + packed_size = max_size; + err = opal_convertor_pack(&convertor, &iov, &iov_count, &packed_size); + if (1 != err) { goto error_hndl; } + + /* Receive data from the right */ + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[right], rcounts[right], rdtypes[right], + right, MCA_COLL_BASE_TAG_ALLTOALLW, comm, &req)); + if (MPI_SUCCESS != err) { goto error_hndl; } + } + + if( (left != right) && (0 != msg_size_left) ) { + /* Send data to the left */ + err = MCA_PML_CALL(send ((char *) rbuf + rdisps[left], rcounts[left], rdtypes[left], + left, MCA_COLL_BASE_TAG_ALLTOALLW, MCA_PML_BASE_SEND_STANDARD, + comm)); + if (MPI_SUCCESS != err) { goto error_hndl; } + err = ompi_request_wait (&req, MPI_STATUSES_IGNORE); if (MPI_SUCCESS != err) { goto error_hndl; } + + /* Receive data from the left */ + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[left], rcounts[left], rdtypes[left], + left, MCA_COLL_BASE_TAG_ALLTOALLW, comm, &req)); + if (MPI_SUCCESS != err) { goto error_hndl; } } + + if( 0 != msg_size_right ) { /* nothing to exchange with the peer on the right */ + /* Send data to the right */ + err = MCA_PML_CALL(send ((char *) tmp_buffer, packed_size, MPI_PACKED, + right, MCA_COLL_BASE_TAG_ALLTOALLW, MCA_PML_BASE_SEND_STANDARD, + comm)); + if (MPI_SUCCESS != err) { goto error_hndl; } + } + + err = ompi_request_wait (&req, MPI_STATUSES_IGNORE); + if (MPI_SUCCESS != err) { goto error_hndl; } } error_hndl: /* Free the temporary buffer */ - free (save_buffer); + free (tmp_buffer); /* All done */ diff --git a/ompi/mca/coll/basic/coll_basic_gatherv.c b/ompi/mca/coll/basic/coll_basic_gatherv.c index 6ea30c49afe..c45c4f28b3b 100644 --- a/ompi/mca/coll/basic/coll_basic_gatherv.c +++ b/ompi/mca/coll/basic/coll_basic_gatherv.c @@ -9,8 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -47,6 +47,7 @@ mca_coll_basic_gatherv_intra(const void *sbuf, int scount, int i, rank, size, err; char *ptmp; ptrdiff_t lb, extent; + size_t rdsize; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -58,7 +59,9 @@ mca_coll_basic_gatherv_intra(const void *sbuf, int scount, 0) */ if (rank != root) { - if (scount > 0) { + size_t sdsize; + ompi_datatype_type_size(sdtype, &sdsize); + if (scount > 0 && sdsize > 0) { return MCA_PML_CALL(send(sbuf, scount, sdtype, root, MCA_COLL_BASE_TAG_GATHERV, MCA_PML_BASE_SEND_STANDARD, comm)); @@ -68,6 +71,12 @@ mca_coll_basic_gatherv_intra(const void *sbuf, int scount, /* I am the root, loop receiving data. */ + ompi_datatype_type_size(rdtype, &rdsize); + if (OPAL_UNLIKELY(0 == rdsize)) { + /* bozzo case */ + return MPI_SUCCESS; + } + err = ompi_datatype_get_extent(rdtype, &lb, &extent); if (OMPI_SUCCESS != err) { return OMPI_ERROR; diff --git a/ompi/mca/coll/basic/coll_basic_scatterv.c b/ompi/mca/coll/basic/coll_basic_scatterv.c index 16602158b2b..7138cd271d4 100644 --- a/ompi/mca/coll/basic/coll_basic_scatterv.c +++ b/ompi/mca/coll/basic/coll_basic_scatterv.c @@ -9,8 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -49,6 +49,7 @@ mca_coll_basic_scatterv_intra(const void *sbuf, const int *scounts, int i, rank, size, err; char *ptmp; ptrdiff_t lb, extent; + size_t sdsize; /* Initialize */ @@ -58,8 +59,10 @@ mca_coll_basic_scatterv_intra(const void *sbuf, const int *scounts, /* If not root, receive data. */ if (rank != root) { + size_t rdsize; + ompi_datatype_type_size(rdtype, &rdsize); /* Only receive if there is something to receive */ - if (rcount > 0) { + if (rcount > 0 && rdsize > 0) { return MCA_PML_CALL(recv(rbuf, rcount, rdtype, root, MCA_COLL_BASE_TAG_SCATTERV, comm, MPI_STATUS_IGNORE)); @@ -67,6 +70,12 @@ mca_coll_basic_scatterv_intra(const void *sbuf, const int *scounts, return MPI_SUCCESS; } + ompi_datatype_type_size(sdtype, &sdsize); + if (OPAL_UNLIKELY(0 == sdsize)) { + /* bozzo case */ + return MPI_SUCCESS; + } + /* I am the root, loop sending data. */ err = ompi_datatype_get_extent(sdtype, &lb, &extent); diff --git a/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c b/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c index 6537bb9bc7c..658cf479748 100644 --- a/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c +++ b/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c @@ -1,8 +1,11 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2014-2020 The University of Tennessee and The University + * Copyright (c) 2014-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. + * * * $COPYRIGHT$ * @@ -491,7 +494,7 @@ static void era_debug_print_group(int lvl, ompi_group_t *group, ompi_communicato } s = 128 + n * 16; str = (char*)malloc(s); - sprintf(str, "Group of size %d. Ranks in %d.%d: (", n, comm->c_contextid, comm->c_epoch); + sprintf(str, "Group of size %d. Ranks in %d.%d: (", n, comm->c_index, comm->c_epoch); p = strlen(str); for(i = 0; i < n; i++) { snprintf(str + p, s - p, "%d%s", gra[i], i==n-1 ? 
"" : ", "); @@ -896,19 +899,19 @@ static void era_agreement_info_set_comm(era_agreement_info_t *ci, ompi_communica int *src_ra; int r, grp_size; - assert( comm->c_contextid == ci->agreement_id.ERAID_FIELDS.contextid ); + assert( comm->c_index == ci->agreement_id.ERAID_FIELDS.contextid ); assert( comm->c_epoch == ci->agreement_id.ERAID_FIELDS.epoch ); assert( ci->comm == NULL ); ci->comm = comm; OBJ_RETAIN(comm); OPAL_OUTPUT_VERBOSE((30, ompi_ftmpi_output_handle, - "%s ftagree:agreement (ERA) Agreement (%d.%d).%d: assigning to communicator %d\n", + "%s ftagree:agreement (ERA) Agreement (%d.%d).%d: assigning to communicator %s\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), ci->agreement_id.ERAID_FIELDS.contextid, ci->agreement_id.ERAID_FIELDS.epoch, ci->agreement_id.ERAID_FIELDS.agreementid, - comm->c_contextid)); + ompi_comm_print_cid(comm))); if( AGS(comm) == NULL ) { era_comm_agreement_specific_t *ags = OBJ_NEW(era_comm_agreement_specific_t); @@ -1006,6 +1009,7 @@ static void era_combine_agreement_values(era_agreement_info_t *ni, era_value_t * ni->current_value->header.max_aid = value->header.max_aid; } + assert(NULL != value->new_dead_array || 0 == value->header.nb_new_dead); era_merge_new_dead_list(ni, value->header.nb_new_dead, value->new_dead_array); } @@ -1622,11 +1626,11 @@ static void era_decide(era_value_t *decided_value, era_agreement_info_t *ci) ompi_communicator_t *comm; era_rank_item_t *rl; int r, s, dead_size; - void *value; assert( 0 != ci->agreement_id.ERAID_FIELDS.agreementid ); #if OPAL_ENABLE_DEBUG + void *value; r = era_parent(ci); if( opal_hash_table_get_value_uint64(&era_passed_agreements, ci->agreement_id.ERAID_KEY, &value) == OMPI_SUCCESS ) { @@ -1655,7 +1659,7 @@ static void era_decide(era_value_t *decided_value, era_agreement_info_t *ci) OPAL_OUTPUT_VERBOSE(((ci->comm->c_my_rank == r)? 2: 10, ompi_ftmpi_output_handle, "%s ftagree:agreement (ERA) decide %08x.%d.%d.. 
on agreement (%d.%d).%d\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - (0!=ERA_VALUE_BYTES_COUNT(&decided_value->header))? *(int*)decided_value->bytes: 0, + (NULL != decided_value->bytes)? *(int*)decided_value->bytes: 0, decided_value->header.ret, decided_value->header.nb_new_dead, ci->agreement_id.ERAID_FIELDS.contextid, @@ -1673,7 +1677,7 @@ static void era_decide(era_value_t *decided_value, era_agreement_info_t *ci) OPAL_OUTPUT_VERBOSE((30, ompi_ftmpi_output_handle, "%s ftagree:agreement (ERA) decide %08x.%d.%d on agreement (%d.%d).%d: adding up to %d processes to the list of agreed deaths\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - (0!=ERA_VALUE_BYTES_COUNT(&decided_value->header))? *(int*)decided_value->bytes: 0, + (NULL != decided_value->bytes)? *(int*)decided_value->bytes: 0, decided_value->header.ret, decided_value->header.nb_new_dead, ci->agreement_id.ERAID_FIELDS.contextid, @@ -1734,7 +1738,7 @@ static void era_decide(era_value_t *decided_value, era_agreement_info_t *ci) OPAL_OUTPUT_VERBOSE((10, ompi_ftmpi_output_handle, "%s ftagree:agreement (ERA) decide %08x.%d.%d.. on agreement (%d.%d).%d: group of agreed deaths is of size %d\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - (0!=ERA_VALUE_BYTES_COUNT(&decided_value->header))? *(int*)decided_value->bytes: 0, + (NULL != decided_value->bytes)? *(int*)decided_value->bytes: 0, decided_value->header.ret, decided_value->header.nb_new_dead, ci->agreement_id.ERAID_FIELDS.contextid, @@ -2119,7 +2123,7 @@ static void send_msg(ompi_communicator_t *comm, agreement_id.ERAID_FIELDS.epoch, agreement_id.ERAID_FIELDS.agreementid, era_msg_type_to_string(type), - (0!=ERA_VALUE_BYTES_COUNT(&value->header))? *(int*)value->bytes: 0, + (NULL != value->bytes)? 
*(int*)value->bytes: 0, value->header.ret, value->header.nb_new_dead, dst, @@ -2136,7 +2140,7 @@ static void send_msg(ompi_communicator_t *comm, agreement_id.ERAID_FIELDS.epoch, agreement_id.ERAID_FIELDS.agreementid, era_msg_type_to_string(type), - (0!=ERA_VALUE_BYTES_COUNT(&value->header))? *(int*)value->bytes: 0, + (NULL != value->bytes)? *(int*)value->bytes: 0, value->header.ret, value->header.nb_new_dead, nb_ack_failed, @@ -2150,7 +2154,7 @@ static void send_msg(ompi_communicator_t *comm, agreement_id.ERAID_FIELDS.epoch, agreement_id.ERAID_FIELDS.agreementid, era_msg_type_to_string(type), - (0!=ERA_VALUE_BYTES_COUNT(&value->header))? *(int*)value->bytes: 0, + (NULL != value->bytes)? *(int*)value->bytes: 0, value->header.ret, value->header.nb_new_dead, dst, @@ -2166,7 +2170,7 @@ static void send_msg(ompi_communicator_t *comm, } #endif /* OPAL_ENABLE_DEBUG */ - assert( NULL == comm || agreement_id.ERAID_FIELDS.contextid == ompi_comm_get_cid(comm) ); + assert( NULL == comm || agreement_id.ERAID_FIELDS.contextid == ompi_comm_get_local_cid(comm) ); assert( NULL == comm || agreement_id.ERAID_FIELDS.epoch == comm->c_epoch ); if( NULL == comm ) { @@ -2230,7 +2234,7 @@ static void send_msg(ompi_communicator_t *comm, agreement_id.ERAID_FIELDS.epoch, agreement_id.ERAID_FIELDS.agreementid, era_msg_type_to_string(type), - (0!=ERA_VALUE_BYTES_COUNT(&value->header))? *(int*)value->bytes: 0, + (NULL != value->bytes)? *(int*)value->bytes: 0, value->header.ret, value->header.nb_new_dead, msg_header.nb_ack, @@ -2274,7 +2278,7 @@ static void send_msg(ompi_communicator_t *comm, agreement_id.ERAID_FIELDS.epoch, agreement_id.ERAID_FIELDS.agreementid, era_msg_type_to_string(type), - (0!=ERA_VALUE_BYTES_COUNT(&value->header))? *(int*)value->bytes: 0, + (NULL != value->bytes)? 
*(int*)value->bytes: 0, value->header.ret, value->header.nb_new_dead, msg_header.nb_ack, @@ -2421,7 +2425,7 @@ static void msg_up(era_msg_header_t *msg_header, uint8_t *bytes, int *new_dead, msg_header->agreement_id.ERAID_FIELDS.agreementid, msg_header->src_comm_rank, OMPI_NAME_PRINT(&msg_header->src_proc_name), - (0!=ERA_VALUE_BYTES_COUNT(&msg_header->agreement_value_header))? *(int*)bytes: 0, + (NULL != bytes)? *(int*)bytes: 0, msg_header->agreement_value_header.ret, msg_header->agreement_value_header.nb_new_dead, msg_header->nb_ack)); @@ -2552,7 +2556,7 @@ static void msg_down(era_msg_header_t *msg_header, uint8_t *bytes, int *new_dead msg_header->agreement_id.ERAID_FIELDS.agreementid, msg_header->src_comm_rank, OMPI_NAME_PRINT(&msg_header->src_proc_name), - (0!=ERA_VALUE_BYTES_COUNT(&msg_header->agreement_value_header))? *(int*)bytes: 0, + (NULL != bytes)? *(int*)bytes: 0, msg_header->agreement_value_header.nb_new_dead)); ci = era_lookup_agreement_info( msg_header->agreement_id ); @@ -2635,7 +2639,6 @@ static void era_cb_fn(struct mca_btl_base_module_t* btl, const mca_btl_base_receive_descriptor_t* descriptor) { era_incomplete_msg_t *incomplete_msg = NULL; - mca_btl_base_tag_t tag = descriptor->tag; era_msg_header_t *msg_header; era_frag_t *frag; uint64_t src_hash; @@ -2645,7 +2648,7 @@ static void era_cb_fn(struct mca_btl_base_module_t* btl, int *new_dead; int *ack_failed; - assert(MCA_BTL_TAG_FT_AGREE == tag); + assert(MCA_BTL_TAG_FT_AGREE == descriptor->tag); assert(1 == descriptor->des_segment_count); frag = (era_frag_t*)descriptor->des_segments->seg_addr.pval; @@ -2723,7 +2726,7 @@ static void era_cb_fn(struct mca_btl_base_module_t* btl, msg_header->agreement_id.ERAID_FIELDS.epoch, msg_header->agreement_id.ERAID_FIELDS.agreementid, msg_header->msg_type, - (0!=ERA_VALUE_BYTES_COUNT(&msg_header->agreement_value_header))? *(int*)value_bytes: 0, + (NULL != value_bytes)? 
*(int*)value_bytes: 0, msg_header->agreement_value_header.ret, msg_header->agreement_value_header.nb_new_dead, msg_header->src_comm_rank, @@ -2774,10 +2777,10 @@ static void era_on_comm_rank_failure(ompi_communicator_t *comm, int rank, bool r opal_hash_table_t *msg_table; OPAL_OUTPUT_VERBOSE((4, ompi_ftmpi_output_handle, - "%s ftagree:agreement (ERA) %d in communicator (%d.%d) died\n", + "%s ftagree:agreement (ERA) %d in communicator (%s.%d) died\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), rank, - comm->c_contextid, + ompi_comm_print_cid(comm), comm->c_epoch)); if( AGS(comm) != NULL ) { @@ -2810,7 +2813,7 @@ static void era_on_comm_rank_failure(ompi_communicator_t *comm, int rank, bool r &key64, &next_value, node, &node); - if( cid.ERAID_FIELDS.contextid == comm->c_contextid && + if( cid.ERAID_FIELDS.contextid == comm->c_contextid.cid_sub.u64 && cid.ERAID_FIELDS.epoch == comm->c_epoch ) { ci = (era_agreement_info_t *)value; OPAL_OUTPUT_VERBOSE((6, ompi_ftmpi_output_handle, @@ -3026,7 +3029,7 @@ static int mca_coll_ftagree_era_prepare_agreement(ompi_communicator_t* comm, } /* Let's find the id of the new agreement */ - agreement_id.ERAID_FIELDS.contextid = comm->c_contextid; + agreement_id.ERAID_FIELDS.contextid = comm->c_contextid.cid_sub.u64; agreement_id.ERAID_FIELDS.epoch = comm->c_epoch; agreement_id.ERAID_FIELDS.agreementid = (uint16_t)ag_info->agreement_seq_num; @@ -3115,12 +3118,12 @@ static int mca_coll_ftagree_era_complete_agreement(era_identifier_t agreement_id assert(0 != agreement_id.ERAID_FIELDS.agreementid); ci = era_lookup_agreement_info(agreement_id); + assert(NULL != ci); + comm = ci->comm; /** Now, it's time to remove that guy from the ongoing agreements */ opal_hash_table_remove_value_uint64(&era_ongoing_agreements, agreement_id.ERAID_KEY); - comm = ci->comm; - OBJ_RELEASE(ci); /* This will take care of the content of ci too */ ret = opal_hash_table_get_value_uint64(&era_passed_agreements, @@ -3156,7 +3159,7 @@ static int 
mca_coll_ftagree_era_complete_agreement(era_identifier_t agreement_id agreement_id.ERAID_FIELDS.epoch, agreement_id.ERAID_FIELDS.agreementid, ret, - (0!=ERA_VALUE_BYTES_COUNT(&av->header))? *(int*)contrib: 0)); + (NULL != contrib)? *(int*)contrib: 0)); return ret; } @@ -3226,10 +3229,19 @@ int mca_coll_ftagree_era_inter(void *contrib, contriblh[0] = ~0; contriblh[1] = *(int*)contrib; } - ompi_comm_set(&shadowcomm, comm, - ompi_group_size(uniongrp), NULL, 0, NULL, - NULL, comm->error_handler, NULL, - uniongrp, NULL); + + ompi_comm_set(&shadowcomm, /* new comm */ + comm, /* old comm */ + ompi_group_size(uniongrp), /* local_size */ + NULL, /* local_procs */ + 0, /* remote_size */ + NULL, /* remote procs */ + NULL, /* attrs */ + comm->error_handler, /* error handler */ + NULL, /* local group */ + uniongrp, /* remote group */ + 0); /* flags */ + ompi_group_free(&uniongrp); shadowcomm->c_contextid = comm->c_contextid; shadowcomm->c_epoch = comm->c_epoch; @@ -3364,7 +3376,7 @@ int mca_coll_ftagree_era_free_comm(ompi_communicator_t* comm, } while(rc != MPI_SUCCESS); OBJ_RELEASE(acked); - aid.ERAID_FIELDS.contextid = comm->c_contextid; + aid.ERAID_FIELDS.contextid = comm->c_contextid.cid_sub.u64; aid.ERAID_FIELDS.epoch = comm->c_epoch; opal_mutex_lock(&era_mutex); diff --git a/ompi/mca/coll/ftagree/coll_ftagree_earlyterminating.c b/ompi/mca/coll/ftagree/coll_ftagree_earlyterminating.c index eeee1195eaa..d2a48098a0e 100644 --- a/ompi/mca/coll/ftagree/coll_ftagree_earlyterminating.c +++ b/ompi/mca/coll/ftagree/coll_ftagree_earlyterminating.c @@ -101,7 +101,7 @@ mca_coll_ftagree_eta_intra(void *contrib, { /* ignore acked failures (add them later to the result) */ ompi_group_t* ackedgrp = NULL; int npa; int *aranks, *cranks; ackedgrp = *group; - if( 0 != (npa = ompi_group_size(ackedgrp)) ) { + if( 0 != (npa = (NULL == ackedgrp? 
0: ompi_group_size(ackedgrp))) ) { aranks = calloc( npa, sizeof(int) ); for( i = 0; i < npa; i++ ) aranks[i] = i; cranks = calloc( npa, sizeof(int) ); @@ -336,7 +336,7 @@ mca_coll_ftagree_eta_intra(void *contrib, free(statuses); free(in); /* Let's build the group of failed processes */ - if( NULL != group ) { + if( update_grp ) { int pos; /* We overwrite proc_status because it is not used anymore */ int *failed = proc_status; @@ -346,10 +346,10 @@ mca_coll_ftagree_eta_intra(void *contrib, failed[pos++] = i; } } - if( update_grp ) { + if( NULL != *group ) { OBJ_RELEASE(*group); - ompi_group_incl(comm->c_remote_group, pos, failed, group); } + ompi_group_incl(comm->c_remote_group, pos, failed, group); } free(proc_status); @@ -363,7 +363,7 @@ mca_coll_ftagree_eta_intra(void *contrib, OPAL_OUTPUT_VERBOSE((5, ompi_ftmpi_output_handle, "%s ftagree:agreement (ETA) return %d with 4 first bytes of result 0x%08x and dead group with %d processes", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), ret, *(int*)contrib, - (NULL == group) ? 0 : (*group)->grp_proc_count)); + (NULL == *group) ? 0 : (*group)->grp_proc_count)); return ret; } diff --git a/ompi/mca/coll/han/coll_han_dynamic.c b/ompi/mca/coll/han/coll_han_dynamic.c index 9e3469b0160..6cdcb9af4f0 100644 --- a/ompi/mca/coll/han/coll_han_dynamic.c +++ b/ompi/mca/coll/han/coll_han_dynamic.c @@ -1,6 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2020 Bull S.A.S. All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
* * $COPYRIGHT$ * @@ -94,9 +96,9 @@ mca_coll_han_get_all_coll_modules(struct ompi_communicator_t *comm, han_module->modules_storage.modules[id].module_handler = module; opal_output_verbose(80, mca_coll_han_component.han_output, "coll:han:get_all_coll_modules HAN found module %s with id %d " - "for topological level %d (%s) for communicator (%d/%s)\n", + "for topological level %d (%s) for communicator (%s/%s)\n", name, id, topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); nb_modules++; } } @@ -113,9 +115,9 @@ mca_coll_han_get_all_coll_modules(struct ompi_communicator_t *comm, opal_output_verbose(60, mca_coll_han_component.han_output, "coll:han:get_all_coll_modules HAN sub-communicator modules storage " "for topological level %d (%s) gets %d modules " - "for communicator (%d/%s)\n", + "for communicator (%s/%s)\n", topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - nb_modules, comm->c_contextid, comm->c_name); + nb_modules, ompi_comm_print_cid(comm), comm->c_name); assert(0 != nb_modules); @@ -352,11 +354,11 @@ mca_coll_han_allgather_intra_dynamic(const void *sbuf, int scount, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_allgather_intra_dynamic " "HAN did not find any valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s). " + "with topological level %d (%s) on communicator (%s/%s). " "Please check dynamic file/mca parameters\n", ALLGATHER, mca_coll_base_colltype_to_str(ALLGATHER), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/ALLGATHER: No module found for the sub-communicator. 
" "Falling back to another component\n")); @@ -370,11 +372,11 @@ mca_coll_han_allgather_intra_dynamic(const void *sbuf, int scount, han_module->dynamic_errors++; opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_allgather_intra_dynamic HAN found valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s) but this module cannot handle this collective. " + "with topological level %d (%s) on communicator (%s/%s) but this module cannot handle this collective. " "Please check dynamic file/mca parameters\n", ALLGATHER, mca_coll_base_colltype_to_str(ALLGATHER), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/ALLGATHER: the module found for the sub-communicator" " cannot handle the ALLGATHER operation. Falling back to another component\n")); @@ -462,11 +464,11 @@ mca_coll_han_allgatherv_intra_dynamic(const void *sbuf, int scount, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_allgatherv_intra_dynamic " "HAN did not find any valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s). " + "with topological level %d (%s) on communicator (%s/%s). " "Please check dynamic file/mca parameters\n", ALLGATHERV, mca_coll_base_colltype_to_str(ALLGATHERV), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/ALLGATHERV: No module found for the sub-communicator. 
" "Falling back to another component\n")); @@ -481,12 +483,12 @@ mca_coll_han_allgatherv_intra_dynamic(const void *sbuf, int scount, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_allgatherv_intra_dynamic " "HAN found valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s) " + "with topological level %d (%s) on communicator (%s/%s) " "but this module cannot handle this collective. " "Please check dynamic file/mca parameters\n", ALLGATHERV, mca_coll_base_colltype_to_str(ALLGATHERV), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/ALLGATHERV: the module found for the sub-" "communicator cannot handle the ALLGATHERV operation. " @@ -503,11 +505,11 @@ mca_coll_han_allgatherv_intra_dynamic(const void *sbuf, int scount, opal_output_verbose(30, mca_coll_han_component.han_output, "coll:han:mca_coll_han_allgatherv_intra_dynamic " "HAN used for collective %d (%s) with topological level %d (%s) " - "on communicator (%d/%s) but this module cannot handle " + "on communicator (%s/%s) but this module cannot handle " "this collective on this topologic level\n", ALLGATHERV, mca_coll_base_colltype_to_str(ALLGATHERV), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); allgatherv = han_module->previous_allgatherv; sub_module = han_module->previous_allgatherv_module; } else { @@ -573,11 +575,11 @@ mca_coll_han_allreduce_intra_dynamic(const void *sbuf, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_allreduce_intra_dynamic " "HAN did not find any valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s). " + "with topological level %d (%s) on communicator (%s/%s). 
" "Please check dynamic file/mca parameters\n", ALLREDUCE, mca_coll_base_colltype_to_str(ALLREDUCE), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/ALLREDUCE: No module found for the sub-communicator. " "Falling back to another component\n")); @@ -592,12 +594,12 @@ mca_coll_han_allreduce_intra_dynamic(const void *sbuf, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_allreduce_intra_dynamic " "HAN found valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s) " + "with topological level %d (%s) on communicator (%s/%s) " "but this module cannot handle this collective. " "Please check dynamic file/mca parameters\n", ALLREDUCE, mca_coll_base_colltype_to_str(ALLREDUCE), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/ALLREDUCE: the module found for the sub-" "communicator cannot handle the ALLREDUCE operation. " @@ -675,11 +677,11 @@ mca_coll_han_barrier_intra_dynamic(struct ompi_communicator_t *comm, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_barrier_intra_dynamic " "Han did not find any valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s). " + "with topological level %d (%s) on communicator (%s/%s). " "Please check dynamic file/mca parameters\n", BARRIER, mca_coll_base_colltype_to_str(BARRIER), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/BARRIER: No module found for the sub-communicator. 
" "Falling back to another component\n")); @@ -694,12 +696,12 @@ mca_coll_han_barrier_intra_dynamic(struct ompi_communicator_t *comm, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_barrier_intra_dynamic " "Han found valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s) " + "with topological level %d (%s) on communicator (%s/%s) " "but this module cannot handle this collective. " "Please check dynamic file/mca parameters\n", BARRIER, mca_coll_base_colltype_to_str(BARRIER), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/BARRIER: the module found for the sub-" "communicator cannot handle the BARRIER operation. " @@ -772,11 +774,11 @@ mca_coll_han_bcast_intra_dynamic(void *buff, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_bcast_intra_dynamic " "HAN did not find any valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s). " + "with topological level %d (%s) on communicator (%s/%s). " "Please check dynamic file/mca parameters\n", BCAST, mca_coll_base_colltype_to_str(BCAST), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/BCAST: No module found for the sub-communicator. " "Falling back to another component\n")); @@ -791,12 +793,12 @@ mca_coll_han_bcast_intra_dynamic(void *buff, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_bcast_intra_dynamic " "HAN found valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s) " + "with topological level %d (%s) on communicator (%s/%s) " "but this module cannot handle this collective. 
" "Please check dynamic file/mca parameters\n", BCAST, mca_coll_base_colltype_to_str(BCAST), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/BCAST: the module found for the sub-" "communicator cannot handle the BCAST operation. " @@ -882,11 +884,11 @@ mca_coll_han_gather_intra_dynamic(const void *sbuf, int scount, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_gather_intra_dynamic " "HAN did not find any valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s). " + "with topological level %d (%s) on communicator (%s/%s). " "Please check dynamic file/mca parameters\n", GATHER, mca_coll_base_colltype_to_str(GATHER), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/GATHER: No module found for the sub-communicator. " "Falling back to another component\n")); @@ -901,12 +903,12 @@ mca_coll_han_gather_intra_dynamic(const void *sbuf, int scount, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_gather_intra_dynamic " "HAN found valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s) " + "with topological level %d (%s) on communicator (%s/%s) " "but this module cannot handle this collective. " "Please check dynamic file/mca parameters\n", GATHER, mca_coll_base_colltype_to_str(GATHER), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/GATHER: the module found for the sub-" "communicator cannot handle the GATHER operation. 
" @@ -989,11 +991,11 @@ mca_coll_han_reduce_intra_dynamic(const void *sbuf, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_reduce_intra_dynamic " "HAN did not find any valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s). " + "with topological level %d (%s) on communicator (%s/%s). " "Please check dynamic file/mca parameters\n", REDUCE, mca_coll_base_colltype_to_str(REDUCE), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/REDUCE: No module found for the sub-communicator. " "Falling back to another component\n")); @@ -1008,12 +1010,12 @@ mca_coll_han_reduce_intra_dynamic(const void *sbuf, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_reduce_intra_dynamic " "HAN found valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s) " + "with topological level %d (%s) on communicator (%s/%s) " "but this module cannot handle this collective. " "Please check dynamic file/mca parameters\n", REDUCE, mca_coll_base_colltype_to_str(REDUCE), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/REDUCE: the module found for the sub-" "communicator cannot handle the REDUCE operation. " @@ -1104,11 +1106,11 @@ mca_coll_han_scatter_intra_dynamic(const void *sbuf, int scount, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_scatter_intra_dynamic " "HAN did not find any valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s). " + "with topological level %d (%s) on communicator (%s/%s). 
" "Please check dynamic file/mca parameters\n", SCATTER, mca_coll_base_colltype_to_str(SCATTER), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/SCATTER: No module found for the sub-communicator. " "Falling back to another component\n")); @@ -1123,12 +1125,12 @@ mca_coll_han_scatter_intra_dynamic(const void *sbuf, int scount, opal_output_verbose(verbosity, mca_coll_han_component.han_output, "coll:han:mca_coll_han_scatter_intra_dynamic " "HAN found valid module for collective %d (%s) " - "with topological level %d (%s) on communicator (%d/%s) " + "with topological level %d (%s) on communicator (%s/%s) " "but this module cannot handle this collective. " "Please check dynamic file/mca parameters\n", SCATTER, mca_coll_base_colltype_to_str(SCATTER), topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), - comm->c_contextid, comm->c_name); + ompi_comm_print_cid(comm), comm->c_name); OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "HAN/SCATTER: the module found for the sub-" "communicator cannot handle the SCATTER operation. " diff --git a/ompi/mca/coll/han/coll_han_module.c b/ompi/mca/coll/han/coll_han_module.c index bcb1de75cfd..166bdc8057e 100644 --- a/ompi/mca/coll/han/coll_han_module.c +++ b/ompi/mca/coll/han/coll_han_module.c @@ -4,6 +4,8 @@ * reserved. * Copyright (c) 2020 Bull S.A.S. All rights reserved. * Copyright (c) 2021 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -187,21 +189,21 @@ mca_coll_han_comm_query(struct ompi_communicator_t * comm, int *priority) */ if (OMPI_COMM_IS_INTER(comm)) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:han:comm_query (%d/%s): intercomm; disqualifying myself", - comm->c_contextid, comm->c_name); + "coll:han:comm_query (%s/%s): intercomm; disqualifying myself", + ompi_comm_print_cid(comm), comm->c_name); return NULL; } if (1 == ompi_comm_size(comm)) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:han:comm_query (%d/%s): comm is too small; disqualifying myself", - comm->c_contextid, comm->c_name); + "coll:han:comm_query (%s/%s): comm is too small; disqualifying myself", + ompi_comm_print_cid(comm), comm->c_name); return NULL; } if( !ompi_group_have_remote_peers(comm->c_local_group) ) { /* The group only contains local processes. Disable HAN for now */ opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:han:comm_query (%d/%s): comm has only local processes; disqualifying myself", - comm->c_contextid, comm->c_name); + "coll:han:comm_query (%s/%s): comm has only local processes; disqualifying myself", + ompi_comm_print_cid(comm), comm->c_name); return NULL; } /* Get the priority level attached to this module. 
If priority is less @@ -209,8 +211,8 @@ mca_coll_han_comm_query(struct ompi_communicator_t * comm, int *priority) *priority = mca_coll_han_component.han_priority; if (mca_coll_han_component.han_priority < 0) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:han:comm_query (%d/%s): priority too low; disqualifying myself", - comm->c_contextid, comm->c_name); + "coll:han:comm_query (%s/%s): priority too low; disqualifying myself", + ompi_comm_print_cid(comm), comm->c_name); return NULL; } @@ -264,8 +266,8 @@ mca_coll_han_comm_query(struct ompi_communicator_t * comm, int *priority) } opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:han:comm_query (%d/%s): pick me! pick me!", - comm->c_contextid, comm->c_name); + "coll:han:comm_query (%s/%s): pick me! pick me!", + ompi_comm_print_cid(comm), comm->c_name); return &(han_module->super); } @@ -280,8 +282,8 @@ mca_coll_han_comm_query(struct ompi_communicator_t * comm, int *priority) do { \ if (!comm->c_coll->coll_ ## __api || !comm->c_coll->coll_ ## __api ## _module) { \ opal_output_verbose(1, ompi_coll_base_framework.framework_output, \ - "(%d/%s): no underlying " # __api"; disqualifying myself", \ - comm->c_contextid, comm->c_name); \ + "(%s/%s): no underlying " # __api"; disqualifying myself", \ + ompi_comm_print_cid(comm), comm->c_name); \ goto handle_error; \ } \ han_module->previous_ ## __api = comm->c_coll->coll_ ## __api; \ diff --git a/ompi/mca/coll/hcoll/coll_hcoll_module.c b/ompi/mca/coll/hcoll/coll_hcoll_module.c index d09607d8d02..8da8d63522e 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_module.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_module.c @@ -16,6 +16,8 @@ #include "coll_hcoll.h" #include "coll_hcoll_dtypes.h" +static int use_safety_valve = 0; + int hcoll_comm_attr_keyval; int hcoll_type_attr_keyval; mca_coll_hcoll_dtype_t zero_dte_mapping; @@ -327,6 +329,7 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority) cm->using_mem_hooks = 1; 
opal_mem_hooks_register_release(mca_coll_hcoll_mem_release_cb, NULL); setenv("MXM_HCOLL_MEM_ON_DEMAND_MAP", "y", 0); + use_safety_valve = 1; } } } else { @@ -376,7 +379,7 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority) hcoll_module->comm = comm; HCOL_VERBOSE(10,"Creating hcoll_context for comm %p, comm_id %d, comm_size %d", - (void*)comm,comm->c_contextid,ompi_comm_size(comm)); + (void*)comm,comm->c_index,ompi_comm_size(comm)); hcoll_module->hcoll_context = hcoll_create_context((rte_grp_handle_t)comm); @@ -447,5 +450,9 @@ OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_t, mca_coll_hcoll_module_construct, mca_coll_hcoll_module_destruct); - - +static void safety_valve(void) __attribute__((destructor)); +void safety_valve(void) { + if (use_safety_valve) { + opal_mem_hooks_unregister_release(mca_coll_hcoll_mem_release_cb); + } +} diff --git a/ompi/mca/coll/hcoll/coll_hcoll_ops.c b/ompi/mca/coll/hcoll/coll_hcoll_ops.c index 0f52a25b752..d80798b2407 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_ops.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_ops.c @@ -156,12 +156,19 @@ int mca_coll_hcoll_gather(const void *sbuf, int scount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module){ + mca_coll_base_module_t *module) +{ + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; dte_data_representation_t stype; dte_data_representation_t rtype; int rc; + HCOL_VERBOSE(20,"RUNNING HCOL GATHER"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + + if (root != comm->c_my_rank) { + rdtype = sdtype; + } + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { @@ -368,13 +375,19 @@ int mca_coll_hcoll_gatherv(const void* sbuf, int scount, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { + mca_coll_hcoll_module_t *hcoll_module = 
(mca_coll_hcoll_module_t*)module; dte_data_representation_t stype; dte_data_representation_t rtype; int rc; HCOL_VERBOSE(20,"RUNNING HCOL GATHERV"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + + if (root != comm->c_my_rank) { + rdtype = sdtype; + } + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { /*If we are here then datatype is not simple predefined datatype */ /*In future we need to add more complex mapping to the dte_data_representation_t */ @@ -387,7 +400,9 @@ int mca_coll_hcoll_gatherv(const void* sbuf, int scount, comm, hcoll_module->previous_gatherv_module); return rc; } - rc = hcoll_collectives.coll_gatherv((void *)sbuf, scount, stype, rbuf, (int *)rcounts, (int *)displs, rtype, root, hcoll_module->hcoll_context); + rc = hcoll_collectives.coll_gatherv((void *)sbuf, scount, stype, rbuf, + (int *)rcounts, (int *)displs, rtype, + root, hcoll_module->hcoll_context); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(20,"RUNNING FALLBACK GATHERV"); rc = hcoll_module->previous_gatherv(sbuf,scount,sdtype, @@ -406,13 +421,20 @@ int mca_coll_hcoll_scatterv(const void* sbuf, const int *scounts, const int *dis struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; dte_data_representation_t stype; dte_data_representation_t rtype; int rc; + HCOL_VERBOSE(20,"RUNNING HCOL SCATTERV"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + + if (root != comm->c_my_rank) { + sdtype = rdtype; + } + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); + if (rbuf == MPI_IN_PLACE) { assert(root == comm->c_my_rank); rtype = stype; @@ -693,13 +715,20 @@ int mca_coll_hcoll_igatherv(const void* sbuf, int scount, ompi_request_t ** request, mca_coll_base_module_t 
*module) { + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; dte_data_representation_t stype; dte_data_representation_t rtype; int rc; void** rt_handle; + HCOL_VERBOSE(20,"RUNNING HCOL IGATHERV"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + rt_handle = (void**) request; + + if (root != comm->c_my_rank) { + rdtype = sdtype; + } + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { diff --git a/ompi/mca/coll/hcoll/coll_hcoll_rte.c b/ompi/mca/coll/hcoll/coll_hcoll_rte.c index e2a1ea9dabf..7b1000568be 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_rte.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_rte.c @@ -320,7 +320,7 @@ static uint32_t jobid(void){ } static int group_id(rte_grp_handle_t group){ - return ((ompi_communicator_t *)group)->c_contextid; + return ((ompi_communicator_t *)group)->c_index; } static int diff --git a/ompi/mca/coll/libnbc/nbc.c b/ompi/mca/coll/libnbc/nbc.c index 2054ecf7735..1c380c682de 100644 --- a/ompi/mca/coll/libnbc/nbc.c +++ b/ompi/mca/coll/libnbc/nbc.c @@ -16,7 +16,7 @@ * Author(s): Torsten Hoefler * * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2021 IBM Corporation. All rights reserved. * Copyright (c) 2017 Ian Bradley Morgan and Anthony Skjellum. All * rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
@@ -119,7 +119,7 @@ static int nbc_schedule_round_append (NBC_Schedule *schedule, void *data, int da } /* this function puts a send into the schedule */ -static int NBC_Sched_send_internal (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, bool local, NBC_Schedule *schedule, bool barrier) { +static int NBC_Sched_send_internal (const void* buf, char tmpbuf, size_t count, MPI_Datatype datatype, int dest, bool local, NBC_Schedule *schedule, bool barrier) { NBC_Args_send send_args; int ret; @@ -143,16 +143,16 @@ static int NBC_Sched_send_internal (const void* buf, char tmpbuf, int count, MPI return OMPI_SUCCESS; } -int NBC_Sched_send (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier) { +int NBC_Sched_send (const void* buf, char tmpbuf, size_t count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier) { return NBC_Sched_send_internal (buf, tmpbuf, count, datatype, dest, false, schedule, barrier); } -int NBC_Sched_local_send (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier) { +int NBC_Sched_local_send (const void* buf, char tmpbuf, size_t count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier) { return NBC_Sched_send_internal (buf, tmpbuf, count, datatype, dest, true, schedule, barrier); } /* this function puts a receive into the schedule */ -static int NBC_Sched_recv_internal (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, bool local, NBC_Schedule *schedule, bool barrier) { +static int NBC_Sched_recv_internal (void* buf, char tmpbuf, size_t count, MPI_Datatype datatype, int source, bool local, NBC_Schedule *schedule, bool barrier) { NBC_Args_recv recv_args; int ret; @@ -176,16 +176,16 @@ static int NBC_Sched_recv_internal (void* buf, char tmpbuf, int count, MPI_Datat return OMPI_SUCCESS; } -int NBC_Sched_recv (void* buf, char tmpbuf, int count, MPI_Datatype 
datatype, int source, NBC_Schedule *schedule, bool barrier) { +int NBC_Sched_recv (void* buf, char tmpbuf, size_t count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier) { return NBC_Sched_recv_internal(buf, tmpbuf, count, datatype, source, false, schedule, barrier); } -int NBC_Sched_local_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier) { +int NBC_Sched_local_recv (void* buf, char tmpbuf, size_t count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier) { return NBC_Sched_recv_internal(buf, tmpbuf, count, datatype, source, true, schedule, barrier); } /* this function puts an operation into the schedule */ -int NBC_Sched_op (const void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, +int NBC_Sched_op (const void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, size_t count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule, bool barrier) { NBC_Args_op op_args; int ret; @@ -212,7 +212,8 @@ int NBC_Sched_op (const void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int } /* this function puts a copy into the schedule */ -int NBC_Sched_copy (void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, +int NBC_Sched_copy (void *src, char tmpsrc, size_t srccount, MPI_Datatype srctype, + void *tgt, char tmptgt, size_t tgtcount, MPI_Datatype tgttype, NBC_Schedule *schedule, bool barrier) { NBC_Args_copy copy_args; int ret; @@ -240,7 +241,7 @@ int NBC_Sched_copy (void *src, char tmpsrc, int srccount, MPI_Datatype srctype, } /* this function puts a unpack into the schedule */ -int NBC_Sched_unpack (void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, +int NBC_Sched_unpack (void *inbuf, char tmpinbuf, size_t count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, NBC_Schedule *schedule, bool barrier) { NBC_Args_unpack unpack_args; int ret; @@ -534,6 
+535,7 @@ static inline int NBC_Start_round(NBC_Handle *handle) { } else { buf2=opargs.buf2; } + ompi_op_reduce(opargs.op, buf1, buf2, opargs.count, opargs.datatype); break; case COPY: diff --git a/ompi/mca/coll/libnbc/nbc_iallreduce.c b/ompi/mca/coll/libnbc/nbc_iallreduce.c index ff0212b49c6..49e81bb681c 100644 --- a/ompi/mca/coll/libnbc/nbc_iallreduce.c +++ b/ompi/mca/coll/libnbc/nbc_iallreduce.c @@ -112,6 +112,7 @@ static int nbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI return OMPI_ERR_OUT_OF_RESOURCE; } + alg = NBC_ARED_RING; /* default generic selection */ /* algorithm selection */ int nprocs_pof2 = opal_next_poweroftwo(p) >> 1; if (libnbc_iallreduce_algorithm == 0) { @@ -119,8 +120,6 @@ static int nbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI alg = NBC_ARED_BINOMIAL; } else if (count >= nprocs_pof2 && ompi_op_is_commute(op)) { alg = NBC_ARED_REDSCAT_ALLGATHER; - } else { - alg = NBC_ARED_RING; } } else { if (libnbc_iallreduce_algorithm == 1) @@ -131,8 +130,6 @@ static int nbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI alg = NBC_ARED_REDSCAT_ALLGATHER; else if (libnbc_iallreduce_algorithm == 4) alg = NBC_ARED_RDBL; - else - alg = NBC_ARED_RING; } #ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ @@ -369,7 +366,7 @@ static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype dat root = 0; /* this makes the code for ireduce and iallreduce nearly identical - could be changed to improve performance */ RANK2VRANK(rank, vrank, root); - maxr = (int)ceil((log((double)p)/LOG2)); + maxr = ceil_of_log2(p); /* ensure the result ends up in recvbuf on vrank 0 */ if (0 == (maxr%2)) { rbuf = (void *)(-gap); @@ -633,38 +630,37 @@ static inline int allred_sched_recursivedoubling(int rank, int p, const void *se return OMPI_SUCCESS; } -static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datatype, const void *sendbuf, void *recvbuf, MPI_Op op, 
- int size, int ext, NBC_Schedule *schedule, void *tmpbuf) { +static inline int +allred_sched_ring(int r, int p, + int count, MPI_Datatype datatype, const void *sendbuf, void *recvbuf, + MPI_Op op, int size, int ext, NBC_Schedule *schedule, void *tmpbuf) +{ int segsize, *segsizes, *segoffsets; /* segment sizes and offsets per segment (number of segments == number of nodes */ - int speer, rpeer; /* send and recvpeer */ + int speer, rpeer; /* send and recv peers */ int res = OMPI_SUCCESS; - if (count == 0) { + if (0 == count) { return OMPI_SUCCESS; } - segsizes = (int *) malloc (sizeof (int) * p); - segoffsets = (int *) malloc (sizeof (int) * p); - if (NULL == segsizes || NULL == segoffsets) { - free (segsizes); - free (segoffsets); + segsizes = (int *) malloc((2 * p + 1 ) *sizeof (int)); + if (NULL == segsizes) { return OMPI_ERR_OUT_OF_RESOURCE; } + segoffsets = segsizes + p; - segsize = (count + p - 1) / p; /* size of the segments */ + segsize = count / p; /* size of the segments across the last ranks. 
+ The remainder will be evenly distributed across the smaller ranks */ segoffsets[0] = 0; - for (int i = 0, mycount = count ; i < p ; ++i) { - mycount -= segsize; + for (int i = 0, mycount = count % p; i < p ; ++i) { segsizes[i] = segsize; - if (mycount < 0) { - segsizes[i] = segsize + mycount; - mycount = 0; + if( mycount > 0 ) { /* We have extra segments to distribute */ + segsizes[i]++; + mycount--; } - if (i) { - segoffsets[i] = segoffsets[i-1] + segsizes[i-1]; - } + segoffsets[i+1] = segoffsets[i] + segsizes[i]; } /* reduce peers */ @@ -786,28 +782,29 @@ static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datat } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - break; + goto free_and_return; } - - res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer, - schedule, true); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - break; + if( recvbuf != sendbuf ) { /* check for MPI_IN_PLACE */ + res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer, + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + goto free_and_return; + } + res = NBC_Sched_op ((char *) sendbuf + roffset, false, (char *) recvbuf + roffset, false, + segsizes[relement], datatype, op, schedule, true); + } else { + res = NBC_Sched_recv ((char *) tmpbuf, false, segsizes[relement], datatype, rpeer, + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + goto free_and_return; + } + res = NBC_Sched_op ((char *) tmpbuf, false, (char *) recvbuf + roffset, false, + segsizes[relement], datatype, op, schedule, true); } - - res = NBC_Sched_op ((char *) sendbuf + roffset, false, (char *) recvbuf + roffset, false, - segsizes[relement], datatype, op, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - break; + goto free_and_return; } } - - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - free (segsizes); - free (segoffsets); - return res; - } - for (int round = p - 1 ; round < 2 * p - 2 ; ++round) { 
int selement = (r+1-round + 2*p /*2*p avoids negative mod*/)%p; /* the element I am sending */ int soffset = segoffsets[selement]*ext; @@ -819,16 +816,14 @@ static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datat if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { break; } - res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { break; } } - +free_and_return: free (segsizes); - free (segoffsets); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallv.c b/ompi/mca/coll/libnbc/nbc_ialltoallv.c index 5b9db1119d8..2a943db5af7 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallv.c @@ -11,6 +11,7 @@ * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2021 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -90,6 +91,7 @@ static int nbc_alltoallv_init(const void* sendbuf, const int *sendcounts, const sendcounts = recvcounts; sdispls = rdispls; + sndext = rcvext; } else { res = ompi_datatype_type_extent (sendtype, &sndext); if (MPI_SUCCESS != res) { diff --git a/ompi/mca/coll/libnbc/nbc_ibarrier.c b/ompi/mca/coll/libnbc/nbc_ibarrier.c index 42891eaba11..5c1ff994050 100644 --- a/ompi/mca/coll/libnbc/nbc_ibarrier.c +++ b/ompi/mca/coll/libnbc/nbc_ibarrier.c @@ -45,7 +45,7 @@ static int nbc_barrier_init(struct ompi_communicator_t *comm, ompi_request_t ** return OMPI_ERR_OUT_OF_RESOURCE; } - maxround = (int)ceil((log((double)p)/LOG2)-1); + maxround = ceil_of_log2(p) -1; for (int round = 0 ; round <= maxround ; ++round) { sendpeer = (rank + (1 << round)) % p; diff --git a/ompi/mca/coll/libnbc/nbc_ibcast.c b/ompi/mca/coll/libnbc/nbc_ibcast.c index a9069361a9f..37a42b773af 100644 --- a/ompi/mca/coll/libnbc/nbc_ibcast.c +++ b/ompi/mca/coll/libnbc/nbc_ibcast.c @@ -238,7 +238,7 @@ int 
ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int static inline int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype) { int maxr, vrank, peer, res; - maxr = (int)ceil((log((double)p)/LOG2)); + maxr = ceil_of_log2(p); RANK2VRANK(rank, vrank, root); diff --git a/ompi/mca/coll/libnbc/nbc_internal.h b/ompi/mca/coll/libnbc/nbc_internal.h index ce181b22343..51a8a6369ce 100644 --- a/ompi/mca/coll/libnbc/nbc_internal.h +++ b/ompi/mca/coll/libnbc/nbc_internal.h @@ -10,11 +10,12 @@ * * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science + * Copyright (c) 2015-2021 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -50,8 +51,15 @@ extern "C" { #endif -/* log(2) */ -#define LOG2 0.69314718055994530941 +/* Dividing very close floats may lead to unexpected roundings */ +static inline int +ceil_of_log2 (int val) { + int ret = 0; + while (1 << ret < val) { + ret ++; + } + return ret; +} /* true/false */ #define true 1 @@ -90,7 +98,7 @@ typedef enum { /* the send argument struct */ typedef struct { NBC_Fn_type type; - int count; + size_t count; const void *buf; MPI_Datatype datatype; int dest; @@ -101,7 +109,7 @@ typedef struct { /* the receive argument struct */ typedef struct { NBC_Fn_type type; - int count; + size_t count; void *buf; MPI_Datatype datatype; char tmpbuf; @@ -118,18 +126,18 @@ typedef struct { void *buf2; MPI_Op op; MPI_Datatype datatype; - int count; + size_t count; } NBC_Args_op; /* the copy argument struct */ typedef struct { NBC_Fn_type type; - int srccount; + size_t srccount; void *src; void *tgt; MPI_Datatype srctype; MPI_Datatype tgttype; - int tgtcount; + size_t tgtcount; char tmpsrc; char tmptgt; } NBC_Args_copy; @@ -137,7 +145,7 @@ typedef struct { /* unpack operation arguments */ typedef struct { NBC_Fn_type type; - int count; + size_t count; void *inbuf; void *outbuf; MPI_Datatype datatype; @@ -146,15 +154,15 @@ typedef struct { } NBC_Args_unpack; /* internal function prototypes */ -int NBC_Sched_send (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier); -int NBC_Sched_local_send (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest,NBC_Schedule *schedule, bool barrier); -int NBC_Sched_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier); -int NBC_Sched_local_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier); -int NBC_Sched_op (const void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, 
MPI_Datatype datatype, +int NBC_Sched_send (const void* buf, char tmpbuf, size_t count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_local_send (const void* buf, char tmpbuf, size_t count, MPI_Datatype datatype, int dest,NBC_Schedule *schedule, bool barrier); +int NBC_Sched_recv (void* buf, char tmpbuf, size_t count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_local_recv (void* buf, char tmpbuf, size_t count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_op (const void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, size_t count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule, bool barrier); -int NBC_Sched_copy (void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, +int NBC_Sched_copy (void *src, char tmpsrc, size_t srccount, MPI_Datatype srctype, void *tgt, char tmptgt, size_t tgtcount, MPI_Datatype tgttype, NBC_Schedule *schedule, bool barrier); -int NBC_Sched_unpack (void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, +int NBC_Sched_unpack (void *inbuf, char tmpinbuf, size_t count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, NBC_Schedule *schedule, bool barrier); int NBC_Sched_barrier (NBC_Schedule *schedule); @@ -564,7 +572,7 @@ static inline void NBC_SchedCache_dictwipe(hb_tree *dict_in, int *size) { #define NBC_IN_PLACE(sendbuf, recvbuf, inplace) \ { \ inplace = 0; \ - if(recvbuf == sendbuf) { \ + if(recvbuf == sendbuf && MPI_BOTTOM != sendbuf) { \ inplace = 1; \ } else \ if(sendbuf == MPI_IN_PLACE) { \ diff --git a/ompi/mca/coll/libnbc/nbc_ireduce.c b/ompi/mca/coll/libnbc/nbc_ireduce.c index 819c92880b9..1e797efa7d0 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce.c @@ -367,7 +367,7 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen vroot = 0; } 
RANK2VRANK(rank, vrank, vroot); - maxr = (int)ceil((log((double)p)/LOG2)); + maxr = ceil_of_log2(p); if (rank != root) { inplace = 0; diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c index fbe9b3b3f4b..fdab39e2dfb 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c @@ -12,7 +12,7 @@ * Copyright (c) 2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017-2021 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * @@ -45,7 +45,8 @@ static int nbc_reduce_scatter_init(const void* sendbuf, void* recvbuf, const int *recvcounts, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module, bool persistent) { - int peer, rank, maxr, p, res, count; + int peer, rank, maxr, p, res; + size_t count; MPI_Aint ext; ptrdiff_t gap, span, span_align; char *sbuf, inplace; @@ -82,7 +83,7 @@ static int nbc_reduce_scatter_init(const void* sendbuf, void* recvbuf, const int return nbc_get_noop_request(persistent, request); } - maxr = (int) ceil ((log((double) p) / LOG2)); + maxr = ceil_of_log2(p); span = opal_datatype_span(&datatype->super, count, &gap); span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t); @@ -161,7 +162,8 @@ static int nbc_reduce_scatter_init(const void* sendbuf, void* recvbuf, const int /* rank 0 is root and sends - all others receive */ if (rank == 0) { - for (long int r = 1, offset = 0 ; r < p ; ++r) { + size_t offset = 0; + for (long int r = 1 ; r < p ; ++r) { offset += recvcounts[r-1]; sbuf = lbuf + (offset*ext); /* root sends the right buffer to the right receiver */ @@ -229,7 +231,8 @@ int ompi_coll_libnbc_ireduce_scatter (const void* sendbuf, void* recvbuf, const static int 
nbc_reduce_scatter_inter_init (const void* sendbuf, void* recvbuf, const int *recvcounts, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module, bool persistent) { - int rank, res, count, lsize, rsize; + int rank, res, lsize, rsize; + size_t count; MPI_Aint ext; ptrdiff_t gap, span, span_align; NBC_Schedule *schedule; @@ -313,7 +316,8 @@ static int nbc_reduce_scatter_inter_init (const void* sendbuf, void* recvbuf, co free(tmpbuf); return res; } - for (int peer = 1, offset = recvcounts[0] * ext; peer < lsize ; ++peer) { + size_t offset = recvcounts[0] * ext; + for (int peer = 1; peer < lsize ; ++peer) { res = NBC_Sched_local_send (lbuf + offset, true, recvcounts[peer], datatype, peer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c index 4cc99a3545e..0a2ebfbb0d3 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c @@ -10,7 +10,7 @@ * reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017-2021 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * @@ -43,7 +43,8 @@ static int nbc_reduce_scatter_block_init(const void* sendbuf, void* recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module, bool persistent) { - int peer, rank, maxr, p, res, count; + int peer, rank, maxr, p, res; + size_t count; MPI_Aint ext; ptrdiff_t gap, span; char *redbuf, *sbuf, inplace; @@ -67,7 +68,7 @@ static int nbc_reduce_scatter_block_init(const void* sendbuf, void* recvbuf, int return OMPI_ERR_OUT_OF_RESOURCE; } - maxr = (int)ceil((log((double)p)/LOG2)); + maxr = ceil_of_log2(p); count = p * recvcount; @@ -166,7 +167,8 @@ static int nbc_reduce_scatter_block_init(const void* sendbuf, void* recvbuf, int return res; } } else { - for (int r = 1, offset = 0 ; r < p ; ++r) { + size_t offset = 0; + for (int r = 1 ; r < p ; ++r) { offset += recvcount; sbuf = lbuf + (offset*ext); /* root sends the right buffer to the right receiver */ @@ -228,7 +230,8 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i static int nbc_reduce_scatter_block_inter_init(const void *sendbuf, void *recvbuf, int rcount, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t **request, mca_coll_base_module_t *module, bool persistent) { - int rank, res, count, lsize, rsize; + int rank, res, lsize, rsize; + size_t count; MPI_Aint ext; ptrdiff_t gap, span, span_align; NBC_Schedule *schedule; diff --git a/ompi/mca/coll/portals4/coll_portals4_allreduce.c b/ompi/mca/coll/portals4/coll_portals4_allreduce.c index 69146cc1668..cae07299903 100644 --- a/ompi/mca/coll/portals4/coll_portals4_allreduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_allreduce.c @@ -96,13 +96,13 @@ allreduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, */ /* Compute match bits */ - COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_cid(comm), 1, 0, + COLL_PORTALS4_SET_BITS(match_bits_ack, 
ompi_comm_get_local_cid(comm), 1, 0, COLL_PORTALS4_ALLREDUCE, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_cid(comm), 0, 1, + COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_ALLREDUCE, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0, + COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_ALLREDUCE, 0, internal_count); if ((ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, &request->u.allreduce.trig_ct_h)) != 0) { diff --git a/ompi/mca/coll/portals4/coll_portals4_barrier.c b/ompi/mca/coll/portals4/coll_portals4_barrier.c index d9cf36ae2df..e3232f4158e 100644 --- a/ompi/mca/coll/portals4/coll_portals4_barrier.c +++ b/ompi/mca/coll/portals4/coll_portals4_barrier.c @@ -54,10 +54,10 @@ barrier_hypercube_top(struct ompi_communicator_t *comm, return OMPI_ERR_TEMP_OUT_OF_RESOURCE; } - COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_BARRIER, 0, count); - COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_BARRIER, 0, count); /* Build "tree" out of hypercube */ diff --git a/ompi/mca/coll/portals4/coll_portals4_bcast.c b/ompi/mca/coll/portals4/coll_portals4_bcast.c index ed890335d54..f9feb1ffdbd 100644 --- a/ompi/mca/coll/portals4/coll_portals4_bcast.c +++ b/ompi/mca/coll/portals4/coll_portals4_bcast.c @@ -200,13 +200,13 @@ bcast_kary_tree_top(void *buff, int count, } /* Compute match bits */ - COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_cid(comm), 1, 0, + COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_local_cid(comm), 1, 0, COLL_PORTALS4_BCAST, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_cid(comm), 0, 1, + COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_local_cid(comm), 0, 1, 
COLL_PORTALS4_BCAST, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0, + COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_BCAST, 0, internal_count); /* The data will be cut in segment_nb segments. @@ -531,13 +531,13 @@ bcast_pipeline_top(void *buff, int count, } /* Compute match bits */ - COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_cid(comm), 1, 0, + COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_local_cid(comm), 1, 0, COLL_PORTALS4_BCAST, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_cid(comm), 0, 1, + COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_BCAST, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0, + COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_BCAST, 0, internal_count); /* The data will be cut in segment_nb segments. * nb_long segments will have a size of (seg_size + 1) diff --git a/ompi/mca/coll/portals4/coll_portals4_component.c b/ompi/mca/coll/portals4/coll_portals4_component.c index be75230140b..7902ea3c8bf 100644 --- a/ompi/mca/coll/portals4/coll_portals4_component.c +++ b/ompi/mca/coll/portals4/coll_portals4_component.c @@ -661,7 +661,11 @@ portals4_module_enable(mca_coll_base_module_t *module, return OMPI_SUCCESS; } - +#if OPAL_ENABLE_DEBUG +/* These string maps are only used for debugging output. + * They will be compiled-out when OPAL is configured + * without --enable-debug. 
+ */ static char *failtype[] = { "PTL_NI_OK", "PTL_NI_PERM_VIOLATION", @@ -695,6 +699,7 @@ static char *evname[] = { "PTL_EVENT_SEARCH", "PTL_EVENT_LINK" }; +#endif /* Target EQ */ static int diff --git a/ompi/mca/coll/portals4/coll_portals4_gather.c b/ompi/mca/coll/portals4/coll_portals4_gather.c index a386bd2606e..b5a2df973eb 100644 --- a/ompi/mca/coll/portals4/coll_portals4_gather.c +++ b/ompi/mca/coll/portals4/coll_portals4_gather.c @@ -76,7 +76,7 @@ static ompi_coll_portals4_tree_t* ompi_coll_portals4_build_in_order_bmtree( struct ompi_communicator_t* comm, int root ) { - int childs = 0, rank, vrank, vparent, size, mask = 1, remote, i; + int childs = 0, rank, vrank, vparent, size, mask = 1, remote; ompi_coll_portals4_tree_t *bmtree; /* @@ -97,7 +97,7 @@ ompi_coll_portals4_build_in_order_bmtree( struct ompi_communicator_t* comm, bmtree->tree_bmtree = 1; bmtree->tree_root = MPI_UNDEFINED; bmtree->tree_nextsize = MPI_UNDEFINED; - for(i=0;itree_next[i] = -1; } @@ -360,7 +360,7 @@ setup_gather_handles(struct ompi_communicator_t *comm, /**********************************/ /* Setup Gather Handles */ /**********************************/ - COLL_PORTALS4_SET_BITS(request->u.gather.gather_match_bits, ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(request->u.gather.gather_match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_GATHER, 0, request->u.gather.coll_count); ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, @@ -413,7 +413,7 @@ setup_sync_handles(struct ompi_communicator_t *comm, /**********************************/ /* Setup Sync Handles */ /**********************************/ - COLL_PORTALS4_SET_BITS(request->u.gather.sync_match_bits, ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(request->u.gather.sync_match_bits, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_GATHER, 0, request->u.gather.coll_count); ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, @@ -520,8 +520,6 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int 
scount, struc int vrank=-1; - int32_t i=0; - ompi_coll_portals4_tree_t* bmtree; int32_t expected_ops =0; @@ -606,7 +604,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc "%s:%d: packed_size=%lu, fragment_size=%lu", __FILE__, __LINE__, request->u.gather.packed_size, mca_coll_portals4_component.ni_limits.max_msg_size)); - for (int i =0; i < bmtree->tree_nextsize; i++) { + for (int i = 0; i < bmtree->tree_nextsize; i++) { int child_vrank = VRANK(bmtree->tree_next[i], request->u.gather.root_rank, request->u.gather.size); int sub_tree_size = get_tree_numdescendants_of(comm, child_vrank) + 1; ptl_size_t local_number_of_fragment = ((sub_tree_size * request->u.gather.packed_size) + mca_coll_portals4_component.ni_limits.max_msg_size -1) / mca_coll_portals4_component.ni_limits.max_msg_size; @@ -688,7 +686,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc /************************************/ /* put Recv-ACK to each child */ /************************************/ - for (i=0;itree_nextsize;i++) { + for (int i = 0; i < bmtree->tree_nextsize; i++) { int32_t child=bmtree->tree_next[i]; ret = PtlTriggeredPut(request->u.gather.sync_mdh, 0, @@ -730,7 +728,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc /**********************************/ /* put RTR to each child */ /**********************************/ - for (i=0;itree_nextsize;i++) { + for (int i = 0; i < bmtree->tree_nextsize; i++) { int32_t child=bmtree->tree_next[i]; ret = PtlTriggeredPut(request->u.gather.sync_mdh, 0, @@ -750,7 +748,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc /**********************************/ /* put RTR to each child */ /**********************************/ - for (i=0;itree_nextsize;i++) { + for (int i = 0; i < bmtree->tree_nextsize; i++) { int32_t child=bmtree->tree_next[i]; ret = PtlPut(request->u.gather.sync_mdh, 0, @@ -818,8 +816,6 @@ 
ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct int8_t i_am_root; - int32_t i=0; - int32_t expected_ops =0; int32_t expected_acks=0; @@ -975,7 +971,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct /* root puts Recv-ACK to all other ranks */ /*****************************************/ if (i_am_root) { - for (i=0;iu.gather.size;i++) { + for (int i = 0; i < request->u.gather.size; i++) { if (i == request->u.gather.root_rank) { continue; } ret = PtlTriggeredPut(request->u.gather.sync_mdh, 0, @@ -1019,7 +1015,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct /* root puts RTR to all other ranks */ /************************************/ if (i_am_root) { - for (i=0;iu.gather.size;i++) { + for (int i = 0; i < request->u.gather.size; i++) { if (i == request->u.gather.root_rank) { continue; } ret = PtlTriggeredPut(request->u.gather.sync_mdh, 0, @@ -1041,7 +1037,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct /* root puts RTR to all other ranks */ /************************************/ if (i_am_root) { - for (i=0;iu.gather.size;i++) { + for (int i = 0; i < request->u.gather.size; i++) { if (i == request->u.gather.root_rank) { continue; } ret = PtlPut(request->u.gather.sync_mdh, 0, @@ -1093,7 +1089,6 @@ ompi_coll_portals4_gather_intra_binomial_bottom(struct ompi_communicator_t *comm ompi_coll_portals4_request_t *request) { int ret, line; - int i; OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, "coll:portals4:gather_intra_binomial_bottom enter rank %d", request->u.gather.my_rank)); @@ -1109,7 +1104,7 @@ ompi_coll_portals4_gather_intra_binomial_bottom(struct ompi_communicator_t *comm struct iovec iov; size_t max_data; - for (i=0;iu.gather.size;i++) { + for (int i = 0; i < request->u.gather.size; i++) { uint64_t offset = request->u.gather.unpack_dst_extent * request->u.gather.unpack_dst_count * ((request->u.gather.my_rank + 
i) % request->u.gather.size); opal_output_verbose(30, ompi_coll_base_framework.framework_output, @@ -1161,7 +1156,6 @@ ompi_coll_portals4_gather_intra_linear_bottom(struct ompi_communicator_t *comm, ompi_coll_portals4_request_t *request) { int ret, line; - int i; OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, "coll:portals4:gather_intra_linear_bottom enter rank %d", request->u.gather.my_rank)); @@ -1177,7 +1171,7 @@ ompi_coll_portals4_gather_intra_linear_bottom(struct ompi_communicator_t *comm, struct iovec iov; size_t max_data; - for (i=0;iu.gather.size;i++) { + for (int i = 0; i < request->u.gather.size; i++) { ompi_coll_portals4_create_recv_converter (&request->u.gather.recv_converter, request->u.gather.unpack_dst_buf + (request->u.gather.unpack_dst_extent*request->u.gather.unpack_dst_count*i), ompi_comm_peer_lookup(comm, request->u.gather.my_rank), diff --git a/ompi/mca/coll/portals4/coll_portals4_reduce.c b/ompi/mca/coll/portals4/coll_portals4_reduce.c index fb8a019237a..9072564e6db 100644 --- a/ompi/mca/coll/portals4/coll_portals4_reduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_reduce.c @@ -98,13 +98,13 @@ reduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, */ /* Compute match bits */ - COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_cid(comm), 1, 0, + COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_local_cid(comm), 1, 0, COLL_PORTALS4_REDUCE, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_cid(comm), 0, 1, + COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_REDUCE, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0, + COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_REDUCE, 0, internal_count); if ((ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, &request->u.reduce.trig_ct_h)) != 0) { diff --git a/ompi/mca/coll/portals4/coll_portals4_request.c 
b/ompi/mca/coll/portals4/coll_portals4_request.c index 001594f5d5c..9bebe56825c 100644 --- a/ompi/mca/coll/portals4/coll_portals4_request.c +++ b/ompi/mca/coll/portals4/coll_portals4_request.c @@ -27,7 +27,7 @@ request_free(struct ompi_request_t **ompi_req) ompi_coll_portals4_request_t *request = (ompi_coll_portals4_request_t*) *ompi_req; - if (true != request->super.req_complete) { + if (!REQUEST_COMPLETE(&request->super)) { return MPI_ERR_REQUEST; } diff --git a/ompi/mca/coll/portals4/coll_portals4_request.h b/ompi/mca/coll/portals4/coll_portals4_request.h index 871e2bb8200..e420e73203c 100644 --- a/ompi/mca/coll/portals4/coll_portals4_request.h +++ b/ompi/mca/coll/portals4/coll_portals4_request.h @@ -179,7 +179,6 @@ OBJ_CLASS_DECLARATION(ompi_coll_portals4_request_t); req = (ompi_coll_portals4_request_t*) item; \ OMPI_REQUEST_INIT(&req->super, false); \ req->super.req_mpi_object.comm = comm; \ - req->super.req_complete = false; \ req->super.req_state = OMPI_REQUEST_ACTIVE; \ } while (0) diff --git a/ompi/mca/coll/portals4/coll_portals4_scatter.c b/ompi/mca/coll/portals4/coll_portals4_scatter.c index c6f7ebbfaa1..2640273b54b 100644 --- a/ompi/mca/coll/portals4/coll_portals4_scatter.c +++ b/ompi/mca/coll/portals4/coll_portals4_scatter.c @@ -133,7 +133,7 @@ setup_scatter_handles(struct ompi_communicator_t *comm, /**********************************/ /* Setup Scatter Handles */ /**********************************/ - COLL_PORTALS4_SET_BITS(request->u.scatter.scatter_match_bits, ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(request->u.scatter.scatter_match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count); OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, @@ -194,7 +194,7 @@ setup_sync_handles(struct ompi_communicator_t *comm, /**********************************/ /* Setup Sync Handles */ /**********************************/ - COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, 
ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count); OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, diff --git a/ompi/mca/coll/sm/coll_sm_module.c b/ompi/mca/coll/sm/coll_sm_module.c index e5750467ee5..ba3c62ce1c4 100644 --- a/ompi/mca/coll/sm/coll_sm_module.c +++ b/ompi/mca/coll/sm/coll_sm_module.c @@ -175,8 +175,9 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority) are not on this node, then we don't want to run */ if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) || ompi_group_have_remote_peers (comm->c_local_group)) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:comm_query (%d/%s): intercomm, comm is too small, or not all peers local; disqualifying myself", comm->c_contextid, comm->c_name); - return NULL; + "coll:sm:comm_query (%s/%s): intercomm, comm is too small, or not all peers local; disqualifying myself", + ompi_comm_print_cid (comm), comm->c_name); + return NULL; } /* Get the priority level attached to this module. If priority is less @@ -184,8 +185,9 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority) *priority = mca_coll_sm_component.sm_priority; if (mca_coll_sm_component.sm_priority < 0) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:comm_query (%d/%s): priority too low; disqualifying myself", comm->c_contextid, comm->c_name); - return NULL; + "coll:sm:comm_query (%s/%s): priority too low; disqualifying myself", + ompi_comm_print_cid (comm), comm->c_name); + return NULL; } sm_module = OBJ_NEW(mca_coll_sm_module_t); @@ -213,8 +215,8 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority) sm_module->super.coll_scatterv = NULL; opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:comm_query (%d/%s): pick me! 
pick me!", - comm->c_contextid, comm->c_name); + "coll:sm:comm_query (%s/%s): pick me! pick me!", + ompi_comm_print_cid (comm), comm->c_name); return &(sm_module->super); } @@ -228,8 +230,8 @@ static int sm_module_enable(mca_coll_base_module_t *module, if (NULL == comm->c_coll->coll_reduce || NULL == comm->c_coll->coll_reduce_module) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): no underlying reduce; disqualifying myself", - comm->c_contextid, comm->c_name); + "coll:sm:enable (%s/%s): no underlying reduce; disqualifying myself", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_ERROR; } @@ -265,8 +267,8 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, c->sm_comm_num_segments * 3); if (NULL == maffinity) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): malloc failed (1)", - comm->c_contextid, comm->c_name); + "coll:sm:enable (%s/%s): malloc failed (1)", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -292,8 +294,8 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, if (NULL == data) { free(maffinity); opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): malloc failed (2)", - comm->c_contextid, comm->c_name); + "coll:sm:enable (%s/%s): malloc failed (2)", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_ERR_TEMP_OUT_OF_RESOURCE; } data->mcb_operation_count = 0; @@ -468,24 +470,24 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, /* Wait for everyone in this communicator to attach and setup */ opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): waiting for peers to attach", - comm->c_contextid, comm->c_name); + "coll:sm:enable (%s/%s): waiting for peers to attach", + ompi_comm_print_cid (comm), comm->c_name); SPIN_CONDITION(size == data->sm_bootstrap_meta->module_seg->seg_inited, seg_init_exit); /* 
Once we're all here, remove the mmap file; it's not needed anymore */ if (0 == rank) { unlink(data->sm_bootstrap_meta->shmem_ds.seg_name); opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): removed mmap file %s", - comm->c_contextid, comm->c_name, + "coll:sm:enable (%s/%s): removed mmap file %s", + ompi_comm_print_cid (comm), comm->c_name, data->sm_bootstrap_meta->shmem_ds.seg_name); } /* All done */ opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): success!", - comm->c_contextid, comm->c_name); + "coll:sm:enable (%s/%s): success!", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_SUCCESS; } @@ -518,12 +520,12 @@ static int bootstrap_comm(ompi_communicator_t *comm, lowest_name = OMPI_CAST_RTE_NAME(&proc->super.proc_name); } } - opal_asprintf(&shortpath, "coll-sm-cid-%d-name-%s.mmap", comm->c_contextid, + opal_asprintf(&shortpath, "coll-sm-cid-%s-name-%s.mmap", ompi_comm_print_cid (comm), OMPI_NAME_PRINT(lowest_name)); if (NULL == shortpath) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable:bootstrap comm (%d/%s): asprintf failed", - comm->c_contextid, comm->c_name); + "coll:sm:enable:bootstrap comm (%s/%s): asprintf failed", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_ERR_OUT_OF_RESOURCE; } fullpath = opal_os_path(false, ompi_process_info.job_session_dir, @@ -531,8 +533,8 @@ static int bootstrap_comm(ompi_communicator_t *comm, free(shortpath); if (NULL == fullpath) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable:bootstrap comm (%d/%s): opal_os_path failed", - comm->c_contextid, comm->c_name); + "coll:sm:enable:bootstrap comm (%s/%s): opal_os_path failed", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -562,14 +564,14 @@ static int bootstrap_comm(ompi_communicator_t *comm, (num_segments * (comm_size * control_size * 2)) + (num_segments * (comm_size * 
frag_size)); opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable:bootstrap comm (%d/%s): attaching to %" PRIsize_t " byte mmap: %s", - comm->c_contextid, comm->c_name, size, fullpath); + "coll:sm:enable:bootstrap comm (%s/%s): attaching to %" PRIsize_t " byte mmap: %s", + ompi_comm_print_cid (comm), comm->c_name, size, fullpath); if (0 == ompi_comm_rank (comm)) { data->sm_bootstrap_meta = mca_common_sm_module_create_and_attach (size, fullpath, sizeof(mca_common_sm_seg_header_t), 8); if (NULL == data->sm_bootstrap_meta) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable:bootstrap comm (%d/%s): mca_common_sm_init_group failed", - comm->c_contextid, comm->c_name); + "coll:sm:enable:bootstrap comm (%s/%s): mca_common_sm_init_group failed", + ompi_comm_print_cid (comm), comm->c_name); free(fullpath); return OMPI_ERR_OUT_OF_RESOURCE; } diff --git a/ompi/mca/coll/sm/memory-layout.ppt b/ompi/mca/coll/sm/memory-layout.ppt deleted file mode 100644 index 6151678ac27..00000000000 Binary files a/ompi/mca/coll/sm/memory-layout.ppt and /dev/null differ diff --git a/ompi/mca/coll/sm/memory-layout.pptx b/ompi/mca/coll/sm/memory-layout.pptx new file mode 100644 index 00000000000..236f2aebbe6 Binary files /dev/null and b/ompi/mca/coll/sm/memory-layout.pptx differ diff --git a/ompi/mca/coll/tuned/coll_tuned_allgather_decision.c b/ompi/mca/coll/tuned/coll_tuned_allgather_decision.c index 4dd6bd35d3b..1ae5f506ef1 100644 --- a/ompi/mca/coll/tuned/coll_tuned_allgather_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_allgather_decision.c @@ -40,6 +40,7 @@ static const mca_base_var_enum_value_t allgather_algorithms[] = { {4, "ring"}, {5, "neighbor"}, {6, "two_proc"}, + {7, "sparbit"}, {0, NULL} }; @@ -78,7 +79,7 @@ ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca mca_param_indices->algorithm_param_index = mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, 
"allgather_algorithm", - "Which allgather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 bruck, 3 recursive doubling, 4 ring, 5 neighbor exchange, 6: two proc only. " + "Which allgather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 bruck, 3 recursive doubling, 4 ring, 5 neighbor exchange, 6: two proc only, 7: sparbit. " "Only relevant if coll_tuned_use_dynamic_rules is true.", MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, @@ -163,6 +164,10 @@ int ompi_coll_tuned_allgather_intra_do_this(const void *sbuf, int scount, return ompi_coll_base_allgather_intra_two_procs(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); + case (7): + return ompi_coll_base_allgather_intra_sparbit(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); } /* switch */ OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:allgather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", diff --git a/ompi/mca/coll/tuned/coll_tuned_allgatherv_decision.c b/ompi/mca/coll/tuned/coll_tuned_allgatherv_decision.c index cd2f3561e52..4c53b6b5ee0 100644 --- a/ompi/mca/coll/tuned/coll_tuned_allgatherv_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_allgatherv_decision.c @@ -39,6 +39,7 @@ static const mca_base_var_enum_value_t allgatherv_algorithms[] = { {3, "ring"}, {4, "neighbor"}, {5, "two_proc"}, + {6, "sparbit"}, {0, NULL} }; @@ -77,7 +78,7 @@ ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mc mca_param_indices->algorithm_param_index = mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, "allgatherv_algorithm", - "Which allgatherv algorithm is used. Can be locked down to choice of: 0 ignore, 1 default (allgathervv + bcast), 2 bruck, 3 ring, 4 neighbor exchange, 5: two proc only. " + "Which allgatherv algorithm is used. 
Can be locked down to choice of: 0 ignore, 1 default (allgathervv + bcast), 2 bruck, 3 ring, 4 neighbor exchange, 5: two proc only, 6: sparbit. " "Only relevant if coll_tuned_use_dynamic_rules is true.", MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, @@ -160,6 +161,10 @@ int ompi_coll_tuned_allgatherv_intra_do_this(const void *sbuf, int scount, return ompi_coll_base_allgatherv_intra_two_procs(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm, module); + case (6): + return ompi_coll_base_allgatherv_intra_sparbit(sbuf, scount, sdtype, + rbuf, rcounts, rdispls, rdtype, + comm, module); } /* switch */ OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:allgatherv_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", diff --git a/ompi/mca/coll/ucc/Makefile.am b/ompi/mca/coll/ucc/Makefile.am index eec2507707c..c66ffa47694 100644 --- a/ompi/mca/coll/ucc/Makefile.am +++ b/ompi/mca/coll/ucc/Makefile.am @@ -16,13 +16,17 @@ coll_ucc_sources = \ coll_ucc.h \ coll_ucc_debug.h \ coll_ucc_dtypes.h \ + coll_ucc_common.h \ coll_ucc_module.c \ coll_ucc_component.c \ coll_ucc_barrier.c \ coll_ucc_bcast.c \ coll_ucc_allreduce.c \ + coll_ucc_reduce.c \ coll_ucc_alltoall.c \ - coll_ucc_alltoallv.c + coll_ucc_alltoallv.c \ + coll_ucc_allgather.c \ + coll_ucc_allgatherv.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/ompi/mca/coll/ucc/coll_ucc.h b/ompi/mca/coll/ucc/coll_ucc.h index 375f11f2b6c..0525dc0d741 100644 --- a/ompi/mca/coll/ucc/coll_ucc.h +++ b/ompi/mca/coll/ucc/coll_ucc.h @@ -26,10 +26,11 @@ BEGIN_C_DECLS #define COLL_UCC_CTS (UCC_COLL_TYPE_BARRIER | UCC_COLL_TYPE_BCAST | \ UCC_COLL_TYPE_ALLREDUCE | UCC_COLL_TYPE_ALLTOALL | \ - UCC_COLL_TYPE_ALLTOALLV) + UCC_COLL_TYPE_ALLTOALLV | UCC_COLL_TYPE_ALLGATHER | \ + UCC_COLL_TYPE_REDUCE | UCC_COLL_TYPE_ALLGATHERV) -#define COLL_UCC_CTS_STR "barrier,bcast,allreduce,alltoall,alltoallv," \ - 
"ibarrier,ibcast,iallreduce,ialltoall,ialltoallv" +#define COLL_UCC_CTS_STR "barrier,bcast,allreduce,alltoall,alltoallv,allgather,allgatherv,reduce," \ + "ibarrier,ibcast,iallreduce,ialltoall,ialltoallv,iallgather,iallgatherv,ireduce" typedef struct mca_coll_ucc_req { ompi_request_t super; @@ -63,30 +64,42 @@ OMPI_MODULE_DECLSPEC extern mca_coll_ucc_component_t mca_coll_ucc_component; * UCC enabled communicator */ struct mca_coll_ucc_module_t { - mca_coll_base_module_t super; - ompi_communicator_t* comm; - int rank; - ucc_team_h ucc_team; - mca_coll_base_module_allreduce_fn_t previous_allreduce; - mca_coll_base_module_t* previous_allreduce_module; - mca_coll_base_module_iallreduce_fn_t previous_iallreduce; - mca_coll_base_module_t* previous_iallreduce_module; - mca_coll_base_module_barrier_fn_t previous_barrier; - mca_coll_base_module_t* previous_barrier_module; - mca_coll_base_module_ibarrier_fn_t previous_ibarrier; - mca_coll_base_module_t* previous_ibarrier_module; - mca_coll_base_module_bcast_fn_t previous_bcast; - mca_coll_base_module_t* previous_bcast_module; - mca_coll_base_module_ibcast_fn_t previous_ibcast; - mca_coll_base_module_t* previous_ibcast_module; - mca_coll_base_module_alltoall_fn_t previous_alltoall; - mca_coll_base_module_t* previous_alltoall_module; - mca_coll_base_module_ialltoall_fn_t previous_ialltoall; - mca_coll_base_module_t* previous_ialltoall_module; - mca_coll_base_module_alltoallv_fn_t previous_alltoallv; - mca_coll_base_module_t* previous_alltoallv_module; - mca_coll_base_module_ialltoallv_fn_t previous_ialltoallv; - mca_coll_base_module_t* previous_ialltoallv_module; + mca_coll_base_module_t super; + ompi_communicator_t* comm; + int rank; + ucc_team_h ucc_team; + mca_coll_base_module_allreduce_fn_t previous_allreduce; + mca_coll_base_module_t* previous_allreduce_module; + mca_coll_base_module_iallreduce_fn_t previous_iallreduce; + mca_coll_base_module_t* previous_iallreduce_module; + mca_coll_base_module_reduce_fn_t 
previous_reduce; + mca_coll_base_module_t* previous_reduce_module; + mca_coll_base_module_ireduce_fn_t previous_ireduce; + mca_coll_base_module_t* previous_ireduce_module; + mca_coll_base_module_barrier_fn_t previous_barrier; + mca_coll_base_module_t* previous_barrier_module; + mca_coll_base_module_ibarrier_fn_t previous_ibarrier; + mca_coll_base_module_t* previous_ibarrier_module; + mca_coll_base_module_bcast_fn_t previous_bcast; + mca_coll_base_module_t* previous_bcast_module; + mca_coll_base_module_ibcast_fn_t previous_ibcast; + mca_coll_base_module_t* previous_ibcast_module; + mca_coll_base_module_alltoall_fn_t previous_alltoall; + mca_coll_base_module_t* previous_alltoall_module; + mca_coll_base_module_ialltoall_fn_t previous_ialltoall; + mca_coll_base_module_t* previous_ialltoall_module; + mca_coll_base_module_alltoallv_fn_t previous_alltoallv; + mca_coll_base_module_t* previous_alltoallv_module; + mca_coll_base_module_ialltoallv_fn_t previous_ialltoallv; + mca_coll_base_module_t* previous_ialltoallv_module; + mca_coll_base_module_allgather_fn_t previous_allgather; + mca_coll_base_module_t* previous_allgather_module; + mca_coll_base_module_iallgather_fn_t previous_iallgather; + mca_coll_base_module_t* previous_iallgather_module; + mca_coll_base_module_allgatherv_fn_t previous_allgatherv; + mca_coll_base_module_t* previous_allgatherv_module; + mca_coll_base_module_iallgatherv_fn_t previous_iallgatherv; + mca_coll_base_module_t* previous_iallgatherv_module; }; typedef struct mca_coll_ucc_module_t mca_coll_ucc_module_t; OBJ_CLASS_DECLARATION(mca_coll_ucc_module_t); @@ -105,6 +118,17 @@ int mca_coll_ucc_iallreduce(const void *sbuf, void *rbuf, int count, ompi_request_t** request, mca_coll_base_module_t *module); +int mca_coll_ucc_reduce(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, struct ompi_op_t *op, + int root, struct ompi_communicator_t *comm, + struct mca_coll_base_module_2_4_0_t *module); + +int mca_coll_ucc_ireduce(const void 
*sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, struct ompi_op_t *op, + int root, struct ompi_communicator_t *comm, + ompi_request_t** request, + struct mca_coll_base_module_2_4_0_t *module); + int mca_coll_ucc_barrier(struct ompi_communicator_t *comm, mca_coll_base_module_t *module); @@ -146,5 +170,30 @@ int mca_coll_ucc_ialltoallv(const void *sbuf, const int *scounts, const int *sdi struct ompi_communicator_t *comm, ompi_request_t** request, mca_coll_base_module_t *module); + +int mca_coll_ucc_allgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_ucc_iallgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t** request, + mca_coll_base_module_t *module); + +int mca_coll_ucc_allgatherv(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void* rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_ucc_iallgatherv(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void* rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t** request, + mca_coll_base_module_t *module); + END_C_DECLS #endif diff --git a/ompi/mca/coll/ucc/coll_ucc_allgather.c b/ompi/mca/coll/ucc/coll_ucc_allgather.c new file mode 100644 index 00000000000..b620d9529a2 --- /dev/null +++ b/ompi/mca/coll/ucc/coll_ucc_allgather.c @@ -0,0 +1,108 @@ + +/** + * Copyright (c) 2021 Mellanox Technologies. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + */ + +#include "coll_ucc_common.h" + +static inline ucc_status_t mca_coll_ucc_allgather_init(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, struct ompi_datatype_t *rdtype, + mca_coll_ucc_module_t *ucc_module, + ucc_coll_req_h *req, + mca_coll_ucc_req_t *coll_req) +{ + ucc_datatype_t ucc_sdt, ucc_rdt; + int comm_size = ompi_comm_size(ucc_module->comm); + + if (!ompi_datatype_is_contiguous_memory_layout(sdtype, scount) || + !ompi_datatype_is_contiguous_memory_layout(rdtype, rcount * comm_size)) { + goto fallback; + } + ucc_sdt = ompi_dtype_to_ucc_dtype(sdtype); + ucc_rdt = ompi_dtype_to_ucc_dtype(rdtype); + if (COLL_UCC_DT_UNSUPPORTED == ucc_sdt || + COLL_UCC_DT_UNSUPPORTED == ucc_rdt) { + UCC_VERBOSE(5, "ompi_datatype is not supported: dtype = %s", + (COLL_UCC_DT_UNSUPPORTED == ucc_sdt) ? + sdtype->super.name : rdtype->super.name); + goto fallback; + } + + ucc_coll_args_t coll = { + .mask = 0, + .coll_type = UCC_COLL_TYPE_ALLGATHER, + .src.info = { + .buffer = (void*)sbuf, + .count = scount, + .datatype = ucc_sdt, + .mem_type = UCC_MEMORY_TYPE_UNKNOWN + }, + .dst.info = { + .buffer = (void*)rbuf, + .count = rcount * comm_size, + .datatype = ucc_rdt, + .mem_type = UCC_MEMORY_TYPE_UNKNOWN + } + }; + + if (MPI_IN_PLACE == sbuf) { + coll.mask = UCC_COLL_ARGS_FIELD_FLAGS; + coll.flags = UCC_COLL_ARGS_FLAG_IN_PLACE; + } + COLL_UCC_REQ_INIT(coll_req, req, coll, ucc_module); + return UCC_OK; +fallback: + return UCC_ERR_NOT_SUPPORTED; +} + +int mca_coll_ucc_allgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_ucc_module_t *ucc_module = (mca_coll_ucc_module_t*)module; + ucc_coll_req_h req; + + UCC_VERBOSE(3, "running ucc allgather"); + COLL_UCC_CHECK(mca_coll_ucc_allgather_init(sbuf, scount, sdtype, + rbuf, rcount, 
rdtype, + ucc_module, &req, NULL)); + COLL_UCC_POST_AND_CHECK(req); + COLL_UCC_CHECK(coll_ucc_req_wait(req)); + return OMPI_SUCCESS; +fallback: + UCC_VERBOSE(3, "running fallback allgather"); + return ucc_module->previous_allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, + comm, ucc_module->previous_allgather_module); +} + +int mca_coll_ucc_iallgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t** request, + mca_coll_base_module_t *module) +{ + mca_coll_ucc_module_t *ucc_module = (mca_coll_ucc_module_t*)module; + ucc_coll_req_h req; + mca_coll_ucc_req_t *coll_req = NULL; + + UCC_VERBOSE(3, "running ucc iallgather"); + COLL_UCC_GET_REQ(coll_req); + COLL_UCC_CHECK(mca_coll_ucc_allgather_init(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + ucc_module, &req, coll_req)); + COLL_UCC_POST_AND_CHECK(req); + *request = &coll_req->super; + return OMPI_SUCCESS; +fallback: + UCC_VERBOSE(3, "running fallback iallgather"); + if (coll_req) { + mca_coll_ucc_req_free((ompi_request_t **)&coll_req); + } + return ucc_module->previous_iallgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, + comm, request, ucc_module->previous_iallgather_module); +} diff --git a/ompi/mca/coll/ucc/coll_ucc_allgatherv.c b/ompi/mca/coll/ucc/coll_ucc_allgatherv.c new file mode 100644 index 00000000000..6cf33a5dd80 --- /dev/null +++ b/ompi/mca/coll/ucc/coll_ucc_allgatherv.c @@ -0,0 +1,113 @@ + +/** + * Copyright (c) 2021 Mellanox Technologies. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + */ + +#include "coll_ucc_common.h" + +static inline ucc_status_t mca_coll_ucc_allgatherv_init(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void* rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, + mca_coll_ucc_module_t *ucc_module, + ucc_coll_req_h *req, + mca_coll_ucc_req_t *coll_req) +{ + ucc_datatype_t ucc_sdt, ucc_rdt; + + ucc_sdt = ompi_dtype_to_ucc_dtype(sdtype); + ucc_rdt = ompi_dtype_to_ucc_dtype(rdtype); + if (COLL_UCC_DT_UNSUPPORTED == ucc_sdt || + COLL_UCC_DT_UNSUPPORTED == ucc_rdt) { + UCC_VERBOSE(5, "ompi_datatype is not supported: dtype = %s", + (COLL_UCC_DT_UNSUPPORTED == ucc_sdt) ? + sdtype->super.name : rdtype->super.name); + goto fallback; + } + + ucc_coll_args_t coll = { + .mask = 0, + .coll_type = UCC_COLL_TYPE_ALLGATHERV, + .src.info = { + .buffer = (void*)sbuf, + .count = scount, + .datatype = ucc_sdt, + .mem_type = UCC_MEMORY_TYPE_UNKNOWN + }, + .dst.info_v = { + .buffer = (void*)rbuf, + .counts = (ucc_count_t*)rcounts, + .displacements = (ucc_aint_t*)rdisps, + .datatype = ucc_rdt, + .mem_type = UCC_MEMORY_TYPE_UNKNOWN + } + }; + + if (MPI_IN_PLACE == sbuf) { + coll.mask = UCC_COLL_ARGS_FIELD_FLAGS; + coll.flags = UCC_COLL_ARGS_FLAG_IN_PLACE; + } + COLL_UCC_REQ_INIT(coll_req, req, coll, ucc_module); + return UCC_OK; +fallback: + return UCC_ERR_NOT_SUPPORTED; +} + +int mca_coll_ucc_allgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void* rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_ucc_module_t *ucc_module = (mca_coll_ucc_module_t*)module; + ucc_coll_req_h req; + + UCC_VERBOSE(3, "running ucc allgatherv"); + + COLL_UCC_CHECK(mca_coll_ucc_allgatherv_init(sbuf, scount, sdtype, + rbuf, rcounts, rdisps, rdtype, + ucc_module, &req, NULL)); + COLL_UCC_POST_AND_CHECK(req); + 
COLL_UCC_CHECK(coll_ucc_req_wait(req)); + return OMPI_SUCCESS; +fallback: + UCC_VERBOSE(3, "running fallback allgatherv"); + return ucc_module->previous_allgatherv(sbuf, scount, sdtype, + rbuf, rcounts, rdisps, rdtype, + comm, ucc_module->previous_allgatherv_module); +} + +int mca_coll_ucc_iallgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void* rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t** request, + mca_coll_base_module_t *module) +{ + mca_coll_ucc_module_t *ucc_module = (mca_coll_ucc_module_t*)module; + ucc_coll_req_h req; + mca_coll_ucc_req_t *coll_req = NULL; + + UCC_VERBOSE(3, "running ucc iallgatherv"); + COLL_UCC_GET_REQ(coll_req); + COLL_UCC_CHECK(mca_coll_ucc_allgatherv_init(sbuf, scount, sdtype, + rbuf, rcounts, rdisps, rdtype, + ucc_module, &req, coll_req)); + COLL_UCC_POST_AND_CHECK(req); + *request = &coll_req->super; + return OMPI_SUCCESS; +fallback: + UCC_VERBOSE(3, "running fallback iallgatherv"); + if (coll_req) { + mca_coll_ucc_req_free((ompi_request_t **)&coll_req); + } + return ucc_module->previous_iallgatherv(sbuf, scount, sdtype, + rbuf, rcounts, rdisps, rdtype, + comm, request, ucc_module->previous_iallgatherv_module); +} diff --git a/ompi/mca/coll/ucc/coll_ucc_allreduce.c b/ompi/mca/coll/ucc/coll_ucc_allreduce.c index dd902b7dbb1..a46a59db337 100644 --- a/ompi/mca/coll/ucc/coll_ucc_allreduce.c +++ b/ompi/mca/coll/ucc/coll_ucc_allreduce.c @@ -31,7 +31,7 @@ static inline ucc_status_t mca_coll_ucc_allreduce_init(const void *sbuf, void *r goto fallback; } ucc_coll_args_t coll = { - .mask = UCC_COLL_ARGS_FIELD_PREDEFINED_REDUCTIONS, + .mask = 0, .coll_type = UCC_COLL_TYPE_ALLREDUCE, .src.info = { .buffer = (void*)sbuf, @@ -41,11 +41,11 @@ static inline ucc_status_t mca_coll_ucc_allreduce_init(const void *sbuf, void *r }, .dst.info = { .buffer = rbuf, + .count = count, + .datatype = ucc_dt, .mem_type = UCC_MEMORY_TYPE_UNKNOWN }, - 
.reduce = { - .predefined_op = ucc_op, - }, + .op = ucc_op, }; if (MPI_IN_PLACE == sbuf) { coll.mask |= UCC_COLL_ARGS_FIELD_FLAGS; @@ -68,7 +68,7 @@ int mca_coll_ucc_allreduce(const void *sbuf, void *rbuf, int count, UCC_VERBOSE(3, "running ucc allreduce"); COLL_UCC_CHECK(mca_coll_ucc_allreduce_init(sbuf, rbuf, count, dtype, op, ucc_module, &req, NULL)); - COLL_UCC_CHECK(ucc_collective_post(req)); + COLL_UCC_POST_AND_CHECK(req); COLL_UCC_CHECK(coll_ucc_req_wait(req)); return OMPI_SUCCESS; fallback: @@ -85,17 +85,20 @@ int mca_coll_ucc_iallreduce(const void *sbuf, void *rbuf, int count, { mca_coll_ucc_module_t *ucc_module = (mca_coll_ucc_module_t*)module; ucc_coll_req_h req; - mca_coll_ucc_req_t *coll_req; + mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc iallreduce"); COLL_UCC_GET_REQ(coll_req); COLL_UCC_CHECK(mca_coll_ucc_allreduce_init(sbuf, rbuf, count, dtype, op, ucc_module, &req, coll_req)); - COLL_UCC_CHECK(ucc_collective_post(req)); + COLL_UCC_POST_AND_CHECK(req); *request = &coll_req->super; return OMPI_SUCCESS; fallback: - UCC_VERBOSE(3, "running fallback allreduce"); + UCC_VERBOSE(3, "running fallback iallreduce"); + if (coll_req) { + mca_coll_ucc_req_free((ompi_request_t **)&coll_req); + } return ucc_module->previous_iallreduce(sbuf, rbuf, count, dtype, op, comm, request, ucc_module->previous_iallreduce_module); } diff --git a/ompi/mca/coll/ucc/coll_ucc_alltoall.c b/ompi/mca/coll/ucc/coll_ucc_alltoall.c index 6553848a5a3..d6871f23b69 100644 --- a/ompi/mca/coll/ucc/coll_ucc_alltoall.c +++ b/ompi/mca/coll/ucc/coll_ucc_alltoall.c @@ -15,11 +15,11 @@ static inline ucc_status_t mca_coll_ucc_alltoall_init(const void *sbuf, int scou ucc_coll_req_h *req, mca_coll_ucc_req_t *coll_req) { - size_t dt_size; ucc_datatype_t ucc_sdt, ucc_rdt; + int comm_size = ompi_comm_size(ucc_module->comm); - if (!ompi_datatype_is_contiguous_memory_layout(sdtype, scount) || - !ompi_datatype_is_contiguous_memory_layout(rdtype, rcount)) { + if 
(!ompi_datatype_is_contiguous_memory_layout(sdtype, scount * comm_size) || + !ompi_datatype_is_contiguous_memory_layout(rdtype, rcount * comm_size)) { goto fallback; } ucc_sdt = ompi_dtype_to_ucc_dtype(sdtype); @@ -32,19 +32,18 @@ static inline ucc_status_t mca_coll_ucc_alltoall_init(const void *sbuf, int scou goto fallback; } - opal_datatype_type_size(&sdtype->super, &dt_size); ucc_coll_args_t coll = { .mask = 0, .coll_type = UCC_COLL_TYPE_ALLTOALL, .src.info = { .buffer = (void*)sbuf, - .count = scount, + .count = scount * comm_size, .datatype = ucc_sdt, .mem_type = UCC_MEMORY_TYPE_UNKNOWN }, .dst.info = { .buffer = (void*)rbuf, - .count = rcount, + .count = rcount * comm_size, .datatype = ucc_rdt, .mem_type = UCC_MEMORY_TYPE_UNKNOWN } @@ -72,7 +71,7 @@ int mca_coll_ucc_alltoall(const void *sbuf, int scount, struct ompi_datatype_t * COLL_UCC_CHECK(mca_coll_ucc_alltoall_init(sbuf, scount, sdtype, rbuf, rcount, rdtype, ucc_module, &req, NULL)); - COLL_UCC_CHECK(ucc_collective_post(req)); + COLL_UCC_POST_AND_CHECK(req); COLL_UCC_CHECK(coll_ucc_req_wait(req)); return OMPI_SUCCESS; fallback: @@ -89,18 +88,21 @@ int mca_coll_ucc_ialltoall(const void *sbuf, int scount, struct ompi_datatype_t { mca_coll_ucc_module_t *ucc_module = (mca_coll_ucc_module_t*)module; ucc_coll_req_h req; - mca_coll_ucc_req_t *coll_req; + mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ialltoall"); COLL_UCC_GET_REQ(coll_req); COLL_UCC_CHECK(mca_coll_ucc_alltoall_init(sbuf, scount, sdtype, rbuf, rcount, rdtype, ucc_module, &req, coll_req)); - COLL_UCC_CHECK(ucc_collective_post(req)); + COLL_UCC_POST_AND_CHECK(req); *request = &coll_req->super; return OMPI_SUCCESS; fallback: UCC_VERBOSE(3, "running fallback ialltoall"); + if (coll_req) { + mca_coll_ucc_req_free((ompi_request_t **)&coll_req); + } return ucc_module->previous_ialltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, request, ucc_module->previous_ialltoall_module); } diff --git 
a/ompi/mca/coll/ucc/coll_ucc_alltoallv.c b/ompi/mca/coll/ucc/coll_ucc_alltoallv.c index ded2f9d551e..75b0dd6b6b7 100644 --- a/ompi/mca/coll/ucc/coll_ucc_alltoallv.c +++ b/ompi/mca/coll/ucc/coll_ucc_alltoallv.c @@ -17,7 +17,6 @@ static inline ucc_status_t mca_coll_ucc_alltoallv_init(const void *sbuf, const i ucc_coll_req_h *req, mca_coll_ucc_req_t *coll_req) { - size_t dt_size; ucc_datatype_t ucc_sdt, ucc_rdt; ucc_sdt = ompi_dtype_to_ucc_dtype(sdtype); @@ -30,12 +29,9 @@ static inline ucc_status_t mca_coll_ucc_alltoallv_init(const void *sbuf, const i goto fallback; } - opal_datatype_type_size(&sdtype->super, &dt_size); ucc_coll_args_t coll = { - .mask = UCC_COLL_ARGS_FIELD_FLAGS, + .mask = 0, .coll_type = UCC_COLL_TYPE_ALLTOALLV, - .flags = UCC_COLL_ARGS_FLAG_CONTIG_SRC_BUFFER | - UCC_COLL_ARGS_FLAG_CONTIG_DST_BUFFER, .src.info_v = { .buffer = (void*)sbuf, .counts = (ucc_count_t*)scounts, @@ -53,6 +49,7 @@ static inline ucc_status_t mca_coll_ucc_alltoallv_init(const void *sbuf, const i }; if (MPI_IN_PLACE == sbuf) { + coll.mask = UCC_COLL_ARGS_FIELD_FLAGS; coll.flags = UCC_COLL_ARGS_FLAG_IN_PLACE; } COLL_UCC_REQ_INIT(coll_req, req, coll, ucc_module); @@ -76,7 +73,7 @@ int mca_coll_ucc_alltoallv(const void *sbuf, const int *scounts, COLL_UCC_CHECK(mca_coll_ucc_alltoallv_init(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, ucc_module, &req, NULL)); - COLL_UCC_CHECK(ucc_collective_post(req)); + COLL_UCC_POST_AND_CHECK(req); COLL_UCC_CHECK(coll_ucc_req_wait(req)); return OMPI_SUCCESS; fallback: @@ -96,18 +93,21 @@ int mca_coll_ucc_ialltoallv(const void *sbuf, const int *scounts, { mca_coll_ucc_module_t *ucc_module = (mca_coll_ucc_module_t*)module; ucc_coll_req_h req; - mca_coll_ucc_req_t *coll_req; + mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ialltoallv"); COLL_UCC_GET_REQ(coll_req); COLL_UCC_CHECK(mca_coll_ucc_alltoallv_init(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, ucc_module, &req, coll_req)); - 
COLL_UCC_CHECK(ucc_collective_post(req)); + COLL_UCC_POST_AND_CHECK(req); *request = &coll_req->super; return OMPI_SUCCESS; fallback: UCC_VERBOSE(3, "running fallback ialltoallv"); + if (coll_req) { + mca_coll_ucc_req_free((ompi_request_t **)&coll_req); + } return ucc_module->previous_ialltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, comm, request, ucc_module->previous_ialltoallv_module); diff --git a/ompi/mca/coll/ucc/coll_ucc_barrier.c b/ompi/mca/coll/ucc/coll_ucc_barrier.c index fb95fb9f9e8..fdbc11b49aa 100644 --- a/ompi/mca/coll/ucc/coll_ucc_barrier.c +++ b/ompi/mca/coll/ucc/coll_ucc_barrier.c @@ -30,7 +30,7 @@ int mca_coll_ucc_barrier(struct ompi_communicator_t *comm, UCC_VERBOSE(3, "running ucc barrier"); COLL_UCC_CHECK(mca_coll_ucc_barrier_init(ucc_module, &req, NULL)); - COLL_UCC_CHECK(ucc_collective_post(req)); + COLL_UCC_POST_AND_CHECK(req); COLL_UCC_CHECK(coll_ucc_req_wait(req)); return OMPI_SUCCESS; fallback: @@ -44,16 +44,19 @@ int mca_coll_ucc_ibarrier(struct ompi_communicator_t *comm, { mca_coll_ucc_module_t *ucc_module = (mca_coll_ucc_module_t*)module; ucc_coll_req_h req; - mca_coll_ucc_req_t *coll_req; + mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ibarrier"); COLL_UCC_GET_REQ(coll_req); COLL_UCC_CHECK(mca_coll_ucc_barrier_init(ucc_module, &req, coll_req)); - COLL_UCC_CHECK(ucc_collective_post(req)); + COLL_UCC_POST_AND_CHECK(req); *request = &coll_req->super; return OMPI_SUCCESS; fallback: UCC_VERBOSE(3, "running fallback ibarrier"); + if (coll_req) { + mca_coll_ucc_req_free((ompi_request_t **)&coll_req); + } return ucc_module->previous_ibarrier(comm, request, ucc_module->previous_ibarrier_module); } diff --git a/ompi/mca/coll/ucc/coll_ucc_bcast.c b/ompi/mca/coll/ucc/coll_ucc_bcast.c index 2388c74f838..661f336e64d 100644 --- a/ompi/mca/coll/ucc/coll_ucc_bcast.c +++ b/ompi/mca/coll/ucc/coll_ucc_bcast.c @@ -45,7 +45,7 @@ int mca_coll_ucc_bcast(void *buf, int count, struct ompi_datatype_t *dtype, 
UCC_VERBOSE(3, "running ucc bcast"); COLL_UCC_CHECK(mca_coll_ucc_bcast_init(buf, count, dtype, root, ucc_module, &req, NULL)); - COLL_UCC_CHECK(ucc_collective_post(req)); + COLL_UCC_POST_AND_CHECK(req); COLL_UCC_CHECK(coll_ucc_req_wait(req)); return OMPI_SUCCESS; fallback: @@ -61,17 +61,20 @@ int mca_coll_ucc_ibcast(void *buf, int count, struct ompi_datatype_t *dtype, { mca_coll_ucc_module_t *ucc_module = (mca_coll_ucc_module_t*)module; ucc_coll_req_h req; - mca_coll_ucc_req_t *coll_req; + mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ibcast"); COLL_UCC_GET_REQ(coll_req); COLL_UCC_CHECK(mca_coll_ucc_bcast_init(buf, count, dtype, root, ucc_module, &req, coll_req)); - COLL_UCC_CHECK(ucc_collective_post(req)); + COLL_UCC_POST_AND_CHECK(req); *request = &coll_req->super; return OMPI_SUCCESS; fallback: UCC_VERBOSE(3, "running fallback ibcast"); + if (coll_req) { + mca_coll_ucc_req_free((ompi_request_t **)&coll_req); + } return ucc_module->previous_ibcast(buf, count, dtype, root, comm, request, ucc_module->previous_ibcast_module); } diff --git a/ompi/mca/coll/ucc/coll_ucc_common.h b/ompi/mca/coll/ucc/coll_ucc_common.h index ffcb2de8b8f..5ae034d7ff5 100644 --- a/ompi/mca/coll/ucc/coll_ucc_common.h +++ b/ompi/mca/coll/ucc/coll_ucc_common.h @@ -18,6 +18,13 @@ } \ } while(0) +#define COLL_UCC_POST_AND_CHECK(_req) do { \ + if (UCC_OK != ucc_collective_post(_req)) { \ + ucc_collective_finalize(_req); \ + goto fallback; \ + } \ + } while(0) + #define COLL_UCC_GET_REQ(_coll_req) do { \ opal_free_list_item_t *item; \ item = opal_free_list_wait (&mca_coll_ucc_component.requests); \ @@ -55,6 +62,7 @@ static inline ucc_status_t coll_ucc_req_wait(ucc_coll_req_h req) if (status < 0) { UCC_ERROR("ucc_collective_test failed: %s", ucc_status_string(status)); + ucc_collective_finalize(req); return status; } ucc_context_progress(mca_coll_ucc_component.ucc_context); diff --git a/ompi/mca/coll/ucc/coll_ucc_component.c b/ompi/mca/coll/ucc/coll_ucc_component.c index 
985a30b5486..341a76fad0d 100644 --- a/ompi/mca/coll/ucc/coll_ucc_component.c +++ b/ompi/mca/coll/ucc/coll_ucc_component.c @@ -114,6 +114,12 @@ static ucc_coll_type_t mca_coll_ucc_str_to_type(const char *str) return UCC_COLL_TYPE_ALLTOALL; } else if (0 == strcasecmp(str, "alltoallv")) { return UCC_COLL_TYPE_ALLTOALLV; + } else if (0 == strcasecmp(str, "allgather")) { + return UCC_COLL_TYPE_ALLGATHER; + } else if (0 == strcasecmp(str, "allgatherv")) { + return UCC_COLL_TYPE_ALLGATHERV; + } else if (0 == strcasecmp(str, "reduce")) { + return UCC_COLL_TYPE_REDUCE; } UCC_ERROR("incorrect value for cts: %s, allowed: %s", str, COLL_UCC_CTS_STR); diff --git a/ompi/mca/coll/ucc/coll_ucc_module.c b/ompi/mca/coll/ucc/coll_ucc_module.c index 1a3ca719648..ec16261cb6d 100644 --- a/ompi/mca/coll/ucc/coll_ucc_module.c +++ b/ompi/mca/coll/ucc/coll_ucc_module.c @@ -27,17 +27,23 @@ int mca_coll_ucc_init_query(bool enable_progress_threads, bool enable_mpi_thread static void mca_coll_ucc_module_clear(mca_coll_ucc_module_t *ucc_module) { - ucc_module->ucc_team = NULL; - ucc_module->previous_allreduce = NULL; - ucc_module->previous_iallreduce = NULL; - ucc_module->previous_barrier = NULL; - ucc_module->previous_ibarrier = NULL; - ucc_module->previous_bcast = NULL; - ucc_module->previous_ibcast = NULL; - ucc_module->previous_alltoall = NULL; - ucc_module->previous_ialltoall = NULL; - ucc_module->previous_alltoallv = NULL; - ucc_module->previous_ialltoallv = NULL; + ucc_module->ucc_team = NULL; + ucc_module->previous_allreduce = NULL; + ucc_module->previous_iallreduce = NULL; + ucc_module->previous_barrier = NULL; + ucc_module->previous_ibarrier = NULL; + ucc_module->previous_bcast = NULL; + ucc_module->previous_ibcast = NULL; + ucc_module->previous_alltoall = NULL; + ucc_module->previous_ialltoall = NULL; + ucc_module->previous_alltoallv = NULL; + ucc_module->previous_ialltoallv = NULL; + ucc_module->previous_allgather = NULL; + ucc_module->previous_iallgather = NULL; + 
ucc_module->previous_allgatherv = NULL; + ucc_module->previous_iallgatherv = NULL; + ucc_module->previous_reduce = NULL; + ucc_module->previous_ireduce = NULL; } static void mca_coll_ucc_module_construct(mca_coll_ucc_module_t *ucc_module) @@ -68,6 +74,12 @@ static void mca_coll_ucc_module_destruct(mca_coll_ucc_module_t *ucc_module) OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_ialltoall_module); OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_alltoallv_module); OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_ialltoallv_module); + OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_allgather_module); + OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_iallgather_module); + OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_allgatherv_module); + OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_iallgatherv_module); + OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_reduce_module); + OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_ireduce_module); mca_coll_ucc_module_clear(ucc_module); } @@ -93,6 +105,12 @@ static int mca_coll_ucc_save_coll_handlers(mca_coll_ucc_module_t *ucc_module) SAVE_PREV_COLL_API(ialltoall); SAVE_PREV_COLL_API(alltoallv); SAVE_PREV_COLL_API(ialltoallv); + SAVE_PREV_COLL_API(allgather); + SAVE_PREV_COLL_API(iallgather); + SAVE_PREV_COLL_API(allgatherv); + SAVE_PREV_COLL_API(iallgatherv); + SAVE_PREV_COLL_API(reduce); + SAVE_PREV_COLL_API(ireduce); return OMPI_SUCCESS; } @@ -244,7 +262,8 @@ static int mca_coll_ucc_init_ctx() { ctx_params.oob.req_test = oob_allgather_test; ctx_params.oob.req_free = oob_allgather_free; ctx_params.oob.coll_info = (void*)MPI_COMM_WORLD; - ctx_params.oob.participants = ompi_comm_size(&ompi_mpi_comm_world.comm); + ctx_params.oob.n_oob_eps = ompi_comm_size(&ompi_mpi_comm_world.comm); + ctx_params.oob.oob_ep = ompi_comm_rank(&ompi_mpi_comm_world.comm); if (UCC_OK != ucc_context_config_read(cm->ucc_lib, NULL, &ctx_config)) { UCC_ERROR("UCC context config read failed"); goto cleanup_lib; @@ -272,7 +291,8 @@ static int mca_coll_ucc_init_ctx() { } 
ucc_context_config_release(ctx_config); - copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*) MPI_COMM_NULL_COPY_FN; + copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function) + MPI_COMM_NULL_COPY_FN; del_fn.attr_communicator_delete_fn = ucc_comm_attr_del_fn; if (OMPI_SUCCESS != ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, &ucc_comm_attr_keyval, NULL ,0, NULL)) { @@ -300,6 +320,53 @@ static int mca_coll_ucc_init_ctx() { cm->libucc_initialized = false; return OMPI_ERROR; } + +uint64_t rank_map_cb(uint64_t ep, void *cb_ctx) +{ + struct ompi_communicator_t *comm = cb_ctx; + + return ((ompi_process_name_t*)&ompi_comm_peer_lookup(comm, ep)->super. + proc_name)->vpid; +} + +static inline ucc_ep_map_t get_rank_map(struct ompi_communicator_t *comm) +{ + ucc_ep_map_t map; + int64_t r1, r2, stride, i; + int is_strided; + + map.ep_num = ompi_comm_size(comm); + if (comm == &ompi_mpi_comm_world.comm) { + map.type = UCC_EP_MAP_FULL; + return map; + } + + /* try to detect strided pattern */ + is_strided = 1; + r1 = rank_map_cb(0, comm); + r2 = rank_map_cb(1, comm); + stride = r2 - r1; + for (i = 2; i < map.ep_num; i++) { + r1 = r2; + r2 = rank_map_cb(i, comm); + if (r2 - r1 != stride) { + is_strided = 0; + break; + } + } + + if (is_strided) { + map.type = UCC_EP_MAP_STRIDED; + map.strided.start = r1; + map.strided.stride = stride; + } else { + map.type = UCC_EP_MAP_CB; + map.cb.cb = rank_map_cb; + map.cb.cb_ctx = (void*)comm; + } + + return map; +} /* * Initialize module on the communicator */ @@ -311,18 +378,20 @@ static int mca_coll_ucc_module_enable(mca_coll_base_module_t *module, ucc_status_t status; int rc; ucc_team_params_t team_params = { - .mask = UCC_TEAM_PARAM_FIELD_EP | - UCC_TEAM_PARAM_FIELD_EP_RANGE | - UCC_TEAM_PARAM_FIELD_OOB, - .oob = { - .allgather = oob_allgather, - .req_test = oob_allgather_test, - .req_free = oob_allgather_free, - .coll_info = (void*)comm, - .participants = ompi_comm_size(comm) + .mask = 
UCC_TEAM_PARAM_FIELD_EP_MAP | + UCC_TEAM_PARAM_FIELD_EP | + UCC_TEAM_PARAM_FIELD_EP_RANGE | + UCC_TEAM_PARAM_FIELD_ID, + .ep_map = { + .type = (comm == &ompi_mpi_comm_world.comm) ? + UCC_EP_MAP_FULL : UCC_EP_MAP_CB, + .ep_num = ompi_comm_size(comm), + .cb.cb = rank_map_cb, + .cb.cb_ctx = (void*)comm }, .ep = ompi_comm_rank(comm), - .ep_range = UCC_COLLECTIVE_EP_RANGE_CONTIG + .ep_range = UCC_COLLECTIVE_EP_RANGE_CONTIG, + .id = comm->c_contextid }; UCC_VERBOSE(2,"creating ucc_team for comm %p, comm_id %d, comm_size %d", (void*)comm,comm->c_contextid,ompi_comm_size(comm)); @@ -414,11 +483,14 @@ mca_coll_ucc_comm_query(struct ompi_communicator_t *comm, int *priority) ucc_module->comm = comm; ucc_module->super.coll_module_enable = mca_coll_ucc_module_enable; *priority = cm->ucc_priority; - SET_COLL_PTR(ucc_module, BARRIER, barrier); - SET_COLL_PTR(ucc_module, BCAST, bcast); - SET_COLL_PTR(ucc_module, ALLREDUCE, allreduce); - SET_COLL_PTR(ucc_module, ALLTOALL, alltoall); - SET_COLL_PTR(ucc_module, ALLTOALLV, alltoallv); + SET_COLL_PTR(ucc_module, BARRIER, barrier); + SET_COLL_PTR(ucc_module, BCAST, bcast); + SET_COLL_PTR(ucc_module, ALLREDUCE, allreduce); + SET_COLL_PTR(ucc_module, ALLTOALL, alltoall); + SET_COLL_PTR(ucc_module, ALLTOALLV, alltoallv); + SET_COLL_PTR(ucc_module, REDUCE, reduce); + SET_COLL_PTR(ucc_module, ALLGATHER, allgather); + SET_COLL_PTR(ucc_module, ALLGATHERV, allgatherv); return &ucc_module->super; } diff --git a/ompi/mca/coll/ucc/coll_ucc_reduce.c b/ompi/mca/coll/ucc/coll_ucc_reduce.c new file mode 100644 index 00000000000..c936caad312 --- /dev/null +++ b/ompi/mca/coll/ucc/coll_ucc_reduce.c @@ -0,0 +1,107 @@ +/** + * Copyright (c) 2021 Mellanox Technologies. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + */ + +#include "coll_ucc_common.h" + +static inline ucc_status_t mca_coll_ucc_reduce_init(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, + mca_coll_ucc_module_t *ucc_module, + ucc_coll_req_h *req, + mca_coll_ucc_req_t *coll_req) +{ + ucc_datatype_t ucc_dt; + ucc_reduction_op_t ucc_op; + + ucc_dt = ompi_dtype_to_ucc_dtype(dtype); + ucc_op = ompi_op_to_ucc_op(op); + if (OPAL_UNLIKELY(COLL_UCC_DT_UNSUPPORTED == ucc_dt)) { + UCC_VERBOSE(5, "ompi_datatype is not supported: dtype = %s", + dtype->super.name); + goto fallback; + } + if (OPAL_UNLIKELY(COLL_UCC_OP_UNSUPPORTED == ucc_op)) { + UCC_VERBOSE(5, "ompi_op is not supported: op = %s", + op->o_name); + goto fallback; + } + ucc_coll_args_t coll = { + .mask = 0, + .coll_type = UCC_COLL_TYPE_REDUCE, + .root = root, + .src.info = { + .buffer = (void*)sbuf, + .count = count, + .datatype = ucc_dt, + .mem_type = UCC_MEMORY_TYPE_UNKNOWN + }, + .dst.info = { + .buffer = rbuf, + .count = count, + .datatype = ucc_dt, + .mem_type = UCC_MEMORY_TYPE_UNKNOWN + }, + .op = ucc_op, + }; + if (MPI_IN_PLACE == sbuf) { + coll.mask |= UCC_COLL_ARGS_FIELD_FLAGS; + coll.flags = UCC_COLL_ARGS_FLAG_IN_PLACE; + } + COLL_UCC_REQ_INIT(coll_req, req, coll, ucc_module); + return UCC_OK; +fallback: + return UCC_ERR_NOT_SUPPORTED; +} + +int mca_coll_ucc_reduce(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, + struct ompi_communicator_t *comm, + struct mca_coll_base_module_2_4_0_t *module) +{ + mca_coll_ucc_module_t *ucc_module = (mca_coll_ucc_module_t*)module; + ucc_coll_req_h req; + + UCC_VERBOSE(3, "running ucc reduce"); + COLL_UCC_CHECK(mca_coll_ucc_reduce_init(sbuf, rbuf, count, dtype, op, + root, ucc_module, &req, NULL)); + COLL_UCC_POST_AND_CHECK(req); + COLL_UCC_CHECK(coll_ucc_req_wait(req)); + return OMPI_SUCCESS; +fallback: + UCC_VERBOSE(3, "running fallback 
reduce"); + return ucc_module->previous_reduce(sbuf, rbuf, count, dtype, op, root, + comm, ucc_module->previous_reduce_module); +} + +int mca_coll_ucc_ireduce(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, + struct ompi_communicator_t *comm, + ompi_request_t** request, + struct mca_coll_base_module_2_4_0_t *module) +{ + mca_coll_ucc_module_t *ucc_module = (mca_coll_ucc_module_t*)module; + ucc_coll_req_h req; + mca_coll_ucc_req_t *coll_req = NULL; + + UCC_VERBOSE(3, "running ucc ireduce"); + COLL_UCC_GET_REQ(coll_req); + COLL_UCC_CHECK(mca_coll_ucc_reduce_init(sbuf, rbuf, count, dtype, op, root, + ucc_module, &req, coll_req)); + COLL_UCC_POST_AND_CHECK(req); + *request = &coll_req->super; + return OMPI_SUCCESS; +fallback: + UCC_VERBOSE(3, "running fallback ireduce"); + if (coll_req) { + mca_coll_ucc_req_free((ompi_request_t **)&coll_req); + } + return ucc_module->previous_ireduce(sbuf, rbuf, count, dtype, op, root, + comm, request, ucc_module->previous_ireduce_module); +} diff --git a/ompi/mca/coll/ucc/configure.m4 b/ompi/mca/coll/ucc/configure.m4 index bc1d56bb17d..ed8a4875e7c 100644 --- a/ompi/mca/coll/ucc/configure.m4 +++ b/ompi/mca/coll/ucc/configure.m4 @@ -28,6 +28,8 @@ AC_DEFUN([MCA_ompi_coll_ucc_CONFIG],[ coll_ucc_WRAPPER_EXTRA_LIBS="$coll_ucc_LIBS" $1], [$2]) + + OPAL_SUMMARY_ADD([[Miscellaneous]],[[Open UCC]],[$1],[$coll_ucc_happy])])]) # substitute in the things needed to build ucc AC_SUBST([coll_ucc_CFLAGS]) diff --git a/ompi/mca/common/ompio/common_ompio_file_view.c b/ompi/mca/common/ompio/common_ompio_file_view.c index c32b50347a0..e53582aa8e2 100644 --- a/ompi/mca/common/ompio/common_ompio_file_view.c +++ b/ompi/mca/common/ompio/common_ompio_file_view.c @@ -222,15 +222,17 @@ int mca_common_ompio_set_view (ompio_file_t *fh, opal_cstring_t *stripe_str; /* Check the info object set during File_open */ - opal_info_get (fh->f_info, "cb_nodes", &stripe_str, &flag); + opal_info_get (info, 
"cb_nodes", &stripe_str, &flag); if ( flag ) { sscanf ( stripe_str->string, "%d", &num_cb_nodes ); OMPIO_MCA_PRINT_INFO(fh, "cb_nodes", stripe_str->string, ""); + /* add the key/value to the file's info object */ + opal_info_set_cstring(fh->f_info, "cb_nodes", stripe_str); OBJ_RELEASE(stripe_str); } else { /* Check the info object set during file_set_view */ - opal_info_get (info, "cb_nodes", &stripe_str, &flag); + opal_info_get (fh->f_info, "cb_nodes", &stripe_str, &flag); if ( flag ) { sscanf ( stripe_str->string, "%d", &num_cb_nodes ); OMPIO_MCA_PRINT_INFO(fh, "cb_nodes", stripe_str->string, ""); @@ -325,7 +327,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh, } bool info_is_set=false; - opal_info_get (fh->f_info, "collective_buffering", &stripe_str, &flag); + opal_info_get (info, "collective_buffering", &stripe_str, &flag); if ( flag ) { if ( strncmp ( stripe_str->string, "false", sizeof("true") )){ info_is_set = true; @@ -333,9 +335,11 @@ int mca_common_ompio_set_view (ompio_file_t *fh, } else { OMPIO_MCA_PRINT_INFO(fh, "collective_buffering", stripe_str->string, ""); } + /* add the key/value to the file's info object */ + opal_info_set_cstring(fh->f_info, "collective_buffering", stripe_str); OBJ_RELEASE(stripe_str); } else { - opal_info_get (info, "collective_buffering", &stripe_str, &flag); + opal_info_get (fh->f_info, "collective_buffering", &stripe_str, &flag); if ( flag ) { if ( strncmp ( stripe_str->string, "false", sizeof("true") )){ info_is_set = true; diff --git a/ompi/mca/fbtl/base/Makefile.am b/ompi/mca/fbtl/base/Makefile.am index 7758c02f745..3c8ca757290 100644 --- a/ompi/mca/fbtl/base/Makefile.am +++ b/ompi/mca/fbtl/base/Makefile.am @@ -22,6 +22,10 @@ headers += \ libmca_fbtl_la_SOURCES += \ base/fbtl_base_frame.c \ - base/fbtl_base_file_select.c \ - base/fbtl_base_file_unselect.c \ base/fbtl_base_find_available.c + +if OMPI_OMPIO_SUPPORT +libmca_fbtl_la_SOURCES += \ + base/fbtl_base_file_select.c \ + base/fbtl_base_file_unselect.c +endif diff 
--git a/ompi/mca/fcoll/base/Makefile.am b/ompi/mca/fcoll/base/Makefile.am index c91d548bac2..6b372623f9e 100644 --- a/ompi/mca/fcoll/base/Makefile.am +++ b/ompi/mca/fcoll/base/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# Copyright (c) 2008-2022 University of Houston. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -23,8 +23,13 @@ headers += \ libmca_fcoll_la_SOURCES += \ base/fcoll_base_frame.c \ + base/fcoll_base_find_available.c + +if OMPI_OMPIO_SUPPORT +libmca_fcoll_la_SOURCES += \ base/fcoll_base_file_select.c \ base/fcoll_base_file_unselect.c \ - base/fcoll_base_find_available.c \ base/fcoll_base_sort.c \ + base/fcoll_base_file_read_all.c \ base/fcoll_base_coll_array.c +endif diff --git a/ompi/mca/fcoll/base/base.h b/ompi/mca/fcoll/base/base.h index 353c062a599..10fd98cdeae 100644 --- a/ompi/mca/fcoll/base/base.h +++ b/ompi/mca/fcoll/base/base.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2008-2022 University of Houston. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -55,6 +55,10 @@ OMPI_DECLSPEC int ompi_fcoll_base_sort_iovec (struct iovec *iov, int num_entries OMPI_DECLSPEC mca_fcoll_base_component_t* mca_fcoll_base_component_lookup(const char* name); +OMPI_DECLSPEC int mca_fcoll_base_file_read_all (ompio_file_t *fh, void *buf, int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); + /* * Globals */ diff --git a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_read_all.c b/ompi/mca/fcoll/base/fcoll_base_file_read_all.c similarity index 98% rename from ompi/mca/fcoll/vulcan/fcoll_vulcan_file_read_all.c rename to ompi/mca/fcoll/base/fcoll_base_file_read_all.c index a00bf8e340a..5007918d282 100644 --- a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_read_all.c +++ b/ompi/mca/fcoll/base/fcoll_base_file_read_all.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2021 University of Houston. All rights reserved. + * Copyright (c) 2008-2022 University of Houston. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -20,12 +20,12 @@ */ #include "ompi_config.h" -#include "fcoll_vulcan.h" #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/fcoll/fcoll.h" #include "ompi/mca/fcoll/base/fcoll_base_coll_array.h" +#include "ompi/mca/fcoll/base/base.h" #include "ompi/mca/common/ompio/common_ompio.h" #include "ompi/mca/io/io.h" #include "math.h" @@ -33,6 +33,8 @@ #include #define DEBUG_ON 0 +#define FCOLL_BASE_SHUFFLE_TAG 123 +#define INIT_LEN 10 /*Used for loading file-offsets per aggregator*/ typedef struct mca_io_ompio_local_io_array{ @@ -49,7 +51,7 @@ static int read_heap_sort (mca_io_ompio_local_io_array *io_array, int -mca_fcoll_vulcan_file_read_all (ompio_file_t *fh, +mca_fcoll_base_file_read_all (ompio_file_t *fh, void *buf, int count, struct ompi_datatype_t *datatype, @@ -89,7 +91,7 @@ mca_fcoll_vulcan_file_read_all (ompio_file_t *fh, /* array that contains the sorted indices of the global_iov */ int *sorted = NULL; int *displs = NULL; - int vulcan_num_io_procs; + int base_num_io_procs; size_t max_data = 0; MPI_Aint *total_bytes_per_process = NULL; ompi_datatype_t **sendtype = NULL; @@ -126,14 +128,14 @@ mca_fcoll_vulcan_file_read_all (ompio_file_t *fh, status->_ucount = max_data; } - vulcan_num_io_procs = fh->f_get_mca_parameter_value ( "num_aggregators", strlen ("num_aggregators")); - if ( OMPI_ERR_MAX == vulcan_num_io_procs ) { + base_num_io_procs = fh->f_get_mca_parameter_value ( "num_aggregators", strlen ("num_aggregators")); + if ( OMPI_ERR_MAX == base_num_io_procs ) { ret = OMPI_ERROR; goto exit; } ret = mca_common_ompio_set_aggregator_props ((struct ompio_file_t *) fh, - vulcan_num_io_procs, + base_num_io_procs, max_data); if (OMPI_SUCCESS != ret){ goto exit; @@ -741,7 +743,7 @@ mca_fcoll_vulcan_file_read_all (ompio_file_t *fh, 1, sendtype[i], fh->f_procs_in_group[i], - FCOLL_VULCAN_SHUFFLE_TAG, + FCOLL_BASE_SHUFFLE_TAG, MCA_PML_BASE_SEND_STANDARD, fh->f_comm, &send_req[i])); @@ -822,7 +824,7 @@ mca_fcoll_vulcan_file_read_all (ompio_file_t 
*fh, 1, newType, my_aggregator, - FCOLL_VULCAN_SHUFFLE_TAG, + FCOLL_BASE_SHUFFLE_TAG, fh->f_comm, &recv_req)); @@ -867,7 +869,7 @@ mca_fcoll_vulcan_file_read_all (ompio_file_t *fh, nentry.aggregator = 1; else nentry.aggregator = 0; - nentry.nprocs_for_coll = vulcan_num_io_procs; + nentry.nprocs_for_coll = base_num_io_procs; if (!mca_common_ompio_full_print_queue(fh->f_coll_read_time)){ mca_common_ompio_register_print_entry(fh->f_coll_read_time, nentry); diff --git a/ompi/mca/fcoll/dynamic/Makefile.am b/ompi/mca/fcoll/dynamic/Makefile.am index 603fa55df72..b6012002c77 100644 --- a/ompi/mca/fcoll/dynamic/Makefile.am +++ b/ompi/mca/fcoll/dynamic/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008-2015 University of Houston. All rights reserved. +# Copyright (c) 2008-2022 University of Houston. All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. # Copyright (c) 2018 Research Organization for Information Science @@ -25,7 +25,6 @@ sources = \ fcoll_dynamic.h \ fcoll_dynamic_module.c \ fcoll_dynamic_component.c \ - fcoll_dynamic_file_read_all.c \ fcoll_dynamic_file_write_all.c # Make the output library in this directory, and name it either diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic.h b/ompi/mca/fcoll/dynamic/fcoll_dynamic.h index d929def61ae..bc6d1eb5567 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic.h +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2008-2022 University of Houston. All rights reserved. 
* Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -50,13 +50,6 @@ int mca_fcoll_dynamic_component_file_unquery (ompio_file_t *file); int mca_fcoll_dynamic_module_init (ompio_file_t *file); int mca_fcoll_dynamic_module_finalize (ompio_file_t *file); -int mca_fcoll_dynamic_file_read_all (ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_status_public_t * status); - - int mca_fcoll_dynamic_file_write_all (ompio_file_t *fh, const void *buf, int count, diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c deleted file mode 100644 index b743d08a0ea..00000000000 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c +++ /dev/null @@ -1,1083 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2015 University of Houston. All rights reserved. - * Copyright (c) 2017-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "fcoll_dynamic.h" - -#include "mpi.h" -#include "ompi/constants.h" -#include "ompi/mca/fcoll/fcoll.h" -#include "ompi/mca/fcoll/base/fcoll_base_coll_array.h" -#include "ompi/mca/common/ompio/common_ompio.h" -#include "ompi/mca/io/io.h" -#include "math.h" -#include "ompi/mca/pml/pml.h" -#include - -#define DEBUG_ON 0 - -/*Used for loading file-offsets per aggregator*/ -typedef struct mca_io_ompio_local_io_array{ - OMPI_MPI_OFFSET_TYPE offset; - MPI_Aint length; - int process_id; -}mca_io_ompio_local_io_array; - - -static int read_heap_sort (mca_io_ompio_local_io_array *io_array, - int num_entries, - int *sorted); - - - -int -mca_fcoll_dynamic_file_read_all (ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_status_public_t *status) -{ - MPI_Aint position = 0; - MPI_Aint total_bytes = 0; /* total bytes to be read */ - MPI_Aint bytes_to_read_in_cycle = 0; /* left to be read in a cycle*/ - MPI_Aint bytes_per_cycle = 0; /* total read in each cycle by each process*/ - int index = 0, ret=OMPI_SUCCESS; - int cycles = 0; - int i=0, j=0, l=0; - int n=0; /* current position in total_bytes_per_process array */ - MPI_Aint bytes_remaining = 0; /* how many bytes have been read from the current - value from total_bytes_per_process */ - int *sorted_file_offsets=NULL, entries_per_aggregator=0; - int bytes_received = 0; - int blocks = 0; - /* iovec structure and count of the buffer passed in */ - uint32_t iov_count = 0; - struct iovec *decoded_iov = NULL; - int iov_index = 0; - size_t current_position = 0; - struct iovec *local_iov_array=NULL, *global_iov_array=NULL; - char *receive_buf = NULL; - MPI_Aint *memory_displacements=NULL; - /* global iovec at the readers that contain the iovecs created from - file_set_view */ - uint32_t total_fview_count = 0; - int local_count = 0; - int *fview_count = NULL, *disp_index=NULL, 
*temp_disp_index=NULL; - int current_index=0, temp_index=0; - int **blocklen_per_process=NULL; - MPI_Aint **displs_per_process=NULL; - char *global_buf = NULL; - MPI_Aint global_count = 0; - mca_io_ompio_local_io_array *file_offsets_for_agg=NULL; - - /* array that contains the sorted indices of the global_iov */ - int *sorted = NULL; - int *displs = NULL; - int dynamic_num_io_procs; - size_t max_data = 0; - MPI_Aint *total_bytes_per_process = NULL; - ompi_datatype_t **sendtype = NULL; - MPI_Request *send_req=NULL, recv_req=NULL; - int my_aggregator =-1; - bool recvbuf_is_contiguous=false; - size_t ftype_size; - ptrdiff_t ftype_extent, lb; - - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0; - double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0; - double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0; - mca_common_ompio_print_entry nentry; -#endif - - /************************************************************************** - ** 1. In case the data is not contigous in memory, decode it into an iovec - **************************************************************************/ - - opal_datatype_type_size ( &datatype->super, &ftype_size ); - opal_datatype_get_extent ( &datatype->super, &lb, &ftype_extent ); - - if ( (ftype_extent == (ptrdiff_t) ftype_size) && - opal_datatype_is_contiguous_memory_layout(&datatype->super,1) && - 0 == lb ) { - recvbuf_is_contiguous = true; - } - - - if (! 
recvbuf_is_contiguous ) { - ret = mca_common_ompio_decode_datatype ((struct ompio_file_t *)fh, - datatype, - count, - buf, - &max_data, - fh->f_mem_convertor, - &decoded_iov, - &iov_count); - if (OMPI_SUCCESS != ret){ - goto exit; - } - } - else { - max_data = count * datatype->super.size; - } - - if ( MPI_STATUS_IGNORE != status ) { - status->_ucount = max_data; - } - - dynamic_num_io_procs = fh->f_get_mca_parameter_value ( "num_aggregators", strlen ("num_aggregators")); - if ( OMPI_ERR_MAX == dynamic_num_io_procs ) { - ret = OMPI_ERROR; - goto exit; - } - ret = mca_common_ompio_set_aggregator_props ((struct ompio_file_t *) fh, - dynamic_num_io_procs, - max_data); - if (OMPI_SUCCESS != ret){ - goto exit; - } - my_aggregator = fh->f_procs_in_group[0]; - - /************************************************************************** - ** 2. Determine the total amount of data to be written - **************************************************************************/ - total_bytes_per_process = (MPI_Aint*)malloc(fh->f_procs_per_group*sizeof(MPI_Aint)); - if (NULL == total_bytes_per_process) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_rcomm_time = MPI_Wtime(); -#endif - ret = ompi_fcoll_base_coll_allgather_array (&max_data, - 1, - MPI_LONG, - total_bytes_per_process, - 1, - MPI_LONG, - 0, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - if (OMPI_SUCCESS != ret){ - goto exit; - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; -#endif - - for (i=0 ; if_procs_per_group ; i++) { - total_bytes += total_bytes_per_process[i]; - } - - if (NULL != total_bytes_per_process) { - free (total_bytes_per_process); - total_bytes_per_process = NULL; - } - - /********************************************************************* - *** 3. 
Generate the File offsets/lengths corresponding to this write - ********************************************************************/ - ret = fh->f_generate_current_file_view ((struct ompio_file_t *) fh, - max_data, - &local_iov_array, - &local_count); - - if (ret != OMPI_SUCCESS){ - goto exit; - } - - /************************************************************* - *** 4. Allgather the File View information at all processes - *************************************************************/ - - fview_count = (int *) malloc (fh->f_procs_per_group * sizeof (int)); - if (NULL == fview_count) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_rcomm_time = MPI_Wtime(); -#endif - ret = ompi_fcoll_base_coll_allgather_array (&local_count, - 1, - MPI_INT, - fview_count, - 1, - MPI_INT, - 0, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - - if (OMPI_SUCCESS != ret){ - goto exit; - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; -#endif - - displs = (int*)malloc (fh->f_procs_per_group*sizeof(int)); - if (NULL == displs) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - displs[0] = 0; - total_fview_count = fview_count[0]; - for (i=1 ; if_procs_per_group ; i++) { - total_fview_count += fview_count[i]; - displs[i] = displs[i-1] + fview_count[i-1]; - } - -#if DEBUG_ON - if (my_aggregator == fh->f_rank) { - for (i=0 ; if_procs_per_group ; i++) { - printf ("%d: PROCESS: %d ELEMENTS: %d DISPLS: %d\n", - fh->f_rank, - i, - fview_count[i], - displs[i]); -} -} -#endif - - /* allocate the global iovec */ - if (0 != total_fview_count) { - global_iov_array = (struct iovec*)malloc (total_fview_count * - sizeof(struct iovec)); - if (NULL == global_iov_array) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } -#if 
OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_rcomm_time = MPI_Wtime(); -#endif - ret = ompi_fcoll_base_coll_allgatherv_array (local_iov_array, - local_count, - fh->f_iov_type, - global_iov_array, - fview_count, - displs, - fh->f_iov_type, - 0, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - - if (OMPI_SUCCESS != ret){ - goto exit; - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; -#endif - - /**************************************************************************************** - *** 5. Sort the global offset/lengths list based on the offsets. - *** The result of the sort operation is the 'sorted', an integer array, - *** which contains the indexes of the global_iov_array based on the offset. - *** For example, if global_iov_array[x].offset is followed by global_iov_array[y].offset - *** in the file, and that one is followed by global_iov_array[z].offset, than - *** sorted[0] = x, sorted[1]=y and sorted[2]=z; - ******************************************************************************************/ - if (0 != total_fview_count) { - sorted = (int *)malloc (total_fview_count * sizeof(int)); - if (NULL == sorted) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - ompi_fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted); - } - - if (NULL != local_iov_array) { - free (local_iov_array); - local_iov_array = NULL; - } - -#if DEBUG_ON - if (my_aggregator == fh->f_rank) { - for (i=0 ; if_rank, - global_iov_array[sorted[i]].iov_base, - global_iov_array[sorted[i]].iov_len); - } - } -#endif - - /************************************************************* - *** 6. 
Determine the number of cycles required to execute this - *** operation - *************************************************************/ - bytes_per_cycle = fh->f_bytes_per_agg; - cycles = ceil((double)total_bytes/bytes_per_cycle); - - if ( my_aggregator == fh->f_rank) { - disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); - if (NULL == disp_index) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - blocklen_per_process = (int **)malloc (fh->f_procs_per_group * sizeof (int*)); - if (NULL == blocklen_per_process) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - displs_per_process = (MPI_Aint **)malloc (fh->f_procs_per_group * sizeof (MPI_Aint*)); - if (NULL == displs_per_process){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - for (i=0;if_procs_per_group;i++){ - blocklen_per_process[i] = NULL; - displs_per_process[i] = NULL; - } - - send_req = (MPI_Request *) malloc (fh->f_procs_per_group * sizeof(MPI_Request)); - if (NULL == send_req){ - opal_output ( 1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - global_buf = (char *) malloc (bytes_per_cycle); - if (NULL == global_buf){ - opal_output(1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - sendtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); - if (NULL == sendtype) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - for(l=0;lf_procs_per_group;l++){ - sendtype[l] = MPI_DATATYPE_NULL; - } - } - - - - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_rexch = MPI_Wtime(); -#endif - n = 0; - bytes_remaining = 0; - current_index = 0; - - for (index = 0; index < cycles; index++) { - /********************************************************************** - *** 7a. 
Getting ready for next cycle: initializing and freeing buffers - **********************************************************************/ - if (my_aggregator == fh->f_rank) { - if (NULL != fh->f_io_array) { - free (fh->f_io_array); - fh->f_io_array = NULL; - } - fh->f_num_of_io_entries = 0; - - if (NULL != sendtype){ - for (i =0; i< fh->f_procs_per_group; i++) { - if ( MPI_DATATYPE_NULL != sendtype[i] ) { - ompi_datatype_destroy(&sendtype[i]); - sendtype[i] = MPI_DATATYPE_NULL; - } - } - } - - for(l=0;lf_procs_per_group;l++){ - disp_index[l] = 1; - - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } - blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); - if (NULL == blocklen_per_process[l]) { - opal_output (1, "OUT OF MEMORY for blocklen\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); - if (NULL == displs_per_process[l]){ - opal_output (1, "OUT OF MEMORY for displs\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - if (NULL != sorted_file_offsets){ - free(sorted_file_offsets); - sorted_file_offsets = NULL; - } - - if(NULL != file_offsets_for_agg){ - free(file_offsets_for_agg); - file_offsets_for_agg = NULL; - } - if (NULL != memory_displacements){ - free(memory_displacements); - memory_displacements = NULL; - } - } /* (my_aggregator == fh->f_rank */ - - /************************************************************************** - *** 7b. 
Determine the number of bytes to be actually read in this cycle - **************************************************************************/ - if (cycles-1 == index) { - bytes_to_read_in_cycle = total_bytes - bytes_per_cycle*index; - } - else { - bytes_to_read_in_cycle = bytes_per_cycle; - } - -#if DEBUG_ON - if (my_aggregator == fh->f_rank) { - printf ("****%d: CYCLE %d Bytes %d**********\n", - fh->f_rank, - index, - bytes_to_write_in_cycle); - } -#endif - - /***************************************************************** - *** 7c. Calculate how much data will be contributed in this cycle - *** by each process - *****************************************************************/ - bytes_received = 0; - - while (bytes_to_read_in_cycle) { - /* This next block identifies which process is the holder - ** of the sorted[current_index] element; - */ - blocks = fview_count[0]; - for (j=0 ; jf_procs_per_group ; j++) { - if (sorted[current_index] < blocks) { - n = j; - break; - } - else { - blocks += fview_count[j+1]; - } - } - - if (bytes_remaining) { - /* Finish up a partially used buffer from the previous cycle */ - if (bytes_remaining <= bytes_to_read_in_cycle) { - /* Data fits completely into the block */ - if (my_aggregator == fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = bytes_remaining; - displs_per_process[n][disp_index[n] - 1] = - (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base + - (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); - - blocklen_per_process[n] = (int *) realloc - ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); - displs_per_process[n] = (MPI_Aint *) realloc - ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); - blocklen_per_process[n][disp_index[n]] = 0; - displs_per_process[n][disp_index[n]] = 0; - disp_index[n] += 1; - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += bytes_remaining; - } - current_index ++; - bytes_to_read_in_cycle -= 
bytes_remaining; - bytes_remaining = 0; - continue; - } - else { - /* the remaining data from the previous cycle is larger than the - bytes_to_write_in_cycle, so we have to segment again */ - if (my_aggregator == fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; - displs_per_process[n][disp_index[n] - 1] = - (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base + - (global_iov_array[sorted[current_index]].iov_len - - bytes_remaining); - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += bytes_to_read_in_cycle; - } - bytes_remaining -= bytes_to_read_in_cycle; - bytes_to_read_in_cycle = 0; - break; - } - } - else { - /* No partially used entry available, have to start a new one */ - if (bytes_to_read_in_cycle < - (MPI_Aint) global_iov_array[sorted[current_index]].iov_len) { - /* This entry has more data than we can sendin one cycle */ - if (my_aggregator == fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; - displs_per_process[n][disp_index[n] - 1] = - (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base ; - } - - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += bytes_to_read_in_cycle; - } - bytes_remaining = global_iov_array[sorted[current_index]].iov_len - - bytes_to_read_in_cycle; - bytes_to_read_in_cycle = 0; - break; - } - else { - /* Next data entry is less than bytes_to_write_in_cycle */ - if (my_aggregator == fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = - global_iov_array[sorted[current_index]].iov_len; - displs_per_process[n][disp_index[n] - 1] = (ptrdiff_t) - global_iov_array[sorted[current_index]].iov_base; - blocklen_per_process[n] = - (int *) realloc ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); - displs_per_process[n] = (MPI_Aint *)realloc - ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); - blocklen_per_process[n][disp_index[n]] = 0; - displs_per_process[n][disp_index[n]] = 0; - 
disp_index[n] += 1; - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += - global_iov_array[sorted[current_index]].iov_len; - } - bytes_to_read_in_cycle -= - global_iov_array[sorted[current_index]].iov_len; - current_index ++; - continue; - } - } - } /* end while (bytes_to_read_in_cycle) */ - - /************************************************************************* - *** 7d. Calculate the displacement on where to put the data and allocate - *** the recieve buffer (global_buf) - *************************************************************************/ - if (my_aggregator == fh->f_rank) { - entries_per_aggregator=0; - for (i=0;if_procs_per_group; i++){ - for (j=0;j 0) - entries_per_aggregator++ ; - } - } - if (entries_per_aggregator > 0){ - file_offsets_for_agg = (mca_io_ompio_local_io_array *) - malloc(entries_per_aggregator*sizeof(mca_io_ompio_local_io_array)); - if (NULL == file_offsets_for_agg) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - sorted_file_offsets = (int *) - malloc (entries_per_aggregator*sizeof(int)); - if (NULL == sorted_file_offsets){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - /*Moving file offsets to an IO array!*/ - temp_index = 0; - global_count = 0; - for (i=0;if_procs_per_group; i++){ - for(j=0;j 0){ - file_offsets_for_agg[temp_index].length = - blocklen_per_process[i][j]; - global_count += blocklen_per_process[i][j]; - file_offsets_for_agg[temp_index].process_id = i; - file_offsets_for_agg[temp_index].offset = - displs_per_process[i][j]; - temp_index++; - } - } - } - } - else{ - continue; - } - - /* Sort the displacements for each aggregator */ - read_heap_sort (file_offsets_for_agg, - entries_per_aggregator, - sorted_file_offsets); - - memory_displacements = (MPI_Aint *) malloc - (entries_per_aggregator * sizeof(MPI_Aint)); - memory_displacements[sorted_file_offsets[0]] = 0; - for (i=1; if_io_array = 
(mca_common_ompio_io_array_t *) malloc - (entries_per_aggregator * sizeof (mca_common_ompio_io_array_t)); - if (NULL == fh->f_io_array) { - opal_output(1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - fh->f_num_of_io_entries = 0; - fh->f_io_array[0].offset = - (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; - fh->f_io_array[0].length = - file_offsets_for_agg[sorted_file_offsets[0]].length; - fh->f_io_array[0].memory_address = - global_buf+memory_displacements[sorted_file_offsets[0]]; - fh->f_num_of_io_entries++; - for (i=1;if_io_array[fh->f_num_of_io_entries - 1].length += - file_offsets_for_agg[sorted_file_offsets[i]].length; - } - else{ - fh->f_io_array[fh->f_num_of_io_entries].offset = - (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; - fh->f_io_array[fh->f_num_of_io_entries].length = - file_offsets_for_agg[sorted_file_offsets[i]].length; - fh->f_io_array[fh->f_num_of_io_entries].memory_address = - global_buf+memory_displacements[sorted_file_offsets[i]]; - fh->f_num_of_io_entries++; - } - } - - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_read_time = MPI_Wtime(); -#endif - - if (fh->f_num_of_io_entries) { - if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { - opal_output (1, "READ FAILED\n"); - ret = OMPI_ERROR; - goto exit; - } - } - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_read_time = MPI_Wtime(); - read_time += end_read_time - start_read_time; -#endif - /********************************************************** - ******************** DONE READING ************************ - *********************************************************/ - - temp_disp_index = (int *)calloc (1, fh->f_procs_per_group * sizeof (int)); - if (NULL == temp_disp_index) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - for (i=0; if_procs_per_group;i++){ - send_req[i] = MPI_REQUEST_NULL; - if ( 0 < disp_index[i] ) { - ompi_datatype_create_hindexed(disp_index[i], - 
blocklen_per_process[i], - displs_per_process[i], - MPI_BYTE, - &sendtype[i]); - ompi_datatype_commit(&sendtype[i]); - ret = MCA_PML_CALL (isend(global_buf, - 1, - sendtype[i], - fh->f_procs_in_group[i], - 123, - MCA_PML_BASE_SEND_STANDARD, - fh->f_comm, - &send_req[i])); - if(OMPI_SUCCESS != ret){ - goto exit; - } - } - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; -#endif - } - - /********************************************************** - *** 7f. Scatter the Data from the readers - *********************************************************/ - if ( recvbuf_is_contiguous ) { - receive_buf = &((char*)buf)[position]; - } - else if (bytes_received) { - /* allocate a receive buffer and copy the data that needs - to be received into it in case the data is non-contigous - in memory */ - receive_buf = malloc (bytes_received); - if (NULL == receive_buf) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_rcomm_time = MPI_Wtime(); -#endif - ret = MCA_PML_CALL(irecv(receive_buf, - bytes_received, - MPI_BYTE, - my_aggregator, - 123, - fh->f_comm, - &recv_req)); - if (OMPI_SUCCESS != ret){ - goto exit; - } - - - if (my_aggregator == fh->f_rank){ - ret = ompi_request_wait_all (fh->f_procs_per_group, - send_req, - MPI_STATUS_IGNORE); - if (OMPI_SUCCESS != ret){ - goto exit; - } - } - - ret = ompi_request_wait (&recv_req, MPI_STATUS_IGNORE); - if (OMPI_SUCCESS != ret){ - goto exit; - } - position += bytes_received; - - /* If data is not contigous in memory, copy the data from the - receive buffer into the buffer passed in */ - if (!recvbuf_is_contiguous ) { - ptrdiff_t mem_address; - size_t remaining = 0; - size_t temp_position = 0; - - remaining = bytes_received; - - while (remaining) { - mem_address = (ptrdiff_t) - (decoded_iov[iov_index].iov_base) + current_position; - - if (remaining >= - 
(decoded_iov[iov_index].iov_len - current_position)) { - memcpy ((IOVBASE_TYPE *) mem_address, - receive_buf+temp_position, - decoded_iov[iov_index].iov_len - current_position); - remaining = remaining - - (decoded_iov[iov_index].iov_len - current_position); - temp_position = temp_position + - (decoded_iov[iov_index].iov_len - current_position); - iov_index = iov_index + 1; - current_position = 0; - } - else { - memcpy ((IOVBASE_TYPE *) mem_address, - receive_buf+temp_position, - remaining); - current_position = current_position + remaining; - remaining = 0; - } - } - - if (NULL != receive_buf) { - free (receive_buf); - receive_buf = NULL; - } - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; -#endif - } /* end for (index=0; index < cycles; index ++) */ - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_rexch = MPI_Wtime(); - read_exch += end_rexch - start_rexch; - nentry.time[0] = read_time; - nentry.time[1] = rcomm_time; - nentry.time[2] = read_exch; - if (my_aggregator == fh->f_rank) - nentry.aggregator = 1; - else - nentry.aggregator = 0; - nentry.nprocs_for_coll = dynamic_num_io_procs; - if (!mca_common_ompio_full_print_queue(fh->f_coll_read_time)){ - mca_common_ompio_register_print_entry(fh->f_coll_read_time, - nentry); - } -#endif - -exit: - if (!recvbuf_is_contiguous) { - if (NULL != receive_buf) { - free (receive_buf); - receive_buf = NULL; - } - } - if (NULL != global_buf) { - free (global_buf); - global_buf = NULL; - } - if (NULL != sorted) { - free (sorted); - sorted = NULL; - } - if (NULL != global_iov_array) { - free (global_iov_array); - global_iov_array = NULL; - } - if (NULL != fview_count) { - free (fview_count); - fview_count = NULL; - } - if (NULL != decoded_iov) { - free (decoded_iov); - decoded_iov = NULL; - } - if (NULL != local_iov_array){ - free(local_iov_array); - local_iov_array=NULL; - } - - if (NULL != displs) { - free (displs); - displs = NULL; - } - if (my_aggregator 
== fh->f_rank) { - - if (NULL != sorted_file_offsets){ - free(sorted_file_offsets); - sorted_file_offsets = NULL; - } - if (NULL != file_offsets_for_agg){ - free(file_offsets_for_agg); - file_offsets_for_agg = NULL; - } - if (NULL != memory_displacements){ - free(memory_displacements); - memory_displacements= NULL; - } - if (NULL != sendtype){ - for (i = 0; i < fh->f_procs_per_group; i++) { - if ( MPI_DATATYPE_NULL != sendtype[i] ) { - ompi_datatype_destroy(&sendtype[i]); - } - } - free(sendtype); - sendtype=NULL; - } - - if (NULL != disp_index){ - free(disp_index); - disp_index = NULL; - } - - if ( NULL != blocklen_per_process){ - for(l=0;lf_procs_per_group;l++){ - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - } - - free(blocklen_per_process); - blocklen_per_process = NULL; - } - - if (NULL != displs_per_process){ - for (l=0; if_procs_per_group; l++){ - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } - } - free(displs_per_process); - displs_per_process = NULL; - } - if ( NULL != send_req ) { - free ( send_req ); - send_req = NULL; - } - } - return ret; -} - - -static int read_heap_sort (mca_io_ompio_local_io_array *io_array, - int num_entries, - int *sorted) -{ - int i = 0; - int j = 0; - int left = 0; - int right = 0; - int largest = 0; - int heap_size = num_entries - 1; - int temp = 0; - unsigned char done = 0; - int* temp_arr = NULL; - - temp_arr = (int*)malloc(num_entries*sizeof(int)); - if (NULL == temp_arr) { - opal_output (1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - temp_arr[0] = 0; - for (i = 1; i < num_entries; ++i) { - temp_arr[i] = i; - } - /* num_entries can be a large no. 
so NO RECURSION */ - for (i = num_entries/2-1 ; i>=0 ; i--) { - done = 0; - j = i; - largest = j; - - while (!done) { - left = j*2+1; - right = j*2+2; - if ((left <= heap_size) && - (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { - largest = left; - } - else { - largest = j; - } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > - io_array[temp_arr[largest]].offset)) { - largest = right; - } - if (largest != j) { - temp = temp_arr[largest]; - temp_arr[largest] = temp_arr[j]; - temp_arr[j] = temp; - j = largest; - } - else { - done = 1; - } - } - } - - for (i = num_entries-1; i >=1; --i) { - temp = temp_arr[0]; - temp_arr[0] = temp_arr[i]; - temp_arr[i] = temp; - heap_size--; - done = 0; - j = 0; - largest = j; - - while (!done) { - left = j*2+1; - right = j*2+2; - - if ((left <= heap_size) && - (io_array[temp_arr[left]].offset > - io_array[temp_arr[j]].offset)) { - largest = left; - } - else { - largest = j; - } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > - io_array[temp_arr[largest]].offset)) { - largest = right; - } - if (largest != j) { - temp = temp_arr[largest]; - temp_arr[largest] = temp_arr[j]; - temp_arr[j] = temp; - j = largest; - } - else { - done = 1; - } - } - sorted[i] = temp_arr[i]; - } - sorted[0] = temp_arr[0]; - - if (NULL != temp_arr) { - free(temp_arr); - temp_arr = NULL; - } - return OMPI_SUCCESS; -} - - - diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c index 96b88a3f849..ef47ebfcb74 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2008-2022 University of Houston. All rights reserved. 
* Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -37,7 +37,7 @@ static mca_fcoll_base_module_1_0_0_t dynamic = { mca_fcoll_dynamic_module_init, mca_fcoll_dynamic_module_finalize, - mca_fcoll_dynamic_file_read_all, + mca_fcoll_base_file_read_all, NULL, /* iread_all */ mca_fcoll_dynamic_file_write_all, NULL, /*iwrite_all */ diff --git a/ompi/mca/fcoll/dynamic_gen2/Makefile.am b/ompi/mca/fcoll/dynamic_gen2/Makefile.am index a4e91e5cd7e..8699fa7f095 100644 --- a/ompi/mca/fcoll/dynamic_gen2/Makefile.am +++ b/ompi/mca/fcoll/dynamic_gen2/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008-2015 University of Houston. All rights reserved. +# Copyright (c) 2008-2022 University of Houston. All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. # Copyright (c) 2018 Research Organization for Information Science @@ -25,7 +25,6 @@ sources = \ fcoll_dynamic_gen2.h \ fcoll_dynamic_gen2_module.c \ fcoll_dynamic_gen2_component.c \ - fcoll_dynamic_gen2_file_read_all.c \ fcoll_dynamic_gen2_file_write_all.c # Make the output library in this directory, and name it either diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h index d0767a3046f..942d3922de7 100644 --- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2021 University of Houston. All rights reserved. + * Copyright (c) 2008-2022 University of Houston. All rights reserved. 
* Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -54,13 +54,6 @@ int mca_fcoll_dynamic_gen2_component_file_unquery (ompio_file_t *file); int mca_fcoll_dynamic_gen2_module_init (ompio_file_t *file); int mca_fcoll_dynamic_gen2_module_finalize (ompio_file_t *file); -int mca_fcoll_dynamic_gen2_file_read_all (ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_status_public_t * status); - - int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh, const void *buf, int count, diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c deleted file mode 100644 index d64f3127a94..00000000000 --- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c +++ /dev/null @@ -1,1082 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2021 University of Houston. All rights reserved. - * Copyright (c) 2017-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "fcoll_dynamic_gen2.h" - -#include "mpi.h" -#include "ompi/constants.h" -#include "ompi/mca/fcoll/fcoll.h" -#include "ompi/mca/fcoll/base/fcoll_base_coll_array.h" -#include "ompi/mca/common/ompio/common_ompio.h" -#include "ompi/mca/io/io.h" -#include "math.h" -#include "ompi/mca/pml/pml.h" -#include - -#define DEBUG_ON 0 - -/*Used for loading file-offsets per aggregator*/ -typedef struct mca_io_ompio_local_io_array{ - OMPI_MPI_OFFSET_TYPE offset; - MPI_Aint length; - int process_id; -}mca_io_ompio_local_io_array; - - -static int read_heap_sort (mca_io_ompio_local_io_array *io_array, - int num_entries, - int *sorted); - - - -int -mca_fcoll_dynamic_gen2_file_read_all (ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_status_public_t *status) -{ - MPI_Aint total_bytes = 0; /* total bytes to be read */ - MPI_Aint bytes_to_read_in_cycle = 0; /* left to be read in a cycle*/ - MPI_Aint bytes_per_cycle = 0; /* total read in each cycle by each process*/ - int index = 0, ret=OMPI_SUCCESS; - int cycles = 0; - int i=0, j=0, l=0; - int n=0; /* current position in total_bytes_per_process array */ - MPI_Aint bytes_remaining = 0; /* how many bytes have been read from the current - value from total_bytes_per_process */ - int *sorted_file_offsets=NULL, entries_per_aggregator=0; - int bytes_received = 0; - int blocks = 0; - /* iovec structure and count of the buffer passed in */ - uint32_t iov_count = 0; - struct iovec *decoded_iov = NULL; - int iov_index = 0; - size_t current_position = 0; - struct iovec *local_iov_array=NULL, *global_iov_array=NULL; - MPI_Aint *memory_displacements=NULL; - /* global iovec at the readers that contain the iovecs created from - file_set_view */ - uint32_t total_fview_count = 0; - int local_count = 0; - int *fview_count = NULL, *disp_index=NULL, *temp_disp_index=NULL; - int current_index=0, 
temp_index=0; - int **blocklen_per_process=NULL; - MPI_Aint **displs_per_process=NULL; - char *global_buf = NULL; - MPI_Aint global_count = 0; - mca_io_ompio_local_io_array *file_offsets_for_agg=NULL; - - /* array that contains the sorted indices of the global_iov */ - int *sorted = NULL; - int *displs = NULL; - int dynamic_gen2_num_io_procs; - size_t max_data = 0; - MPI_Aint *total_bytes_per_process = NULL; - ompi_datatype_t **sendtype = NULL; - MPI_Request *send_req = NULL; - MPI_Request recv_req = MPI_REQUEST_NULL; - int my_aggregator =-1; - - int* blocklength_proc = NULL; - ptrdiff_t* displs_proc = NULL; - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0; - double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0; - double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0; - mca_common_ompio_print_entry nentry; -#endif - - /************************************************************************** - ** 1. In case the data is not contigous in memory, decode it into an iovec - **************************************************************************/ - ret = mca_common_ompio_decode_datatype ((struct ompio_file_t *)fh, - datatype, - count, - buf, - &max_data, - fh->f_mem_convertor, - &decoded_iov, - &iov_count); - if (OMPI_SUCCESS != ret){ - goto exit; - } - - if ( MPI_STATUS_IGNORE != status ) { - status->_ucount = max_data; - } - - dynamic_gen2_num_io_procs = fh->f_get_mca_parameter_value ( "num_aggregators", strlen ("num_aggregators")); - if ( OMPI_ERR_MAX == dynamic_gen2_num_io_procs ) { - ret = OMPI_ERROR; - goto exit; - } - ret = mca_common_ompio_set_aggregator_props ((struct ompio_file_t *) fh, - dynamic_gen2_num_io_procs, - max_data); - if (OMPI_SUCCESS != ret){ - goto exit; - } - my_aggregator = fh->f_procs_in_group[0]; - - /************************************************************************** - ** 2. 
Determine the total amount of data to be written - **************************************************************************/ - total_bytes_per_process = (MPI_Aint*)malloc(fh->f_procs_per_group*sizeof(MPI_Aint)); - if (NULL == total_bytes_per_process) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_rcomm_time = MPI_Wtime(); -#endif - ret = ompi_fcoll_base_coll_allgather_array (&max_data, - 1, - MPI_LONG, - total_bytes_per_process, - 1, - MPI_LONG, - 0, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - if (OMPI_SUCCESS != ret){ - goto exit; - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; -#endif - - for (i=0 ; if_procs_per_group ; i++) { - total_bytes += total_bytes_per_process[i]; - } - - if (NULL != total_bytes_per_process) { - free (total_bytes_per_process); - total_bytes_per_process = NULL; - } - - /********************************************************************* - *** 3. Generate the File offsets/lengths corresponding to this write - ********************************************************************/ - ret = fh->f_generate_current_file_view ((struct ompio_file_t *) fh, - max_data, - &local_iov_array, - &local_count); - - if (ret != OMPI_SUCCESS){ - goto exit; - } - - /************************************************************* - *** 4. 
Allgather the File View information at all processes - *************************************************************/ - - fview_count = (int *) malloc (fh->f_procs_per_group * sizeof (int)); - if (NULL == fview_count) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_rcomm_time = MPI_Wtime(); -#endif - ret = ompi_fcoll_base_coll_allgather_array (&local_count, - 1, - MPI_INT, - fview_count, - 1, - MPI_INT, - 0, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - - if (OMPI_SUCCESS != ret){ - goto exit; - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; -#endif - - displs = (int*)malloc (fh->f_procs_per_group*sizeof(int)); - if (NULL == displs) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - displs[0] = 0; - total_fview_count = fview_count[0]; - for (i=1 ; if_procs_per_group ; i++) { - total_fview_count += fview_count[i]; - displs[i] = displs[i-1] + fview_count[i-1]; - } - -#if DEBUG_ON - if (my_aggregator == fh->f_rank) { - for (i=0 ; if_procs_per_group ; i++) { - printf ("%d: PROCESS: %d ELEMENTS: %d DISPLS: %d\n", - fh->f_rank, - i, - fview_count[i], - displs[i]); -} -} -#endif - - /* allocate the global iovec */ - if (0 != total_fview_count) { - global_iov_array = (struct iovec*)malloc (total_fview_count * - sizeof(struct iovec)); - if (NULL == global_iov_array) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_rcomm_time = MPI_Wtime(); -#endif - ret = ompi_fcoll_base_coll_allgatherv_array (local_iov_array, - local_count, - fh->f_iov_type, - global_iov_array, - fview_count, - displs, - fh->f_iov_type, - 0, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - - if (OMPI_SUCCESS != ret){ - goto exit; - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - 
end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; -#endif - - /**************************************************************************************** - *** 5. Sort the global offset/lengths list based on the offsets. - *** The result of the sort operation is the 'sorted', an integer array, - *** which contains the indexes of the global_iov_array based on the offset. - *** For example, if global_iov_array[x].offset is followed by global_iov_array[y].offset - *** in the file, and that one is followed by global_iov_array[z].offset, than - *** sorted[0] = x, sorted[1]=y and sorted[2]=z; - ******************************************************************************************/ - if (0 != total_fview_count) { - sorted = (int *)malloc (total_fview_count * sizeof(int)); - if (NULL == sorted) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - ompi_fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted); - } - - if (NULL != local_iov_array) { - free (local_iov_array); - local_iov_array = NULL; - } - -#if DEBUG_ON - if (my_aggregator == fh->f_rank) { - for (i=0 ; if_rank, - global_iov_array[sorted[i]].iov_base, - global_iov_array[sorted[i]].iov_len); - } - } -#endif - - /************************************************************* - *** 6. 
Determine the number of cycles required to execute this - *** operation - *************************************************************/ - bytes_per_cycle = fh->f_bytes_per_agg; - cycles = ceil((double)total_bytes/bytes_per_cycle); - - if ( my_aggregator == fh->f_rank) { - disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); - if (NULL == disp_index) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - blocklen_per_process = (int **)malloc (fh->f_procs_per_group * sizeof (int*)); - if (NULL == blocklen_per_process) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - displs_per_process = (MPI_Aint **)malloc (fh->f_procs_per_group * sizeof (MPI_Aint*)); - if (NULL == displs_per_process){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - for (i=0;if_procs_per_group;i++){ - blocklen_per_process[i] = NULL; - displs_per_process[i] = NULL; - } - - send_req = (MPI_Request *) malloc (fh->f_procs_per_group * sizeof(MPI_Request)); - if (NULL == send_req){ - opal_output ( 1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - global_buf = (char *) malloc (bytes_per_cycle); - if (NULL == global_buf){ - opal_output(1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - sendtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); - if (NULL == sendtype) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - for(l=0;lf_procs_per_group;l++){ - sendtype[l] = MPI_DATATYPE_NULL; - } - } - - - - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_rexch = MPI_Wtime(); -#endif - n = 0; - bytes_remaining = 0; - current_index = 0; - - for (index = 0; index < cycles; index++) { - /********************************************************************** - *** 7a. 
Getting ready for next cycle: initializing and freeing buffers - **********************************************************************/ - if (my_aggregator == fh->f_rank) { - if (NULL != fh->f_io_array) { - free (fh->f_io_array); - fh->f_io_array = NULL; - } - fh->f_num_of_io_entries = 0; - - if (NULL != sendtype){ - for (i =0; i< fh->f_procs_per_group; i++) { - if ( MPI_DATATYPE_NULL != sendtype[i] ) { - ompi_datatype_destroy(&sendtype[i]); - sendtype[i] = MPI_DATATYPE_NULL; - } - } - } - - for(l=0;lf_procs_per_group;l++){ - disp_index[l] = 1; - - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } - blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); - if (NULL == blocklen_per_process[l]) { - opal_output (1, "OUT OF MEMORY for blocklen\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); - if (NULL == displs_per_process[l]){ - opal_output (1, "OUT OF MEMORY for displs\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - if (NULL != sorted_file_offsets){ - free(sorted_file_offsets); - sorted_file_offsets = NULL; - } - - if(NULL != file_offsets_for_agg){ - free(file_offsets_for_agg); - file_offsets_for_agg = NULL; - } - if (NULL != memory_displacements){ - free(memory_displacements); - memory_displacements = NULL; - } - } /* (my_aggregator == fh->f_rank */ - - /************************************************************************** - *** 7b. 
Determine the number of bytes to be actually read in this cycle - **************************************************************************/ - if (cycles-1 == index) { - bytes_to_read_in_cycle = total_bytes - bytes_per_cycle*index; - } - else { - bytes_to_read_in_cycle = bytes_per_cycle; - } - -#if DEBUG_ON - if (my_aggregator == fh->f_rank) { - printf ("****%d: CYCLE %d Bytes %d**********\n", - fh->f_rank, - index, - bytes_to_write_in_cycle); - } -#endif - - /***************************************************************** - *** 7c. Calculate how much data will be contributed in this cycle - *** by each process - *****************************************************************/ - bytes_received = 0; - - while (bytes_to_read_in_cycle) { - /* This next block identifies which process is the holder - ** of the sorted[current_index] element; - */ - blocks = fview_count[0]; - for (j=0 ; jf_procs_per_group ; j++) { - if (sorted[current_index] < blocks) { - n = j; - break; - } - else { - blocks += fview_count[j+1]; - } - } - - if (bytes_remaining) { - /* Finish up a partially used buffer from the previous cycle */ - if (bytes_remaining <= bytes_to_read_in_cycle) { - /* Data fits completely into the block */ - if (my_aggregator == fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = bytes_remaining; - displs_per_process[n][disp_index[n] - 1] = - (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base + - (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); - - blocklen_per_process[n] = (int *) realloc - ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); - displs_per_process[n] = (MPI_Aint *) realloc - ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); - blocklen_per_process[n][disp_index[n]] = 0; - displs_per_process[n][disp_index[n]] = 0; - disp_index[n] += 1; - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += bytes_remaining; - } - current_index ++; - bytes_to_read_in_cycle -= 
bytes_remaining; - bytes_remaining = 0; - continue; - } - else { - /* the remaining data from the previous cycle is larger than the - bytes_to_write_in_cycle, so we have to segment again */ - if (my_aggregator == fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; - displs_per_process[n][disp_index[n] - 1] = - (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base + - (global_iov_array[sorted[current_index]].iov_len - - bytes_remaining); - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += bytes_to_read_in_cycle; - } - bytes_remaining -= bytes_to_read_in_cycle; - bytes_to_read_in_cycle = 0; - break; - } - } - else { - /* No partially used entry available, have to start a new one */ - if (bytes_to_read_in_cycle < - (MPI_Aint) global_iov_array[sorted[current_index]].iov_len) { - /* This entry has more data than we can sendin one cycle */ - if (my_aggregator == fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; - displs_per_process[n][disp_index[n] - 1] = - (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base ; - } - - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += bytes_to_read_in_cycle; - } - bytes_remaining = global_iov_array[sorted[current_index]].iov_len - - bytes_to_read_in_cycle; - bytes_to_read_in_cycle = 0; - break; - } - else { - /* Next data entry is less than bytes_to_write_in_cycle */ - if (my_aggregator == fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = - global_iov_array[sorted[current_index]].iov_len; - displs_per_process[n][disp_index[n] - 1] = (ptrdiff_t) - global_iov_array[sorted[current_index]].iov_base; - blocklen_per_process[n] = - (int *) realloc ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); - displs_per_process[n] = (MPI_Aint *)realloc - ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); - blocklen_per_process[n][disp_index[n]] = 0; - displs_per_process[n][disp_index[n]] = 0; - 
disp_index[n] += 1; - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += - global_iov_array[sorted[current_index]].iov_len; - } - bytes_to_read_in_cycle -= - global_iov_array[sorted[current_index]].iov_len; - current_index ++; - continue; - } - } - } /* end while (bytes_to_read_in_cycle) */ - - /************************************************************************* - *** 7d. Calculate the displacement on where to put the data and allocate - *** the recieve buffer (global_buf) - *************************************************************************/ - if (my_aggregator == fh->f_rank) { - entries_per_aggregator=0; - for (i=0;if_procs_per_group; i++){ - for (j=0;j 0) - entries_per_aggregator++ ; - } - } - if (entries_per_aggregator > 0){ - file_offsets_for_agg = (mca_io_ompio_local_io_array *) - malloc(entries_per_aggregator*sizeof(mca_io_ompio_local_io_array)); - if (NULL == file_offsets_for_agg) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - sorted_file_offsets = (int *) - malloc (entries_per_aggregator*sizeof(int)); - if (NULL == sorted_file_offsets){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - /*Moving file offsets to an IO array!*/ - temp_index = 0; - global_count = 0; - for (i=0;if_procs_per_group; i++){ - for(j=0;j 0){ - file_offsets_for_agg[temp_index].length = - blocklen_per_process[i][j]; - global_count += blocklen_per_process[i][j]; - file_offsets_for_agg[temp_index].process_id = i; - file_offsets_for_agg[temp_index].offset = - displs_per_process[i][j]; - temp_index++; - } - } - } - } - else{ - continue; - } - - /* Sort the displacements for each aggregator */ - read_heap_sort (file_offsets_for_agg, - entries_per_aggregator, - sorted_file_offsets); - - memory_displacements = (MPI_Aint *) malloc - (entries_per_aggregator * sizeof(MPI_Aint)); - memory_displacements[sorted_file_offsets[0]] = 0; - for (i=1; if_io_array = 
(mca_common_ompio_io_array_t *) malloc - (entries_per_aggregator * sizeof (mca_common_ompio_io_array_t)); - if (NULL == fh->f_io_array) { - opal_output(1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - fh->f_num_of_io_entries = 0; - fh->f_io_array[0].offset = - (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; - fh->f_io_array[0].length = - file_offsets_for_agg[sorted_file_offsets[0]].length; - fh->f_io_array[0].memory_address = - global_buf+memory_displacements[sorted_file_offsets[0]]; - fh->f_num_of_io_entries++; - for (i=1;if_io_array[fh->f_num_of_io_entries - 1].length += - file_offsets_for_agg[sorted_file_offsets[i]].length; - } - else{ - fh->f_io_array[fh->f_num_of_io_entries].offset = - (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; - fh->f_io_array[fh->f_num_of_io_entries].length = - file_offsets_for_agg[sorted_file_offsets[i]].length; - fh->f_io_array[fh->f_num_of_io_entries].memory_address = - global_buf+memory_displacements[sorted_file_offsets[i]]; - fh->f_num_of_io_entries++; - } - } - - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_read_time = MPI_Wtime(); -#endif - - if (fh->f_num_of_io_entries) { - if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { - opal_output (1, "READ FAILED\n"); - ret = OMPI_ERROR; - goto exit; - } - } - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_read_time = MPI_Wtime(); - read_time += end_read_time - start_read_time; -#endif - /********************************************************** - ******************** DONE READING ************************ - *********************************************************/ - - temp_disp_index = (int *)calloc (1, fh->f_procs_per_group * sizeof (int)); - if (NULL == temp_disp_index) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - for (i=0; if_procs_per_group;i++){ - size_t datatype_size; - send_req[i] = MPI_REQUEST_NULL; - if ( 0 < disp_index[i] ) { - 
ompi_datatype_create_hindexed(disp_index[i], - blocklen_per_process[i], - displs_per_process[i], - MPI_BYTE, - &sendtype[i]); - ompi_datatype_commit(&sendtype[i]); - opal_datatype_type_size(&sendtype[i]->super, &datatype_size); - - if(datatype_size) { - ret = MCA_PML_CALL (isend(global_buf, - 1, - sendtype[i], - fh->f_procs_in_group[i], - FCOLL_DYNAMIC_GEN2_SHUFFLE_TAG, - MCA_PML_BASE_SEND_STANDARD, - fh->f_comm, - &send_req[i])); - if(OMPI_SUCCESS != ret){ - goto exit; - } - } - } - } -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; -#endif - } - - /********************************************************** - *** 7f. Scatter the Data from the readers - *********************************************************/ - if(bytes_received) { - size_t remaining = bytes_received; - int block_index = -1; - int blocklength_size = INIT_LEN; - - ptrdiff_t recv_mem_address = 0; - ompi_datatype_t *newType = MPI_DATATYPE_NULL; - - blocklength_proc = (int *) calloc (blocklength_size, sizeof (int)); - displs_proc = (ptrdiff_t *) calloc (blocklength_size, sizeof (ptrdiff_t)); - - if (NULL == blocklength_proc || NULL == displs_proc ) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - while (remaining) { - block_index++; - - if(0 == block_index) { - recv_mem_address = (ptrdiff_t) (decoded_iov[iov_index].iov_base) + current_position; - } - else { - // Reallocate more memory if blocklength_size is not enough - if(0 == block_index % INIT_LEN) { - blocklength_size += INIT_LEN; - blocklength_proc = (int *) realloc(blocklength_proc, blocklength_size * sizeof(int)); - displs_proc = (ptrdiff_t *) realloc(displs_proc, blocklength_size * sizeof(ptrdiff_t)); - } - displs_proc[block_index] = (ptrdiff_t) (decoded_iov[iov_index].iov_base) + - current_position - recv_mem_address; - } - - if (remaining >= (decoded_iov[iov_index].iov_len - current_position)) { - 
blocklength_proc[block_index] = decoded_iov[iov_index].iov_len - current_position; - - remaining = remaining - blocklength_proc[block_index]; - iov_index = iov_index + 1; - current_position = 0; - } - else { - blocklength_proc[block_index] = remaining; - current_position += remaining; - remaining = 0; - } - } - - ompi_datatype_create_hindexed(block_index+1, - blocklength_proc, - displs_proc, - MPI_BYTE, - &newType); - ompi_datatype_commit(&newType); - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - start_rcomm_time = MPI_Wtime(); -#endif - ret = MCA_PML_CALL(irecv((char *)recv_mem_address, - 1, - newType, - my_aggregator, - FCOLL_DYNAMIC_GEN2_SHUFFLE_TAG, - fh->f_comm, - &recv_req)); - - if ( MPI_DATATYPE_NULL != newType ) { - ompi_datatype_destroy(&newType); - } - - if (OMPI_SUCCESS != ret){ - goto exit; - } - } - - if (my_aggregator == fh->f_rank){ - ret = ompi_request_wait_all (fh->f_procs_per_group, - send_req, - MPI_STATUS_IGNORE); - if (OMPI_SUCCESS != ret){ - goto exit; - } - } - - ret = ompi_request_wait (&recv_req, MPI_STATUS_IGNORE); - if (OMPI_SUCCESS != ret){ - goto exit; - } - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - if(bytes_received) { - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; - } -#endif - } /* end for (index=0; index < cycles; index ++) */ - -#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN - end_rexch = MPI_Wtime(); - read_exch += end_rexch - start_rexch; - nentry.time[0] = read_time; - nentry.time[1] = rcomm_time; - nentry.time[2] = read_exch; - if (my_aggregator == fh->f_rank) - nentry.aggregator = 1; - else - nentry.aggregator = 0; - nentry.nprocs_for_coll = dynamic_gen2_num_io_procs; - if (!mca_common_ompio_full_print_queue(fh->f_coll_read_time)){ - mca_common_ompio_register_print_entry(fh->f_coll_read_time, - nentry); - } -#endif - -exit: - if (NULL != global_buf) { - free (global_buf); - global_buf = NULL; - } - if (NULL != sorted) { - free (sorted); - sorted = NULL; - } - if (NULL != global_iov_array) { - free 
(global_iov_array); - global_iov_array = NULL; - } - if (NULL != fview_count) { - free (fview_count); - fview_count = NULL; - } - if (NULL != decoded_iov) { - free (decoded_iov); - decoded_iov = NULL; - } - if (NULL != local_iov_array){ - free(local_iov_array); - local_iov_array=NULL; - } - - if (NULL != displs) { - free (displs); - displs = NULL; - } - - if (NULL != blocklength_proc) { - free (blocklength_proc); - blocklength_proc = NULL; - } - - if (NULL != displs_proc) { - free (displs_proc); - displs_proc = NULL; - } - - if (my_aggregator == fh->f_rank) { - - if (NULL != sorted_file_offsets){ - free(sorted_file_offsets); - sorted_file_offsets = NULL; - } - if (NULL != file_offsets_for_agg){ - free(file_offsets_for_agg); - file_offsets_for_agg = NULL; - } - if (NULL != memory_displacements){ - free(memory_displacements); - memory_displacements= NULL; - } - if (NULL != sendtype){ - for (i = 0; i < fh->f_procs_per_group; i++) { - if ( MPI_DATATYPE_NULL != sendtype[i] ) { - ompi_datatype_destroy(&sendtype[i]); - } - } - free(sendtype); - sendtype=NULL; - } - - if (NULL != disp_index){ - free(disp_index); - disp_index = NULL; - } - - if ( NULL != blocklen_per_process){ - for(l=0;lf_procs_per_group;l++){ - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - } - - free(blocklen_per_process); - blocklen_per_process = NULL; - } - - if (NULL != displs_per_process){ - for (l=0; if_procs_per_group; l++){ - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } - } - free(displs_per_process); - displs_per_process = NULL; - } - if ( NULL != send_req ) { - free ( send_req ); - send_req = NULL; - } - } - return ret; -} - - -static int read_heap_sort (mca_io_ompio_local_io_array *io_array, - int num_entries, - int *sorted) -{ - int i = 0; - int j = 0; - int left = 0; - int right = 0; - int largest = 0; - int heap_size = num_entries - 1; - int temp = 0; - unsigned char done 
= 0; - int* temp_arr = NULL; - - temp_arr = (int*)malloc(num_entries*sizeof(int)); - if (NULL == temp_arr) { - opal_output (1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - temp_arr[0] = 0; - for (i = 1; i < num_entries; ++i) { - temp_arr[i] = i; - } - /* num_entries can be a large no. so NO RECURSION */ - for (i = num_entries/2-1 ; i>=0 ; i--) { - done = 0; - j = i; - largest = j; - - while (!done) { - left = j*2+1; - right = j*2+2; - if ((left <= heap_size) && - (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { - largest = left; - } - else { - largest = j; - } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > - io_array[temp_arr[largest]].offset)) { - largest = right; - } - if (largest != j) { - temp = temp_arr[largest]; - temp_arr[largest] = temp_arr[j]; - temp_arr[j] = temp; - j = largest; - } - else { - done = 1; - } - } - } - - for (i = num_entries-1; i >=1; --i) { - temp = temp_arr[0]; - temp_arr[0] = temp_arr[i]; - temp_arr[i] = temp; - heap_size--; - done = 0; - j = 0; - largest = j; - - while (!done) { - left = j*2+1; - right = j*2+2; - - if ((left <= heap_size) && - (io_array[temp_arr[left]].offset > - io_array[temp_arr[j]].offset)) { - largest = left; - } - else { - largest = j; - } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > - io_array[temp_arr[largest]].offset)) { - largest = right; - } - if (largest != j) { - temp = temp_arr[largest]; - temp_arr[largest] = temp_arr[j]; - temp_arr[j] = temp; - j = largest; - } - else { - done = 1; - } - } - sorted[i] = temp_arr[i]; - } - sorted[0] = temp_arr[0]; - - if (NULL != temp_arr) { - free(temp_arr); - temp_arr = NULL; - } - return OMPI_SUCCESS; -} - - - diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_module.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_module.c index c3786cf3dfd..ac51b0e212a 100644 --- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_module.c +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_module.c 
@@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2008-2022 University of Houston. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -37,7 +37,7 @@ static mca_fcoll_base_module_1_0_0_t dynamic_gen2 = { mca_fcoll_dynamic_gen2_module_init, mca_fcoll_dynamic_gen2_module_finalize, - mca_fcoll_dynamic_gen2_file_read_all, + mca_fcoll_base_file_read_all, NULL, /* iread_all */ mca_fcoll_dynamic_gen2_file_write_all, NULL, /*iwrite_all */ diff --git a/ompi/mca/fcoll/vulcan/Makefile.am b/ompi/mca/fcoll/vulcan/Makefile.am index 63f1ad9da23..5406124c958 100644 --- a/ompi/mca/fcoll/vulcan/Makefile.am +++ b/ompi/mca/fcoll/vulcan/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008-2018 University of Houston. All rights reserved. +# Copyright (c) 2008-2022 University of Houston. All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
@@ -24,7 +24,6 @@ sources = \ fcoll_vulcan.h \ fcoll_vulcan_module.c \ fcoll_vulcan_component.c \ - fcoll_vulcan_file_read_all.c \ fcoll_vulcan_file_write_all.c # Make the output library in this directory, and name it either diff --git a/ompi/mca/fcoll/vulcan/fcoll_vulcan.h b/ompi/mca/fcoll/vulcan/fcoll_vulcan.h index 46c1f471634..12c278bbeb5 100644 --- a/ompi/mca/fcoll/vulcan/fcoll_vulcan.h +++ b/ompi/mca/fcoll/vulcan/fcoll_vulcan.h @@ -56,13 +56,6 @@ int mca_fcoll_vulcan_component_file_unquery (ompio_file_t *file); int mca_fcoll_vulcan_module_init (ompio_file_t *file); int mca_fcoll_vulcan_module_finalize (ompio_file_t *file); -int mca_fcoll_vulcan_file_read_all (ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_status_public_t * status); - - int mca_fcoll_vulcan_file_write_all (ompio_file_t *fh, const void *buf, int count, diff --git a/ompi/mca/fcoll/vulcan/fcoll_vulcan_module.c b/ompi/mca/fcoll/vulcan/fcoll_vulcan_module.c index f3f847b9fcf..e1b7152d963 100644 --- a/ompi/mca/fcoll/vulcan/fcoll_vulcan_module.c +++ b/ompi/mca/fcoll/vulcan/fcoll_vulcan_module.c @@ -37,7 +37,7 @@ static mca_fcoll_base_module_1_0_0_t vulcan = { mca_fcoll_vulcan_module_init, mca_fcoll_vulcan_module_finalize, - mca_fcoll_vulcan_file_read_all, + mca_fcoll_base_file_read_all, NULL, /* iread_all */ mca_fcoll_vulcan_file_write_all, NULL, /*iwrite_all */ diff --git a/ompi/mca/fs/base/Makefile.am b/ompi/mca/fs/base/Makefile.am index 215c8cdc28d..9ed1e563456 100644 --- a/ompi/mca/fs/base/Makefile.am +++ b/ompi/mca/fs/base/Makefile.am @@ -22,12 +22,16 @@ headers += \ libmca_fs_la_SOURCES += \ base/fs_base_frame.c \ + base/fs_base_find_available.c + +if OMPI_OMPIO_SUPPORT +libmca_fs_la_SOURCES += \ base/fs_base_file_select.c \ base/fs_base_file_unselect.c \ - base/fs_base_find_available.c \ base/fs_base_get_parent_dir.c \ base/fs_base_file_close.c \ base/fs_base_file_sync.c \ base/fs_base_file_delete.c \ base/fs_base_file_set_size.c \ 
base/fs_base_file_get_size.c +endif diff --git a/ompi/mca/fs/ime/fs_ime.h b/ompi/mca/fs/ime/fs_ime.h index 170f03a06e2..02a98773eac 100644 --- a/ompi/mca/fs/ime/fs_ime.h +++ b/ompi/mca/fs/ime/fs_ime.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2018 DataDirect Networks. All rights reserved. + * Copyright (c) 2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,7 +35,7 @@ int mca_fs_ime_component_file_unquery (ompio_file_t *file); int mca_fs_ime_module_init (ompio_file_t *file); int mca_fs_ime_module_finalize (ompio_file_t *file); -int mca_fs_ime_native_fini(); +int mca_fs_ime_native_fini(void); OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_ime_component; /* diff --git a/ompi/mca/fs/ime/fs_ime_component.c b/ompi/mca/fs/ime/fs_ime_component.c index ea4e06de96a..5b6febff964 100644 --- a/ompi/mca/fs/ime/fs_ime_component.c +++ b/ompi/mca/fs/ime/fs_ime_component.c @@ -1,6 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2018 DataDirect Networks. All rights reserved. + * Copyright (c) 2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -44,6 +46,7 @@ mca_fs_base_component_2_0_0_t mca_fs_ime_component = { MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION), .mca_register_component_params = register_component, + .mca_close_component = mca_fs_ime_native_fini, }, .fsm_data = { /* This component is checkpointable */ diff --git a/ompi/mca/hook/comm_method/hook_comm_method_fns.c b/ompi/mca/hook/comm_method/hook_comm_method_fns.c index 42762884dce..ae71b1cbefa 100644 --- a/ompi/mca/hook/comm_method/hook_comm_method_fns.c +++ b/ompi/mca/hook/comm_method/hook_comm_method_fns.c @@ -22,15 +22,9 @@ #include "ompi/mca/bml/base/base.h" #include "ompi/mca/mtl/base/base.h" -// In regular strncpy up to n bytes are copied, so if the 'to' buffer -// was char string[16] and you called strncpy(string, , 16) you could -// get 16 bytes of chars without a null. My preferred API is to let -// n be the size of the buffer, and to let n-1 chars be copied, and -// to guarantee null termination. static void mystrncpy(char *to, const char *from, int n) { - strncpy(to, from, n-1); - to[n-1] = 0; + snprintf(to, n, "%s", from); } // For converting comm_method strings to comm_method id# and back. 
@@ -551,13 +545,17 @@ ompi_report_comm_methods(int called_from_location) // 1 = from init, 2 = from fi fp = fopen(mca_hook_comm_method_fakefile, "r"); for (i=0; ic_index] = comm->c_index % + ompi_mtl_ofi.total_ctxts_used; + if (!ompi_mtl_ofi.threshold_comm_context_id) { + ompi_mtl_ofi.threshold_comm_context_id = comm->c_index; + + opal_show_help("help-mtl-ofi.txt", "SEP thread grouping ctxt limit", true, ctxt_id, + ompi_process_info.nodename, __FILE__, __LINE__); + } + + return OMPI_SUCCESS; + } + + /* Init context info for Scalable EPs */ + ret = fi_tx_context(ompi_mtl_ofi.sep, ctxt_id, NULL, &ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep, NULL); + if (ret) { + MTL_OFI_LOG_FI_ERR(ret, "fi_tx_context failed"); + goto init_error; + } + + ret = fi_rx_context(ompi_mtl_ofi.sep, ctxt_id, NULL, &ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, NULL); + if (ret) { + MTL_OFI_LOG_FI_ERR(ret, "fi_rx_context failed"); + goto init_error; + } + + ret = fi_cq_open(ompi_mtl_ofi.domain, &cq_attr, &ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq, NULL); + if (ret) { + MTL_OFI_LOG_FI_ERR(ret, "fi_cq_open failed"); + goto init_error; + } + + /* Bind CQ to TX/RX context object */ + ret = fi_ep_bind(ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep, (fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq, + FI_TRANSMIT | FI_SELECTIVE_COMPLETION); + if (0 != ret) { + MTL_OFI_LOG_FI_ERR(ret, "fi_bind CQ-EP (FI_TRANSMIT) failed"); + goto init_error; + } + + ret = fi_ep_bind(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, (fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq, + FI_RECV | FI_SELECTIVE_COMPLETION); + if (0 != ret) { + MTL_OFI_LOG_FI_ERR(ret, "fi_bind CQ-EP (FI_RECV) failed"); + goto init_error; + } + + /* Enable Endpoint for communication. 
This commits the bind operations */ + ret = fi_enable(ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep); + if (0 != ret) { + MTL_OFI_LOG_FI_ERR(ret, "fi_enable (send context) failed"); + goto init_error; + } + + ret = fi_enable(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep); + if (0 != ret) { + MTL_OFI_LOG_FI_ERR(ret, "fi_enable (recv context) failed"); + goto init_error; + } + +init_regular_ep: + /* Initialize per-context lock */ + OBJ_CONSTRUCT(&ompi_mtl_ofi.ofi_ctxt[ctxt_id].context_lock, opal_mutex_t); + + if (!ompi_mtl_ofi.is_initialized) { + ret = opal_progress_register(ompi_mtl_ofi_progress_no_inline); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, opal_common_ofi.output, + "%s:%d: opal_progress_register failed: %d\n", + __FILE__, __LINE__, ret); + goto init_error; + } + } + + ompi_mtl_ofi.comm_to_context[comm->c_index] = ompi_mtl_ofi.total_ctxts_used; + ompi_mtl_ofi.total_ctxts_used++; + + return OMPI_SUCCESS; + +init_error: + if (ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep) { + (void) fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep); + } + + if (ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep) { + (void) fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep); + } + + if (ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq) { + (void) fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq); + } + + return ret; +} + +static int ompi_mtl_ofi_finalize_contexts(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm, + mca_mtl_ofi_ep_type ep_type) +{ + int ret = OMPI_SUCCESS, ctxt_id = 0; + + if (OFI_REGULAR_EP == ep_type) { + /* For regular EPs, simply destruct Lock object and exit */ + goto finalize_regular_ep; + } + + if (ompi_mtl_ofi.thread_grouping && + ompi_mtl_ofi.threshold_comm_context_id && + ((uint32_t) ompi_mtl_ofi.threshold_comm_context_id <= comm->c_index)) { + return OMPI_SUCCESS; + } + + ctxt_id = ompi_mtl_ofi.thread_grouping ? 
+ ompi_mtl_ofi.comm_to_context[comm->c_index] : 0; + + /* + * For regular EPs, TX/RX contexts are aliased to SEP object which is + * closed in ompi_mtl_ofi_finalize(). So, skip handling those here. + */ + if ((ret = fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep))) { + goto finalize_err; + } + + if ((ret = fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep))) { + goto finalize_err; + } + + if ((ret = fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq))) { + goto finalize_err; + } + +finalize_regular_ep: + /* Destroy context lock */ + OBJ_DESTRUCT(&ompi_mtl_ofi.ofi_ctxt[ctxt_id].context_lock); + + return OMPI_SUCCESS; + +finalize_err: + opal_show_help("help-mtl-ofi.txt", "OFI call fail", true, + "fi_close", + ompi_process_info.nodename, __FILE__, __LINE__, + fi_strerror(-ret), ret); + + return OMPI_ERROR; +} + int ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, @@ -182,3 +358,91 @@ ompi_mtl_ofi_del_procs(struct mca_mtl_base_module_t *mtl, return OMPI_SUCCESS; } + +int ompi_mtl_ofi_add_comm(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm) +{ + int ret; + uint32_t comm_size; + mca_mtl_comm_t* mtl_comm = OBJ_NEW(mca_mtl_comm_t); + + mca_mtl_ofi_ep_type ep_type = (0 == ompi_mtl_ofi.enable_sep) ? 
+ OFI_REGULAR_EP : OFI_SCALABLE_EP; + + if (!OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + if (OMPI_COMM_IS_INTER(comm)) { + comm_size = ompi_comm_remote_size(comm); + } else { + comm_size = ompi_comm_size(comm); + } + mtl_comm->c_index_vec = (c_index_vec_t *)malloc(sizeof(c_index_vec_t) * comm_size); + if (NULL == mtl_comm->c_index_vec) { + OBJ_RELEASE(mtl_comm); + goto error; + } else { + for (uint32_t i=0; i < comm_size; i++) { + mtl_comm->c_index_vec[i].c_index_state = 2; + } + } + if (OMPI_COMM_IS_INTRA(comm)) { + mtl_comm->c_index_vec[comm->c_my_rank].c_index = comm->c_index; + mtl_comm->c_index_vec[comm->c_my_rank].c_index_state = 0; + } + + comm->c_mtl_comm = mtl_comm; + + } else { + + comm->c_mtl_comm = NULL; + + } + + /* + * If thread grouping enabled, add new OFI context for each communicator + * other than MPI_COMM_SELF. + */ + if ((ompi_mtl_ofi.thread_grouping && (MPI_COMM_SELF != comm)) || + /* If no thread grouping, add new OFI context only + * for MPI_COMM_WORLD. + */ + (!ompi_mtl_ofi.thread_grouping && (!ompi_mtl_ofi.is_initialized))) { + + ret = ompi_mtl_ofi_init_contexts(mtl, comm, ep_type); + ompi_mtl_ofi.is_initialized = true; + + if (OMPI_SUCCESS != ret) { + goto error; + } + } + + return OMPI_SUCCESS; + +error: + return OMPI_ERROR; +} + +int ompi_mtl_ofi_del_comm(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm) +{ + int ret = OMPI_SUCCESS; + mca_mtl_ofi_ep_type ep_type = (0 == ompi_mtl_ofi.enable_sep) ? + OFI_REGULAR_EP : OFI_SCALABLE_EP; + + if(NULL != comm->c_mtl_comm) { + free(comm->c_mtl_comm->c_index_vec); + OBJ_RELEASE(comm->c_mtl_comm); + comm->c_mtl_comm = NULL; + } + + /* + * Clean up OFI contexts information. 
+ */ + if ((ompi_mtl_ofi.thread_grouping && (MPI_COMM_SELF != comm)) || + (!ompi_mtl_ofi.thread_grouping && (MPI_COMM_WORLD == comm))) { + + ret = ompi_mtl_ofi_finalize_contexts(mtl, comm, ep_type); + } + + return ret; +} + diff --git a/ompi/mca/mtl/ofi/mtl_ofi.h b/ompi/mca/mtl/ofi/mtl_ofi.h index 6626c754a97..e122663db07 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi.h +++ b/ompi/mca/mtl/ofi/mtl_ofi.h @@ -2,11 +2,14 @@ * Copyright (c) 2013-2018 Intel, Inc. All rights reserved * Copyright (c) 2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. All rights + * Copyright (c) 2019-2021 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2018-2020 Amazon.com, Inc. or its affiliates. All rights * reserved. - * + * Copyright (c) 2021 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2021 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -65,19 +68,50 @@ extern int ompi_mtl_ofi_del_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, struct ompi_proc_t **procs); +extern int ompi_mtl_ofi_add_comm(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm); +extern int ompi_mtl_ofi_del_comm(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm); + int ompi_mtl_ofi_progress_no_inline(void); #if OPAL_HAVE_THREAD_LOCAL -extern opal_thread_local int per_thread_ctx; -extern opal_thread_local struct fi_cq_tagged_entry wc[MTL_OFI_MAX_PROG_EVENT_COUNT]; +extern opal_thread_local int ompi_mtl_ofi_per_thread_ctx; +extern opal_thread_local struct fi_cq_tagged_entry ompi_mtl_ofi_wc[MTL_OFI_MAX_PROG_EVENT_COUNT]; #endif +#define MCA_MTL_OFI_CID_NOT_EXCHANGED 2 +#define MCA_MTL_OFI_CID_EXCHANGING 1 +#define MCA_MTL_OFI_CID_EXCHANGED 0 + +typedef struct { + uint32_t c_index:30; + uint32_t c_index_state:2; +} c_index_vec_t; + +typedef struct mca_mtl_comm_t { + 
opal_object_t super; + c_index_vec_t *c_index_vec; +} mca_mtl_comm_t; + +OBJ_CLASS_DECLARATION(mca_mtl_comm_t); + +struct mca_mtl_ofi_cid_hdr_t { + ompi_comm_extended_cid_t hdr_cid; + int16_t hdr_src_c_index; + int32_t hdr_src; + bool need_response; + bool ofi_cq_data; +}; + +typedef struct mca_mtl_ofi_cid_hdr_t mca_mtl_ofi_cid_hdr_t; + /* Set OFI context for operations which generate completion events */ __opal_attribute_always_inline__ static inline void set_thread_context(int ctxt) { #if OPAL_HAVE_THREAD_LOCAL - per_thread_ctx = ctxt; + ompi_mtl_ofi_per_thread_ctx = ctxt; return; #endif } @@ -87,7 +121,7 @@ __opal_attribute_always_inline__ static inline void get_thread_context(int *ctxt) { #if OPAL_HAVE_THREAD_LOCAL - *ctxt = per_thread_ctx; + *ctxt = ompi_mtl_ofi_per_thread_ctx; #endif return; } @@ -106,7 +140,7 @@ ompi_mtl_ofi_context_progress(int ctxt_id) struct fi_cq_err_entry error = { 0 }; ssize_t ret; #if !OPAL_HAVE_THREAD_LOCAL - struct fi_cq_tagged_entry wc[MTL_OFI_MAX_PROG_EVENT_COUNT]; + struct fi_cq_tagged_entry ompi_mtl_ofi_wc[MTL_OFI_MAX_PROG_EVENT_COUNT]; #endif /** @@ -114,16 +148,16 @@ ompi_mtl_ofi_context_progress(int ctxt_id) * From the completion's op_context, we get the associated OFI request. * Call the request's callback. 
*/ - ret = fi_cq_read(ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq, (void *)&wc, + ret = fi_cq_read(ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq, (void *)&ompi_mtl_ofi_wc, ompi_mtl_ofi.ofi_progress_event_count); if (ret > 0) { count+= ret; events_read = ret; for (i = 0; i < events_read; i++) { - if (NULL != wc[i].op_context) { - ofi_req = TO_OFI_REQ(wc[i].op_context); + if (NULL != ompi_mtl_ofi_wc[i].op_context) { + ofi_req = TO_OFI_REQ(ompi_mtl_ofi_wc[i].op_context); assert(ofi_req); - ret = ofi_req->event_callback(&wc[i], ofi_req); + ret = ofi_req->event_callback(&ompi_mtl_ofi_wc[i], ofi_req); if (OMPI_SUCCESS != ret) { opal_output(0, "%s:%d: Error returned by request event callback: %zd.\n" "*** The Open MPI OFI MTL is aborting the MPI job (via exit(3)).\n", @@ -446,6 +480,135 @@ ompi_mtl_ofi_map_comm_to_ctxt(uint32_t comm_id) return ompi_mtl_ofi.comm_to_context[comm_id]; } +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_post_recv_excid_buffer(bool blocking, struct ompi_communicator_t *comm, int src); + +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_send_excid(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm, + int dest, + bool ofi_cq_data, + bool is_send); + +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_recv_excid_error_callback(struct fi_cq_err_entry *error, + ompi_mtl_ofi_request_t *ofi_req) +{ + ompi_status_public_t *status; + assert(ofi_req->super.ompi_req); + status = &ofi_req->super.ompi_req->req_status; + status->MPI_TAG = MTL_OFI_GET_TAG(ofi_req->match_bits); + status->MPI_SOURCE = mtl_ofi_get_source((struct fi_cq_tagged_entry *) error); + + switch (error->err) { + case FI_ETRUNC: + status->MPI_ERROR = MPI_ERR_TRUNCATE; + break; + case FI_ECANCELED: + status->_cancelled = true; + break; + default: + status->MPI_ERROR = MPI_ERR_INTERN; + } + + ofi_req->super.completion_callback(&ofi_req->super); + return OMPI_SUCCESS; +} + +__opal_attribute_always_inline__ static inline int 
+ompi_mtl_ofi_post_recv_excid_buffer_callback(struct fi_cq_tagged_entry *wc, + ompi_mtl_ofi_request_t *ofi_req) +{ + ofi_req->completion_count--; + int ret; + mca_mtl_ofi_cid_hdr_t *buffer = (mca_mtl_ofi_cid_hdr_t *)ofi_req->buffer; + ompi_comm_extended_cid_t excid; + ompi_communicator_t *comm; + int src = buffer->hdr_src; + mca_mtl_comm_t *mtl_comm; + + excid.cid_base = buffer->hdr_cid.cid_base; + excid.cid_sub.u64 = buffer->hdr_cid.cid_sub.u64; + for (int i = 0; i < 8; i++) { + excid.cid_sub.u8[i] = buffer->hdr_cid.cid_sub.u8[i]; + } + + comm = ompi_comm_lookup_cid(excid); + if (comm == NULL) { + comm = ompi_comm_lookup(buffer->hdr_src_c_index); + } + + if (comm == NULL) { + return OMPI_SUCCESS; + } + + mtl_comm = comm->c_mtl_comm; + + if (mtl_comm->c_index_vec[src].c_index_state == MCA_MTL_OFI_CID_NOT_EXCHANGED + && buffer->need_response) { + mtl_comm->c_index_vec[src].c_index = buffer->hdr_src_c_index; + mtl_comm->c_index_vec[src].c_index_state = MCA_MTL_OFI_CID_EXCHANGED; + ret = ompi_mtl_ofi_send_excid(ofi_req->mtl, comm, src, buffer->ofi_cq_data, false); + } else { + mtl_comm->c_index_vec[src].c_index_state = MCA_MTL_OFI_CID_EXCHANGED; + mtl_comm->c_index_vec[src].c_index = buffer->hdr_src_c_index; + } + + ret = ompi_mtl_ofi_post_recv_excid_buffer(false, comm, -1); + return ret; +} + +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_post_recv_excid_buffer(bool blocking, struct ompi_communicator_t *comm, int src) +{ + int ctxt_id = 0; + ssize_t ret; + ompi_mtl_ofi_request_t *ofi_req = malloc(sizeof(ompi_mtl_ofi_request_t)); + mca_mtl_ofi_cid_hdr_t *start = malloc(sizeof(mca_mtl_ofi_cid_hdr_t)); + size_t length = sizeof(mca_mtl_ofi_cid_hdr_t); + mca_mtl_comm_t *mtl_comm; + + mtl_comm = comm->c_mtl_comm; + + set_thread_context(ctxt_id); + + ofi_req->type = OMPI_MTL_OFI_RECV; + ofi_req->event_callback = ompi_mtl_ofi_post_recv_excid_buffer_callback; + ofi_req->error_callback = ompi_mtl_ofi_recv_excid_error_callback; + ofi_req->buffer = start; + 
ofi_req->length = length; + ofi_req->convertor = NULL; + ofi_req->req_started = false; + ofi_req->status.MPI_ERROR = OMPI_SUCCESS; + ofi_req->remote_addr = 0UL; + ofi_req->match_bits = 0UL; + ofi_req->completion_count = 1; + ofi_req->comm = comm; + + MTL_OFI_RETRY_UNTIL_DONE(fi_recv(ompi_mtl_ofi.ofi_ctxt[0].rx_ep, + start, + length, + NULL, + FI_ADDR_UNSPEC, + (void *)&ofi_req->ctx), ret); + if (OPAL_UNLIKELY(0 > ret)) { + if (NULL != ofi_req->buffer) { + free(ofi_req->buffer); + } + MTL_OFI_LOG_FI_ERR(ret, "fi_recv failed"); + return ompi_mtl_ofi_get_error(ret); + } + + if (blocking) { + assert(src != -1); + while (mtl_comm->c_index_vec[src].c_index_state > MCA_MTL_OFI_CID_EXCHANGED) { + ompi_mtl_ofi_progress(); + } + } + + return OMPI_SUCCESS; +} + __opal_attribute_always_inline__ static inline int ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req, struct ompi_communicator_t *comm, @@ -458,7 +621,11 @@ ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req, ssize_t ret = OMPI_SUCCESS; int ctxt_id = 0; - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } set_thread_context(ctxt_id); ack_req = malloc(sizeof(ompi_mtl_ofi_request_t)); @@ -491,18 +658,125 @@ ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req, return OMPI_SUCCESS; } -__opal_attribute_always_inline__ static inline int -ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl, +static int +ompi_mtl_ofi_send_excid(struct mca_mtl_base_module_t *mtl, struct ompi_communicator_t *comm, int dest, - int tag, - struct opal_convertor_t *convertor, - mca_pml_base_send_mode_t mode, - bool ofi_cq_data) + bool ofi_cq_data, + bool is_send) +{ + ssize_t ret = OMPI_SUCCESS; + ompi_mtl_ofi_request_t *ofi_req = malloc(sizeof(ompi_mtl_ofi_request_t)); + int ctxt_id = 0; + mca_mtl_ofi_cid_hdr_t *start = malloc(sizeof(mca_mtl_ofi_cid_hdr_t)); + ompi_proc_t 
*ompi_proc = NULL; + mca_mtl_ofi_endpoint_t *endpoint = NULL; + fi_addr_t sep_peer_fiaddr = 0; + mca_mtl_comm_t *mtl_comm; + + mtl_comm = comm->c_mtl_comm; + + ctxt_id = 0; + set_thread_context(ctxt_id); + + /** + * Create a send request, start it and wait until it completes. + */ + ofi_req->event_callback = ompi_mtl_ofi_send_callback; + ofi_req->error_callback = ompi_mtl_ofi_send_error_callback; + + ompi_proc = ompi_comm_peer_lookup(comm, dest); + endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); + + /* For Scalable Endpoints, gather target receive context */ + sep_peer_fiaddr = fi_rx_addr(endpoint->peer_fiaddr, ctxt_id, ompi_mtl_ofi.rx_ctx_bits); + + start->hdr_cid = comm->c_contextid; + start->hdr_src = comm->c_my_rank; + start->hdr_src_c_index = comm->c_index; + start->ofi_cq_data = ofi_cq_data; + if (mtl_comm->c_index_vec[dest].c_index_state > MCA_MTL_OFI_CID_EXCHANGED) { + start->need_response = true; + } else { + start->need_response = false; + } + size_t length = sizeof(mca_mtl_ofi_cid_hdr_t); + + ofi_req->length = length; + ofi_req->status.MPI_ERROR = OMPI_SUCCESS; + ofi_req->completion_count = 0; + if (OPAL_UNLIKELY(length > endpoint->mtl_ofi_module->max_msg_size)) { + opal_show_help("help-mtl-ofi.txt", + "message too big", false, + length, endpoint->mtl_ofi_module->max_msg_size); + return OMPI_ERROR; + } + + if (OPAL_UNLIKELY(ofi_req->status.MPI_ERROR != OMPI_SUCCESS)) + return ofi_req->status.MPI_ERROR; + + if (ompi_mtl_ofi.max_inject_size >= length) { + if (ofi_cq_data) { + MTL_OFI_RETRY_UNTIL_DONE(fi_injectdata(ompi_mtl_ofi.ofi_ctxt[0].tx_ep, + start, + length, + comm->c_my_rank, + sep_peer_fiaddr), ret); + } else { + MTL_OFI_RETRY_UNTIL_DONE(fi_inject(ompi_mtl_ofi.ofi_ctxt[0].tx_ep, + start, + length, + sep_peer_fiaddr), ret); + } + if (OPAL_UNLIKELY(0 > ret)) { + MTL_OFI_LOG_FI_ERR(ret, + ofi_cq_data ? 
"fi_injectdata failed" + : "fi_inject failed"); + + ofi_req->status.MPI_ERROR = ompi_mtl_ofi_get_error(ret); + return ofi_req->status.MPI_ERROR; + } + } else { + ofi_req->completion_count = 1; + if (ofi_cq_data) { + MTL_OFI_RETRY_UNTIL_DONE(fi_senddata(ompi_mtl_ofi.ofi_ctxt[0].tx_ep, + start, + length, + NULL, + comm->c_my_rank, + sep_peer_fiaddr, + (void *) &ofi_req->ctx), ret); + } else { + MTL_OFI_RETRY_UNTIL_DONE(fi_send(ompi_mtl_ofi.ofi_ctxt[0].tx_ep, + start, + length, + NULL, + sep_peer_fiaddr, + (void *) &ofi_req->ctx), ret); + } + if (OPAL_UNLIKELY(0 > ret)) { + MTL_OFI_LOG_FI_ERR(ret, + ofi_cq_data ? "fi_tsenddata failed" + : "fi_tsend failed"); + ofi_req->status.MPI_ERROR = ompi_mtl_ofi_get_error(ret); + } + } + + return ofi_req->status.MPI_ERROR; +} + +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm, + int dest, + int tag, + struct opal_convertor_t *convertor, + mca_pml_base_send_mode_t mode, + bool ofi_cq_data) { ssize_t ret = OMPI_SUCCESS; ompi_mtl_ofi_request_t ofi_req; - int ompi_ret, ctxt_id = 0; + int ompi_ret, ctxt_id = 0, c_index_for_tag; void *start; bool free_after; size_t length; @@ -512,10 +786,32 @@ ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl, ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */ fi_addr_t src_addr = 0; fi_addr_t sep_peer_fiaddr = 0; + mca_mtl_comm_t *mtl_comm; + + if (OPAL_LIKELY(OMPI_COMM_IS_GLOBAL_INDEX(comm))) { + c_index_for_tag = comm->c_index; + } else { + mtl_comm = comm->c_mtl_comm; + if (mtl_comm->c_index_vec[dest].c_index_state == MCA_MTL_OFI_CID_NOT_EXCHANGED) { + mtl_comm->c_index_vec[dest].c_index_state = MCA_MTL_OFI_CID_EXCHANGING; + ompi_ret = ompi_mtl_ofi_send_excid(mtl, comm, dest, ofi_cq_data, true); + } + + if (mtl_comm->c_index_vec[dest].c_index_state > MCA_MTL_OFI_CID_EXCHANGED) { + while (mtl_comm->c_index_vec[dest].c_index_state > MCA_MTL_OFI_CID_EXCHANGED) { + ompi_ret = 
ompi_mtl_ofi_post_recv_excid_buffer(true, comm, dest); + } + } + c_index_for_tag = mtl_comm->c_index_vec[dest].c_index; + } ompi_mtl_ofi_set_mr_null(&ofi_req); + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); set_thread_context(ctxt_id); /** @@ -548,10 +844,10 @@ ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl, } if (ofi_cq_data) { - match_bits = mtl_ofi_create_send_tag_CQD(comm->c_contextid, tag); + match_bits = mtl_ofi_create_send_tag_CQD(c_index_for_tag, tag); src_addr = sep_peer_fiaddr; } else { - match_bits = mtl_ofi_create_send_tag(comm->c_contextid, + match_bits = mtl_ofi_create_send_tag(c_index_for_tag, comm->c_my_rank, tag); /* src_addr is ignored when FI_DIRECTED_RECV is not supported */ } @@ -659,7 +955,7 @@ ompi_mtl_ofi_isend_generic(struct mca_mtl_base_module_t *mtl, { ssize_t ret = OMPI_SUCCESS; ompi_mtl_ofi_request_t *ofi_req = (ompi_mtl_ofi_request_t *) mtl_request; - int ompi_ret, ctxt_id = 0; + int ompi_ret, ctxt_id = 0, c_index_for_tag; void *start; size_t length; bool free_after; @@ -668,10 +964,31 @@ ompi_mtl_ofi_isend_generic(struct mca_mtl_base_module_t *mtl, mca_mtl_ofi_endpoint_t *endpoint = NULL; ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */ fi_addr_t sep_peer_fiaddr = 0; + mca_mtl_comm_t *mtl_comm; ompi_mtl_ofi_set_mr_null(ofi_req); - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + c_index_for_tag = comm->c_index; + } else { + mtl_comm = comm->c_mtl_comm; + if (mtl_comm->c_index_vec[dest].c_index_state == MCA_MTL_OFI_CID_NOT_EXCHANGED) { + mtl_comm->c_index_vec[dest].c_index_state = MCA_MTL_OFI_CID_EXCHANGING; + ompi_ret = ompi_mtl_ofi_send_excid(mtl, comm, dest, ofi_cq_data, true); + } + if (mtl_comm->c_index_vec[dest].c_index_state > MCA_MTL_OFI_CID_EXCHANGED) { + while 
(mtl_comm->c_index_vec[dest].c_index_state > MCA_MTL_OFI_CID_EXCHANGED) { + ompi_ret = ompi_mtl_ofi_post_recv_excid_buffer(true, comm, dest); + } + } + c_index_for_tag = mtl_comm->c_index_vec[dest].c_index; + } + + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } set_thread_context(ctxt_id); ofi_req->event_callback = ompi_mtl_ofi_isend_callback; @@ -699,9 +1016,9 @@ ompi_mtl_ofi_isend_generic(struct mca_mtl_base_module_t *mtl, } if (ofi_cq_data) { - match_bits = mtl_ofi_create_send_tag_CQD(comm->c_contextid, tag); + match_bits = mtl_ofi_create_send_tag_CQD(c_index_for_tag, tag); } else { - match_bits = mtl_ofi_create_send_tag(comm->c_contextid, + match_bits = mtl_ofi_create_send_tag(c_index_for_tag, comm->c_my_rank, tag); /* src_addr is ignored when FI_DIRECTED_RECV is not supported */ } @@ -767,7 +1084,11 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc, ompi_status_public_t *status = NULL; struct fi_msg_tagged tagged_msg; - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_contextid); + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = ofi_req->comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } assert(ofi_req->super.ompi_req); status = &ofi_req->super.ompi_req->req_status; @@ -909,10 +1230,29 @@ ompi_mtl_ofi_irecv_generic(struct mca_mtl_base_module_t *mtl, void *start; size_t length; bool free_after; + mca_mtl_comm_t *mtl_comm; ompi_mtl_ofi_set_mr_null(ofi_req); - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (!OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + mtl_comm = comm->c_mtl_comm; + if ((src == MPI_ANY_SOURCE || mtl_comm->c_index_vec[src].c_index_state > MCA_MTL_OFI_CID_EXCHANGED) && + !ompi_mtl_ofi.has_posted_initial_buffer) { + ompi_mtl_ofi.has_posted_initial_buffer = true; + ompi_ret = ompi_mtl_ofi_post_recv_excid_buffer(false, comm, -1); + } + if (src >= 0 && 
mtl_comm->c_index_vec[src].c_index_state == MCA_MTL_OFI_CID_NOT_EXCHANGED) { + mtl_comm->c_index_vec[src].c_index_state = MCA_MTL_OFI_CID_EXCHANGING; + ompi_ret = ompi_mtl_ofi_send_excid(mtl, comm, src, ofi_cq_data, false); + } + } + + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } + set_thread_context(ctxt_id); if (ofi_cq_data) { @@ -922,10 +1262,10 @@ ompi_mtl_ofi_irecv_generic(struct mca_mtl_base_module_t *mtl, remote_addr = fi_rx_addr(endpoint->peer_fiaddr, ctxt_id, ompi_mtl_ofi.rx_ctx_bits); } - mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_contextid, + mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_index, tag); } else { - mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_contextid, src, + mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_index, src, tag); /* src_addr is ignored when FI_DIRECTED_RECV is not used */ } @@ -1048,7 +1388,12 @@ ompi_mtl_ofi_imrecv(struct mca_mtl_base_module_t *mtl, ompi_mtl_ofi_set_mr_null(ofi_req); - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } + set_thread_context(ctxt_id); ompi_ret = ompi_mtl_datatype_recv_buf(convertor, @@ -1158,8 +1503,26 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl, struct fi_msg_tagged msg; uint64_t msgflags = FI_PEEK | FI_COMPLETION; int ctxt_id = 0; + mca_mtl_comm_t *mtl_comm; + + if (!OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + mtl_comm = comm->c_mtl_comm; + if ((src == MPI_ANY_SOURCE || mtl_comm->c_index_vec[src].c_index_state > MCA_MTL_OFI_CID_EXCHANGED) && + !ompi_mtl_ofi.has_posted_initial_buffer) { + ompi_mtl_ofi.has_posted_initial_buffer = true; + ret = ompi_mtl_ofi_post_recv_excid_buffer(false, comm, -1); + } + if (src >= 0 && mtl_comm->c_index_vec[src].c_index_state == 
MCA_MTL_OFI_CID_NOT_EXCHANGED) { + mtl_comm->c_index_vec[src].c_index_state = MCA_MTL_OFI_CID_EXCHANGING; + ret = ompi_mtl_ofi_send_excid(mtl, comm, src, ofi_cq_data, false); + } + } - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } set_thread_context(ctxt_id); if (ofi_cq_data) { @@ -1170,11 +1533,11 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl, remote_proc = fi_rx_addr(endpoint->peer_fiaddr, ctxt_id, ompi_mtl_ofi.rx_ctx_bits); } - mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_contextid, + mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_index, tag); } else { - mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_contextid, src, + mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_index, src, tag); /* src_addr is ignored when FI_DIRECTED_RECV is not used */ } @@ -1214,7 +1577,7 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl, *flag = ofi_req.match_state; if (1 == *flag) { if (MPI_STATUS_IGNORE != status) { - *status = ofi_req.status; + OMPI_COPY_STATUS(status, ofi_req.status, false); } } @@ -1240,8 +1603,26 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl, struct fi_msg_tagged msg; uint64_t msgflags = FI_PEEK | FI_CLAIM | FI_COMPLETION; int ctxt_id = 0; + mca_mtl_comm_t *mtl_comm; + + if (!OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + mtl_comm = comm->c_mtl_comm; + if ((src == MPI_ANY_SOURCE || mtl_comm->c_index_vec[src].c_index_state > MCA_MTL_OFI_CID_EXCHANGED) + && !ompi_mtl_ofi.has_posted_initial_buffer) { + ompi_mtl_ofi.has_posted_initial_buffer = true; + ret = ompi_mtl_ofi_post_recv_excid_buffer(false, comm, -1); + } + if (src >= 0 && mtl_comm->c_index_vec[src].c_index_state == MCA_MTL_OFI_CID_NOT_EXCHANGED) { + mtl_comm->c_index_vec[src].c_index_state = MCA_MTL_OFI_CID_EXCHANGING; + ret = ompi_mtl_ofi_send_excid(mtl, 
comm, src, ofi_cq_data, false); + } + } - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } set_thread_context(ctxt_id); ofi_req = malloc(sizeof *ofi_req); @@ -1260,12 +1641,12 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl, remote_proc = fi_rx_addr(endpoint->peer_fiaddr, ctxt_id, ompi_mtl_ofi.rx_ctx_bits); } - mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_contextid, + mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_index, tag); } else { /* src_addr is ignored when FI_DIRECTED_RECV is not used */ - mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_contextid, src, + mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_index, src, tag); } @@ -1306,7 +1687,7 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl, *matched = ofi_req->match_state; if (1 == *matched) { if (MPI_STATUS_IGNORE != status) { - *status = ofi_req->status; + OMPI_COPY_STATUS(status, ofi_req->status, false); } (*message) = ompi_message_alloc(); @@ -1335,7 +1716,7 @@ ompi_mtl_ofi_cancel(struct mca_mtl_base_module_t *mtl, int ret, ctxt_id = 0; ompi_mtl_ofi_request_t *ofi_req = (ompi_mtl_ofi_request_t*) mtl_request; - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_contextid); + ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_index); switch (ofi_req->type) { case OMPI_MTL_OFI_SEND: @@ -1375,228 +1756,6 @@ ompi_mtl_ofi_cancel(struct mca_mtl_base_module_t *mtl, return OMPI_SUCCESS; } -static int ompi_mtl_ofi_init_contexts(struct mca_mtl_base_module_t *mtl, - struct ompi_communicator_t *comm, - mca_mtl_ofi_ep_type ep_type) -{ - int ret; - int ctxt_id = ompi_mtl_ofi.total_ctxts_used; - struct fi_cq_attr cq_attr = {0}; - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = ompi_mtl_ofi.ofi_progress_event_count; - - if (OFI_REGULAR_EP == ep_type) { - /* - * For 
regular endpoints, just create the Lock object and register - * progress function. - */ - goto init_regular_ep; - } - - /* - * We only create upto Max number of contexts asked for by the user. - * If user enables thread grouping feature and creates more number of - * communicators than available contexts, then we set the threshold - * context_id so that new communicators created beyond the threshold - * will be assigned to contexts in a round-robin fashion. - */ - if (ompi_mtl_ofi.num_ofi_contexts <= ompi_mtl_ofi.total_ctxts_used) { - ompi_mtl_ofi.comm_to_context[comm->c_contextid] = comm->c_contextid % - ompi_mtl_ofi.total_ctxts_used; - if (!ompi_mtl_ofi.threshold_comm_context_id) { - ompi_mtl_ofi.threshold_comm_context_id = comm->c_contextid; - - opal_show_help("help-mtl-ofi.txt", "SEP thread grouping ctxt limit", true, ctxt_id, - ompi_process_info.nodename, __FILE__, __LINE__); - } - - return OMPI_SUCCESS; - } - - /* Init context info for Scalable EPs */ - ret = fi_tx_context(ompi_mtl_ofi.sep, ctxt_id, NULL, &ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep, NULL); - if (ret) { - MTL_OFI_LOG_FI_ERR(ret, "fi_tx_context failed"); - goto init_error; - } - - ret = fi_rx_context(ompi_mtl_ofi.sep, ctxt_id, NULL, &ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, NULL); - if (ret) { - MTL_OFI_LOG_FI_ERR(ret, "fi_rx_context failed"); - goto init_error; - } - - ret = fi_cq_open(ompi_mtl_ofi.domain, &cq_attr, &ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq, NULL); - if (ret) { - MTL_OFI_LOG_FI_ERR(ret, "fi_cq_open failed"); - goto init_error; - } - - /* Bind CQ to TX/RX context object */ - ret = fi_ep_bind(ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep, (fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq, - FI_TRANSMIT | FI_SELECTIVE_COMPLETION); - if (0 != ret) { - MTL_OFI_LOG_FI_ERR(ret, "fi_bind CQ-EP (FI_TRANSMIT) failed"); - goto init_error; - } - - ret = fi_ep_bind(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, (fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq, - FI_RECV | FI_SELECTIVE_COMPLETION); - if (0 != ret) { - 
MTL_OFI_LOG_FI_ERR(ret, "fi_bind CQ-EP (FI_RECV) failed"); - goto init_error; - } - - /* Enable Endpoint for communication. This commits the bind operations */ - ret = fi_enable(ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep); - if (0 != ret) { - MTL_OFI_LOG_FI_ERR(ret, "fi_enable (send context) failed"); - goto init_error; - } - - ret = fi_enable(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep); - if (0 != ret) { - MTL_OFI_LOG_FI_ERR(ret, "fi_enable (recv context) failed"); - goto init_error; - } - -init_regular_ep: - /* Initialize per-context lock */ - OBJ_CONSTRUCT(&ompi_mtl_ofi.ofi_ctxt[ctxt_id].context_lock, opal_mutex_t); - - if (MPI_COMM_WORLD == comm) { - ret = opal_progress_register(ompi_mtl_ofi_progress_no_inline); - if (OMPI_SUCCESS != ret) { - opal_output_verbose(1, opal_common_ofi.output, - "%s:%d: opal_progress_register failed: %d\n", - __FILE__, __LINE__, ret); - goto init_error; - } - } - - ompi_mtl_ofi.comm_to_context[comm->c_contextid] = ompi_mtl_ofi.total_ctxts_used; - ompi_mtl_ofi.total_ctxts_used++; - - return OMPI_SUCCESS; - -init_error: - if (ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep) { - (void) fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep); - } - - if (ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep) { - (void) fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep); - } - - if (ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq) { - (void) fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq); - } - - return ret; -} - -static int ompi_mtl_ofi_finalize_contexts(struct mca_mtl_base_module_t *mtl, - struct ompi_communicator_t *comm, - mca_mtl_ofi_ep_type ep_type) -{ - int ret = OMPI_SUCCESS, ctxt_id = 0; - - if (OFI_REGULAR_EP == ep_type) { - /* For regular EPs, simply destruct Lock object and exit */ - goto finalize_regular_ep; - } - - if (ompi_mtl_ofi.thread_grouping && - ompi_mtl_ofi.threshold_comm_context_id && - ((uint32_t) ompi_mtl_ofi.threshold_comm_context_id <= comm->c_contextid)) { - return OMPI_SUCCESS; - } - - ctxt_id = ompi_mtl_ofi.thread_grouping ? 
- ompi_mtl_ofi.comm_to_context[comm->c_contextid] : 0; - - /* - * For regular EPs, TX/RX contexts are aliased to SEP object which is - * closed in ompi_mtl_ofi_finalize(). So, skip handling those here. - */ - if ((ret = fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].tx_ep))) { - goto finalize_err; - } - - if ((ret = fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep))) { - goto finalize_err; - } - - if ((ret = fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq))) { - goto finalize_err; - } - -finalize_regular_ep: - /* Destroy context lock */ - OBJ_DESTRUCT(&ompi_mtl_ofi.ofi_ctxt[ctxt_id].context_lock); - - return OMPI_SUCCESS; - -finalize_err: - opal_show_help("help-mtl-ofi.txt", "OFI call fail", true, - "fi_close", - ompi_process_info.nodename, __FILE__, __LINE__, - fi_strerror(-ret), ret); - - return OMPI_ERROR; -} - -__opal_attribute_always_inline__ static inline int -ompi_mtl_ofi_add_comm(struct mca_mtl_base_module_t *mtl, - struct ompi_communicator_t *comm) -{ - int ret; - mca_mtl_ofi_ep_type ep_type = (0 == ompi_mtl_ofi.enable_sep) ? - OFI_REGULAR_EP : OFI_SCALABLE_EP; - - /* - * If thread grouping enabled, add new OFI context for each communicator - * other than MPI_COMM_SELF. - */ - if ((ompi_mtl_ofi.thread_grouping && (MPI_COMM_SELF != comm)) || - /* If no thread grouping, add new OFI context only - * for MPI_COMM_WORLD. - */ - (!ompi_mtl_ofi.thread_grouping && (MPI_COMM_WORLD == comm))) { - - ret = ompi_mtl_ofi_init_contexts(mtl, comm, ep_type); - - if (OMPI_SUCCESS != ret) { - goto error; - } - } - - return OMPI_SUCCESS; - -error: - return OMPI_ERROR; -} - -__opal_attribute_always_inline__ static inline int -ompi_mtl_ofi_del_comm(struct mca_mtl_base_module_t *mtl, - struct ompi_communicator_t *comm) -{ - int ret = OMPI_SUCCESS; - mca_mtl_ofi_ep_type ep_type = (0 == ompi_mtl_ofi.enable_sep) ? - OFI_REGULAR_EP : OFI_SCALABLE_EP; - - /* - * Clean up OFI contexts information. 
- */ - if ((ompi_mtl_ofi.thread_grouping && (MPI_COMM_SELF != comm)) || - (!ompi_mtl_ofi.thread_grouping && (MPI_COMM_WORLD == comm))) { - - ret = ompi_mtl_ofi_finalize_contexts(mtl, comm, ep_type); - } - - return ret; -} - #ifdef MCA_ompi_mtl_DIRECT_CALL __opal_attribute_always_inline__ static inline int diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index a7e1c46a6b5..e918507c8d2 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -2,11 +2,11 @@ /* * Copyright (c) 2013-2018 Intel, Inc. All rights reserved * - * Copyright (c) 2014-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2014-2021 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights + * Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2020-2021 Triad National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -40,8 +40,8 @@ static int av_type; static int ofi_tag_mode; #if OPAL_HAVE_THREAD_LOCAL - opal_thread_local int per_thread_ctx; - opal_thread_local struct fi_cq_tagged_entry wc[MTL_OFI_MAX_PROG_EVENT_COUNT]; + opal_thread_local int ompi_mtl_ofi_per_thread_ctx; + opal_thread_local struct fi_cq_tagged_entry ompi_mtl_ofi_wc[MTL_OFI_MAX_PROG_EVENT_COUNT]; #endif /* @@ -254,9 +254,7 @@ ompi_mtl_ofi_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mtl_ofi.num_ofi_contexts); - opal_common_ofi_register_mca_variables(&mca_mtl_ofi_component.super.mtl_version); - - return OMPI_SUCCESS; + return opal_common_ofi_mca_register(&mca_mtl_ofi_component.super.mtl_version); } @@ -285,8 +283,7 @@ ompi_mtl_ofi_component_open(void) "provider_exclude")) { return OMPI_ERR_NOT_AVAILABLE; } - - return OMPI_SUCCESS; + return opal_common_ofi_open(); } static int @@ -303,8 +300,7 @@ ompi_mtl_ofi_component_close(void) #if OPAL_CUDA_SUPPORT mca_common_cuda_fini(); #endif - opal_common_ofi_mca_deregister(); - return OMPI_SUCCESS; + return opal_common_ofi_close(); } int @@ -340,7 +336,7 @@ select_ofi_provider(struct fi_info *providers, } opal_output_verbose(1, opal_common_ofi.output, - "%s:%d: mtl:ofi:prov: %s\n", + "%s:%d: mtl:ofi:provider: %s\n", __FILE__, __LINE__, (prov ? prov->fabric_attr->prov_name : "none")); @@ -364,8 +360,8 @@ select_ofi_provider(struct fi_info *providers, */ if (NULL != prov) { prov = opal_mca_common_ofi_select_provider(prov, &ompi_process_info); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: mtl:ofi:provider: %s\n", + opal_output_verbose(1, opal_common_ofi.output, + "%s:%d: mtl:ofi:provider:domain: %s\n", __FILE__, __LINE__, (prov ? 
prov->domain_attr->name : "none")); } @@ -374,8 +370,8 @@ select_ofi_provider(struct fi_info *providers, } static void -ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode, int *bits_for_cid) { - switch (ofi_tag_mode) { +ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode_arg, int *bits_for_cid) { + switch (ofi_tag_mode_arg) { case MTL_OFI_TAG_1: *bits_for_cid = (int) MTL_OFI_CID_BIT_COUNT_1; ompi_mtl_ofi.base.mtl_max_tag = (int)((1ULL << (MTL_OFI_TAG_BIT_COUNT_1 - 1)) - 1); @@ -518,6 +514,37 @@ static int ompi_mtl_ofi_init_regular_ep(struct fi_info * prov, int universe_size return ret; } +#if OPAL_CUDA_SUPPORT && HAVE_DECL_FI_OPT_FI_HMEM_P2P + /* + * Set the FI_HMEM peer to peer option to ENABLED. This notifies Libfabric + * that the provider can decide whether to use device peer to peer support + * for network transfers, and allows copies if p2p is not supported. + * + * Note that this option may not be supported by the provider, so continue + * if FI_HMEM is supported by the provider but it does not support this + * setopt option. This setopt parameter was introduced in Libfabric 1.14. + * + * The version check is needed as one of the Libfabric setopt handlers + * incorrectly assumed all option values are size_t, which was also fixed + * in 1.14. + */ + int setopt_val = FI_HMEM_P2P_ENABLED; + + if (FI_VERSION_GE(fi_version(), FI_VERSION(1, 14))) { + ret = fi_setopt(&ompi_mtl_ofi.sep->fid, + FI_OPT_ENDPOINT, FI_OPT_FI_HMEM_P2P, + &setopt_val, sizeof(setopt_val)); + + if (!(0 == ret || -FI_ENOPROTOOPT == ret)) { + opal_show_help("help-mtl-ofi.txt", "OFI call fail", true, + "fi_setopt", + ompi_process_info.nodename, __FILE__, __LINE__, + fi_strerror(-ret), -ret); + return ret; + } + } +#endif /* OPAL_CUDA_SUPPORT && FI_OPT_FI_HMEM_P2P */ + /** * Create the objects that will be bound to the endpoint. 
* The objects include: @@ -582,8 +609,6 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, int universe_size; char *univ_size_str; - opal_common_ofi_mca_register(); - opal_output_verbose(1, opal_common_ofi.output, "%s:%d: mtl:ofi:provider_include = \"%s\"\n", __FILE__, __LINE__, *opal_common_ofi.prov_include); @@ -639,7 +664,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, interface and local communication and remote communication. */ hints->mode = FI_CONTEXT | FI_CONTEXT2; hints->ep_attr->type = FI_EP_RDM; - hints->caps |= FI_TAGGED | FI_LOCAL_COMM | FI_REMOTE_COMM | FI_DIRECTED_RECV; + hints->caps |= FI_MSG | FI_TAGGED | FI_LOCAL_COMM | FI_REMOTE_COMM | FI_DIRECTED_RECV; hints->tx_attr->msg_order = FI_ORDER_SAS; hints->rx_attr->msg_order = FI_ORDER_SAS; hints->rx_attr->op_flags = FI_COMPLETION; @@ -893,6 +918,20 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, } } + /* this must be called during single threaded part of the code and + * before Libfabric configures its memory monitors. Easiest to do + * that before domain open. Silently ignore not-supported errors, + * as they are not critical to program correctness, but only + * indicate that LIbfabric will have to pick a different, possibly + * less optimial, monitor. */ + ret = opal_common_ofi_export_memory_monitor(); + if (0 != ret && -FI_ENOSYS != ret) { + opal_output_verbose(1, opal_common_ofi.output, + "Failed to inject Libfabric memory monitor: %s", + fi_strerror(-ret)); + } + + /** * Open fabric * The getinfo struct returns a fabric attribute struct that can be used to @@ -1060,6 +1099,10 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, * Set the ANY_SRC address. 
*/ ompi_mtl_ofi.any_addr = FI_ADDR_UNSPEC; + ompi_mtl_ofi.is_initialized = false; + ompi_mtl_ofi.has_posted_initial_buffer = false; + + ompi_mtl_ofi.base.mtl_flags |= MCA_MTL_BASE_FLAG_SUPPORTS_EXT_CID; #if OPAL_CUDA_SUPPORT mca_common_cuda_stage_one_init(); diff --git a/ompi/mca/mtl/ofi/mtl_ofi_types.h b/ompi/mca/mtl/ofi/mtl_ofi_types.h index a2c2f3d4308..4d04e8ef6e5 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_types.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_types.h @@ -95,6 +95,9 @@ typedef struct mca_mtl_ofi_module_t { /** Optimized function Symbol Tables **/ struct ompi_mtl_ofi_symtable sym_table; + bool is_initialized; + bool has_posted_initial_buffer; + } mca_mtl_ofi_module_t; extern mca_mtl_ofi_module_t ompi_mtl_ofi; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index 132358b5638..2c66afd734e 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -229,36 +229,6 @@ ompi_mtl_portals4_component_open(void) sizeof(ompi_mtl_portals4_request_t) - sizeof(struct mca_mtl_request_t); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Flow control: " -#if OMPI_MTL_PORTALS4_FLOW_CONTROL - "yes" -#else - "no" -#endif - ); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Max message size: %lu", (unsigned long) - ompi_mtl_portals4.max_msg_size_mtl); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Short limit: %d", (int) - ompi_mtl_portals4.short_limit); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Eager limit: %d", (int) - ompi_mtl_portals4.eager_limit); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Short receive blocks: %d", - ompi_mtl_portals4.recv_short_num); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Send queue size: %d", ompi_mtl_portals4.send_queue_size); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - 
"Recv queue size: %d", ompi_mtl_portals4.recv_queue_size); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Long protocol: %s", - (ompi_mtl_portals4.protocol == eager) ? "Eager" : - (ompi_mtl_portals4.protocol == rndv) ? "Rendezvous" : - "Other"); - OBJ_CONSTRUCT(&ompi_mtl_portals4.fl_message, opal_free_list_t); opal_free_list_init(&ompi_mtl_portals4.fl_message, sizeof(ompi_mtl_portals4_message_t) + @@ -291,6 +261,31 @@ ompi_mtl_portals4_component_open(void) ompi_mtl_portals4.use_flowctl=0; #endif + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Flow control: %s", + ompi_mtl_portals4.use_flowctl ? "yes" : "no"); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Max message size: %lu", (unsigned long) + ompi_mtl_portals4.max_msg_size_mtl); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Short limit: %d", (int) + ompi_mtl_portals4.short_limit); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Eager limit: %d", (int) + ompi_mtl_portals4.eager_limit); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Short receive blocks: %d", + ompi_mtl_portals4.recv_short_num); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Send queue size: %d", ompi_mtl_portals4.send_queue_size); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Recv queue size: %d", ompi_mtl_portals4.recv_queue_size); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Long protocol: %s", + (ompi_mtl_portals4.protocol == eager) ? "Eager" : + (ompi_mtl_portals4.protocol == rndv) ? 
"Rendezvous" : + "Other"); + return OMPI_SUCCESS; } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h index cd62743faea..c9271723a6a 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h @@ -39,7 +39,7 @@ OBJ_CLASS_DECLARATION(ompi_mtl_portals4_pending_request_t); struct ompi_mtl_portals4_flowctl_t { - int32_t flowctl_active; + opal_atomic_int32_t flowctl_active; opal_atomic_int32_t send_slots; int32_t max_send_slots; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_probe.c b/ompi/mca/mtl/portals4/mtl_portals4_probe.c index 5f2a991cfe7..e40ea029df6 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_probe.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_probe.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2010 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -120,7 +120,9 @@ ompi_mtl_portals4_iprobe(struct mca_mtl_base_module_t* mtl, *flag = request.found_match; if (1 == *flag) { - *status = request.status; + if (MPI_STATUS_IGNORE != status) { + OMPI_COPY_STATUS(status, request.status, false); + } } return OMPI_SUCCESS; @@ -198,7 +200,9 @@ ompi_mtl_portals4_improbe(struct mca_mtl_base_module_t *mtl, *matched = request.found_match; if (1 == *matched) { - *status = request.status; + if (MPI_STATUS_IGNORE != status) { + OMPI_COPY_STATUS(status, request.status, false); + } (*message) = ompi_message_alloc(); if (NULL == (*message)) { diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index f2737428e26..e19971a8d91 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -468,7 +468,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, ptl_request->super.type = portals4_req_recv; ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress; #if OPAL_ENABLE_DEBUG - ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); + ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((opal_atomic_int64_t*) &ompi_mtl_portals4.recv_opcount, 1); ptl_request->hdr_data = 0; #endif ptl_request->buffer_ptr = (free_after) ? 
start : NULL; @@ -549,7 +549,7 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl, } #if OPAL_ENABLE_DEBUG - ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); + ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((opal_atomic_int64_t*) &ompi_mtl_portals4.recv_opcount, 1); ptl_request->hdr_data = 0; #endif ptl_request->super.type = portals4_req_recv; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_request.h b/ompi/mca/mtl/portals4/mtl_portals4_request.h index 72b44a41a51..5644bd9192a 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_request.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_request.h @@ -62,7 +62,7 @@ struct ompi_mtl_portals4_isend_request_t { #endif ptl_size_t length; opal_atomic_int32_t pending_get; - opal_atomic_uint32_t event_count; + opal_atomic_int32_t event_count; }; typedef struct ompi_mtl_portals4_isend_request_t ompi_mtl_portals4_isend_request_t; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_send.c b/ompi/mca/mtl/portals4/mtl_portals4_send.c index 27291eed559..b3c6a0fe04a 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_send.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_send.c @@ -40,7 +40,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, ompi_mtl_portals4_base_request_t* ptl_base_request, bool *complete) { - int retval = OMPI_SUCCESS, ret, val, add = 1; + int retval = OMPI_SUCCESS, ret = 0, val = 0, add = 1; ompi_mtl_portals4_isend_request_t* ptl_request = (ompi_mtl_portals4_isend_request_t*) ptl_base_request; @@ -161,7 +161,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, ptl_request->me_h = PTL_INVALID_HANDLE; add++; } - val = OPAL_THREAD_ADD_FETCH32((int32_t*)&ptl_request->event_count, add); + val = OPAL_THREAD_ADD_FETCH32(&ptl_request->event_count, add); assert(val <= 3); if (val == 3) { @@ -492,7 +492,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); if (OMPI_SUCCESS != ret) return ret; - 
ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*)&ompi_mtl_portals4.opcount, 1); + ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((opal_atomic_int64_t*)&ompi_mtl_portals4.opcount, 1); ptl_request->buffer_ptr = (free_after) ? start : NULL; ptl_request->length = length; ptl_request->event_count = 0; diff --git a/ompi/mca/mtl/psm2/mtl_psm2_probe.c b/ompi/mca/mtl/psm2/mtl_psm2_probe.c index b81317507be..c5b7f7ab93a 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_probe.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_probe.c @@ -39,7 +39,7 @@ int ompi_mtl_psm2_iprobe(struct mca_mtl_base_module_t* mtl, psm2_mq_status2_t mqstat; psm2_error_t err; - PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + PSM2_MAKE_TAGSEL(src, tag, comm->c_index, mqtag, tagsel); err = psm2_mq_iprobe2(ompi_mtl_psm2.mq, PSM2_MQ_ANY_ADDR, &mqtag, &tagsel, &mqstat); @@ -88,7 +88,7 @@ ompi_mtl_psm2_improbe(struct mca_mtl_base_module_t *mtl, psm2_mq_req_t mqreq; psm2_error_t err; - PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + PSM2_MAKE_TAGSEL(src, tag, comm->c_index, mqtag, tagsel); err = psm2_mq_improbe2(ompi_mtl_psm2.mq, PSM2_MQ_ANY_ADDR, &mqtag, &tagsel, &mqreq, &mqstat); diff --git a/ompi/mca/mtl/psm2/mtl_psm2_recv.c b/ompi/mca/mtl/psm2/mtl_psm2_recv.c index ff5c54067ce..83fdfcfec81 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_recv.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_recv.c @@ -63,7 +63,7 @@ ompi_mtl_psm2_irecv(struct mca_mtl_base_module_t* mtl, mtl_psm2_request->convertor = convertor; mtl_psm2_request->type = OMPI_mtl_psm2_IRECV; - PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + PSM2_MAKE_TAGSEL(src, tag, comm->c_index, mqtag, tagsel); err = psm2_mq_irecv2(ompi_mtl_psm2.mq, PSM2_MQ_ANY_ADDR, diff --git a/ompi/mca/mtl/psm2/mtl_psm2_send.c b/ompi/mca/mtl/psm2/mtl_psm2_send.c index 6acb30cf6d2..59742ace546 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_send.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_send.c @@ -48,7 +48,7 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl, 
assert(mtl == &ompi_mtl_psm2.super); - PSM2_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag); + PSM2_MAKE_MQTAG(comm->c_index, comm->c_my_rank, tag, mqtag); ret = ompi_mtl_datatype_pack(convertor, &mtl_psm2_request.buf, @@ -106,7 +106,7 @@ ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl, assert(mtl == &ompi_mtl_psm2.super); - PSM2_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag); + PSM2_MAKE_MQTAG(comm->c_index, comm->c_my_rank, tag, mqtag); ret = ompi_mtl_datatype_pack(convertor, diff --git a/ompi/mca/op/avx/configure.m4 b/ompi/mca/op/avx/configure.m4 index 72490f5cc7d..44e834301bb 100644 --- a/ompi/mca/op/avx/configure.m4 +++ b/ompi/mca/op/avx/configure.m4 @@ -51,7 +51,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__AVX512F__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif __m512 vA, vB; _mm512_add_ps(vA, vB) @@ -68,7 +68,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__AVX512F__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif __m512 vA, vB; _mm512_add_ps(vA, vB) @@ -91,7 +91,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__AVX512F__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif int A[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; __m512i vA = _mm512_loadu_si512((__m512i*)&(A[1])) @@ -113,7 +113,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__AVX512F__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" 
#endif __m512i vA, vB; _mm512_mullo_epi64(vA, vB) @@ -134,7 +134,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__AVX2__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif __m256i vA, vB, vC; vC = _mm256_and_si256(vA, vB) @@ -150,7 +150,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__AVX2__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif __m256i vA, vB, vC; vC = _mm256_and_si256(vA, vB) @@ -173,7 +173,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__AVX2__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif int A[8] = {0, 1, 2, 3, 4, 5, 6, 7}; __m256i vA = _mm256_loadu_si256((__m256i*)&A) @@ -199,7 +199,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__AVX__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif __m256 vA, vB, vC; vC = _mm256_add_ps(vA, vB) @@ -217,7 +217,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__SSE4_1__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif __m128i vA, vB; (void)_mm_max_epi8(vA, vB) @@ -236,7 +236,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__SSE3__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif int A[4] 
= {0, 1, 2, 3}; __m128i vA = _mm_lddqu_si128((__m128i*)&A) @@ -256,7 +256,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [[ __m256 vA, vB, vC; #if defined(__ICC) && !defined(__AVX__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif vC = _mm256_add_ps(vA, vB) ]])], @@ -273,7 +273,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__SSE4_1__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif __m128i vA, vB; (void)_mm_max_epi8(vA, vB) @@ -287,7 +287,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ [AC_LANG_PROGRAM([[#include ]], [[ #if defined(__ICC) && !defined(__SSE3__) -#error "icc needs the -m flags to provide the AVX* detection macros +#error "icc needs the -m flags to provide the AVX* detection macros" #endif int A[4] = {0, 1, 2, 3}; __m128i vA = _mm_lddqu_si128((__m128i*)&A) diff --git a/ompi/mca/op/avx/op_avx_component.c b/ompi/mca/op/avx/op_avx_component.c index f116e3b8192..a2f01a373e2 100644 --- a/ompi/mca/op/avx/op_avx_component.c +++ b/ompi/mca/op/avx/op_avx_component.c @@ -21,7 +21,6 @@ #include "ompi_config.h" #include "opal/util/printf.h" -#include "ompi/include/mpi_portable_platform.h" #include "ompi/constants.h" #include "ompi/op/op.h" diff --git a/ompi/mca/op/base/op_base_functions.c b/ompi/mca/op/base/op_base_functions.c index f4a095fc960..f245da7205f 100644 --- a/ompi/mca/op/base/op_base_functions.c +++ b/ompi/mca/op/base/op_base_functions.c @@ -45,7 +45,7 @@ int i; \ type *a = (type *) in; \ type *b = (type *) out; \ - for (i = 0; i < *count; ++i) { \ + for (i = *count; i > 0; i--) { \ *(b++) op *(a++); \ } \ } @@ -65,7 +65,7 @@ int i; \ type *a = (type *) in; \ type *b = (type *) out; \ - for (i = 0; i < *count; ++i) { \ + for (i = *count; i > 0; i--) { \ *(b) = current_func(*(b), *(a)); \ ++b; \ ++a; \ @@ -93,7 
+93,7 @@ int i; \ ompi_op_predefined_##type_name##_t *a = (ompi_op_predefined_##type_name##_t*) in; \ ompi_op_predefined_##type_name##_t *b = (ompi_op_predefined_##type_name##_t*) out; \ - for (i = 0; i < *count; ++i, ++a, ++b) { \ + for (i = *count; i > 0; i--, ++a, ++b) { \ if (a->v op b->v) { \ b->v = a->v; \ b->k = a->k; \ @@ -117,7 +117,7 @@ int i; \ type (*a)[2] = (type (*)[2]) in; \ type (*b)[2] = (type (*)[2]) out; \ - for (i = 0; i < *count; ++i, ++a, ++b) { \ + for (i = *count; i > 0; i--, ++a, ++b) { \ (*b)[0] += (*a)[0]; \ (*b)[1] += (*a)[1]; \ } \ @@ -138,7 +138,7 @@ type (*a)[2] = (type (*)[2]) in; \ type (*b)[2] = (type (*)[2]) out; \ type c[2]; \ - for (i = 0; i < *count; ++i, ++a, ++b) { \ + for (i = *count; i > 0; i--, ++a, ++b) { \ c[0] = (*a)[0] * (*b)[0] - (*a)[1] * (*b)[1]; \ c[1] = (*a)[0] * (*b)[1] + (*a)[1] * (*b)[0]; \ (*b)[0] = c[0]; \ @@ -693,7 +693,7 @@ LOC_FUNC(minloc, long_double_int, <) type *a1 = (type *) in1; \ type *a2 = (type *) in2; \ type *b = (type *) out; \ - for (i = 0; i < *count; ++i) { \ + for (i = *count; i > 0; i--) { \ *(b++) = *(a1++) op *(a2++); \ } \ } @@ -715,7 +715,7 @@ LOC_FUNC(minloc, long_double_int, <) type *a1 = (type *) in1; \ type *a2 = (type *) in2; \ type *b = (type *) out; \ - for (i = 0; i < *count; ++i) { \ + for (i = *count; i > 0; i--) { \ *(b) = current_func(*(a1), *(a2)); \ ++b; \ ++a1; \ @@ -748,7 +748,7 @@ LOC_FUNC(minloc, long_double_int, <) ompi_op_predefined_##type_name##_t *a1 = (ompi_op_predefined_##type_name##_t*) in1; \ ompi_op_predefined_##type_name##_t *a2 = (ompi_op_predefined_##type_name##_t*) in2; \ ompi_op_predefined_##type_name##_t *b = (ompi_op_predefined_##type_name##_t*) out; \ - for (i = 0; i < *count; ++i, ++a1, ++a2, ++b ) { \ + for (i = *count; i > 0; i--, ++a1, ++a2, ++b ) { \ if (a1->v op a2->v) { \ b->v = a1->v; \ b->k = a1->k; \ @@ -778,7 +778,7 @@ LOC_FUNC(minloc, long_double_int, <) type (*a1)[2] = (type (*)[2]) in1; \ type (*a2)[2] = (type (*)[2]) in2; \ type (*b)[2] = 
(type (*)[2]) out; \ - for (i = 0; i < *count; ++i, ++a1, ++a2, ++b) { \ + for (i = *count; i > 0; i--, ++a1, ++a2, ++b) { \ (*b)[0] = (*a1)[0] + (*a2)[0]; \ (*b)[1] = (*a1)[1] + (*a2)[1]; \ } \ @@ -800,7 +800,7 @@ LOC_FUNC(minloc, long_double_int, <) type (*a1)[2] = (type (*)[2]) in1; \ type (*a2)[2] = (type (*)[2]) in2; \ type (*b)[2] = (type (*)[2]) out; \ - for (i = 0; i < *count; ++i, ++a1, ++a2, ++b) { \ + for (i = *count; i > 0; i--, ++a1, ++a2, ++b) { \ (*b)[0] = (*a1)[0] * (*a2)[0] - (*a1)[1] * (*a2)[1]; \ (*b)[1] = (*a1)[0] * (*a2)[1] + (*a1)[1] * (*a2)[0]; \ } \ diff --git a/ompi/mca/osc/base/base.h b/ompi/mca/osc/base/base.h index f52f64e6ea7..48986b2d776 100644 --- a/ompi/mca/osc/base/base.h +++ b/ompi/mca/osc/base/base.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University. * All rights reserved. @@ -8,6 +9,9 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2016-2021 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -37,17 +41,17 @@ BEGIN_C_DECLS int ompi_osc_base_find_available(bool enable_progress_threads, bool enable_mpi_threads); +void ompi_osc_base_set_memory_alignment(struct opal_info_t *info, + size_t *memory_alignment); + int ompi_osc_base_select(ompi_win_t *win, void **base, size_t size, int disp_unit, ompi_communicator_t *comm, - opal_info_t *info, int flavor, int *model); -int ompi_osc_base_finalize(void); - OMPI_DECLSPEC extern mca_base_framework_t ompi_osc_base_framework; diff --git a/ompi/mca/osc/base/osc_base_frame.c b/ompi/mca/osc/base/osc_base_frame.c index 24a6a9dc126..8b9d56414fb 100644 --- a/ompi/mca/osc/base/osc_base_frame.c +++ b/ompi/mca/osc/base/osc_base_frame.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University. * All rights reserved. @@ -9,6 +10,8 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -22,7 +25,7 @@ #include "ompi/mca/mca.h" #include "opal/util/output.h" #include "opal/mca/base/base.h" - +#include "opal/include/opal/align.h" #include "ompi/mca/osc/osc.h" #include "ompi/mca/osc/base/base.h" @@ -36,6 +39,39 @@ #include "ompi/mca/osc/base/static-components.h" +void +ompi_osc_base_set_memory_alignment(struct opal_info_t *info, + size_t *memory_alignment) +{ + int flag; + opal_cstring_t *align_info_str; + + opal_info_get(info, "mpi_minimum_memory_alignment", &align_info_str, &flag); + if (flag) { + long long tmp_align = atoll(align_info_str->string); + OBJ_RELEASE(align_info_str); + if ((long long) OPAL_ALIGN_MIN < tmp_align) { + *memory_alignment = tmp_align; + } + } +} + +static int ompi_osc_base_finalize(void) +{ + opal_list_item_t* item; + + /* Finalize all available modules */ + while (NULL != + (item = opal_list_remove_first(&ompi_osc_base_framework.framework_components))) { + ompi_osc_base_component_t *component = (ompi_osc_base_component_t*) + ((mca_base_component_list_item_t*) item)->cli_component; + component->osc_finalize(); + OBJ_RELEASE(item); + } + return OMPI_SUCCESS; +} + + int ompi_osc_base_find_available(bool enable_progress_threads, bool enable_mpi_threads) @@ -56,22 +92,9 @@ ompi_osc_base_find_available(bool enable_progress_threads, OBJ_RELEASE(cli); } } - return OMPI_SUCCESS; -} -int -ompi_osc_base_finalize(void) -{ - opal_list_item_t* item; + ompi_mpi_instance_append_finalize (ompi_osc_base_finalize); - /* Finalize all available modules */ - while (NULL != - (item = opal_list_remove_first(&ompi_osc_base_framework.framework_components))) { - ompi_osc_base_component_t *component = (ompi_osc_base_component_t*) - ((mca_base_component_list_item_t*) item)->cli_component; - component->osc_finalize(); - OBJ_RELEASE(item); - } return OMPI_SUCCESS; } diff --git a/ompi/mca/osc/base/osc_base_init.c b/ompi/mca/osc/base/osc_base_init.c index 02229c51060..bd949b6d242 100644 --- 
a/ompi/mca/osc/base/osc_base_init.c +++ b/ompi/mca/osc/base/osc_base_init.c @@ -36,7 +36,6 @@ ompi_osc_base_select(ompi_win_t *win, size_t size, int disp_unit, ompi_communicator_t *comm, - opal_info_t *info, int flavor, int *model) { @@ -55,7 +54,8 @@ ompi_osc_base_select(ompi_win_t *win, ompi_osc_base_component_t *component = (ompi_osc_base_component_t*) ((mca_base_component_list_item_t*) item)->cli_component; - priority = component->osc_query(win, base, size, disp_unit, comm, info, flavor); + priority = component->osc_query(win, base, size, disp_unit, comm, + win->super.s_info, flavor); if (priority < 0) { if (MPI_WIN_FLAVOR_SHARED == flavor && OMPI_ERR_RMA_SHARED == priority) { /* NTH: quick fix to return OMPI_ERR_RMA_SHARED */ @@ -86,5 +86,6 @@ ompi_osc_base_select(ompi_win_t *win, "select: component %s selected", best_component->osc_version.mca_component_name ); - return best_component->osc_select(win, base, size, disp_unit, comm, info, flavor, model); + return best_component->osc_select(win, base, size, disp_unit, comm, + win->super.s_info, flavor, model); } diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index 4adaed38e3a..a2e04678813 100644 --- a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -175,7 +175,7 @@ number_of_fragments(ptl_size_t length, ptl_size_t maxlength) { ptl_size_t nb_frag = length == 0 ? 
1 : (length - 1) / maxlength + 1; OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, - "%s,%d : %ld fragment(s)", __FUNCTION__, __LINE__, nb_frag)); + "%s,%d : %ld fragment(s)", __FILE__, __LINE__, nb_frag)); return nb_frag; } @@ -217,7 +217,7 @@ segmentedPut(opal_atomic_int64_t *opcount, opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlPut failed with return value %d", - __FUNCTION__, __LINE__, ret); + __FILE__, __LINE__, ret); return ret; } put_length -= frag_length; @@ -261,7 +261,7 @@ segmentedGet(opal_atomic_int64_t *opcount, opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlGet failed with return value %d", - __FUNCTION__, __LINE__, ret); + __FILE__, __LINE__, ret); return ret; } get_length -= frag_length; @@ -310,7 +310,7 @@ segmentedAtomic(opal_atomic_int64_t *opcount, opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlAtomic failed with return value %d", - __FUNCTION__, __LINE__, ret); + __FILE__, __LINE__, ret); return ret; } length -= frag_length; @@ -362,7 +362,7 @@ segmentedFetchAtomic(opal_atomic_int64_t *opcount, opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlFetchAtomic failed with return value %d", - __FUNCTION__, __LINE__, ret); + __FILE__, __LINE__, ret); return ret; } length -= frag_length; @@ -414,7 +414,7 @@ segmentedSwap(opal_atomic_int64_t *opcount, opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlSwap failed with return value %d", - __FUNCTION__, __LINE__, ret); + __FILE__, __LINE__, ret); return ret; } length -= frag_length; @@ -543,7 +543,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Get(origin_count=%d, origin_lb=%lu, 
target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", - __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount)); + __FILE__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount)); ret = PtlGet(module->origin_iovec_md_h, (ptl_size_t) origin_lb, length, @@ -555,7 +555,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d PtlGet() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -628,7 +628,7 @@ atomic_get_to_iovec(ompi_osc_portals4_module_t *module, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", - __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount)); + __FILE__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount)); ret = segmentedGet(&module->opcount, module->origin_iovec_md_h, (ptl_size_t) origin_lb, @@ -712,7 +712,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", - __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount)); + __FILE__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount)); ret = PtlPut(module->origin_iovec_md_h, (ptl_size_t) origin_lb, length, @@ -726,7 +726,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, 
ompi_osc_base_framework.framework_output, "%s,%d PtlPut() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -799,7 +799,7 @@ atomic_put_from_iovec(ompi_osc_portals4_module_t *module, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)", - __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount)); + __FILE__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount)); ret = segmentedPut(&module->opcount, module->origin_iovec_md_h, (ptl_size_t) origin_lb, @@ -1479,7 +1479,7 @@ atomic_to_noncontig(ompi_osc_portals4_module_t *module, (unsigned long) target_iovec[target_iov_index].iov_len)); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, - "%s,%d Atomic", __FUNCTION__, __LINE__)); + "%s,%d Atomic", __FILE__, __LINE__)); ret = PtlAtomic(md_h, (ptl_size_t)origin_iovec[origin_iov_index].iov_base, atomic_len, @@ -1835,7 +1835,7 @@ swap_from_noncontig(ompi_osc_portals4_module_t *module, if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlSwap failed with return value %d", - __FUNCTION__, __LINE__, ret); + __FILE__, __LINE__, ret); opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -1986,7 +1986,7 @@ fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module, if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlFetchAtomic failed with return value %d", - __FUNCTION__, __LINE__, ret); + __FILE__, __LINE__, ret); opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -2057,7 +2057,7 @@ ompi_osc_portals4_rput(const void *origin_addr, OMPI_OSC_PORTALS4_REQUEST_RETURN(request); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d put_to_noncontig() 
failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) { @@ -2076,7 +2076,7 @@ ompi_osc_portals4_rput(const void *origin_addr, OMPI_OSC_PORTALS4_REQUEST_RETURN(request); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d put_from_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else { @@ -2097,7 +2097,7 @@ ompi_osc_portals4_rput(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d RPut(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)", - __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount)); + __FILE__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount)); ret = segmentedPut(&module->opcount, module->req_md_h, (ptl_size_t) origin_addr + origin_lb, @@ -2168,7 +2168,7 @@ ompi_osc_portals4_rget(void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d get_from_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) { @@ -2186,7 +2186,7 @@ ompi_osc_portals4_rget(void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d get_to_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else { @@ -2204,7 +2204,7 @@ ompi_osc_portals4_rget(void *origin_addr, request->ops_expected += number_of_fragments(length, mca_osc_portals4_component.ptl_max_msg_size); OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, - "%s,%d RGet", __FUNCTION__, __LINE__)); + "%s,%d RGet", __FILE__, __LINE__)); ret = 
segmentedGet(&module->opcount, module->req_md_h, (ptl_size_t) origin_addr + origin_lb, @@ -2279,7 +2279,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, OMPI_OSC_PORTALS4_REQUEST_RETURN(request); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_put_to_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else { @@ -2300,7 +2300,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, OMPI_OSC_PORTALS4_REQUEST_RETURN(request); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_to_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } @@ -2321,7 +2321,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, OMPI_OSC_PORTALS4_REQUEST_RETURN(request); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_put_from_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else { @@ -2341,7 +2341,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, OMPI_OSC_PORTALS4_REQUEST_RETURN(request); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_from_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } @@ -2368,7 +2368,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, if (MPI_REPLACE == op) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, - "%s,%d Put", __FUNCTION__, __LINE__)); + "%s,%d Put", __FILE__, __LINE__)); ret = segmentedPut(&module->opcount, module->req_md_h, md_offset + origin_lb, @@ -2406,7 +2406,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, - "%s,%d Atomic", __FUNCTION__, __LINE__)); + "%s,%d Atomic", __FILE__, __LINE__)); ret = PtlAtomic(module->req_md_h, 
md_offset + sent + origin_lb, msg_length, @@ -2497,7 +2497,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d swap_from_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } @@ -2519,7 +2519,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_get_from_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } @@ -2546,7 +2546,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d fetch_atomic_from_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } @@ -2573,7 +2573,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d swap_to_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } @@ -2594,7 +2594,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_get_to_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } @@ -2619,7 +2619,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d fetch_atomic_to_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, 
__LINE__, ret)); OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } @@ -2701,7 +2701,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, request->ops_expected += number_of_fragments(length, module->fetch_atomic_max); OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, - "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__)); + "%s,%d MPI_Get_accumulate", __FILE__, __LINE__)); ret = segmentedGet(&module->opcount, module->req_md_h, (ptl_size_t) md_offset + result_lb, @@ -2827,7 +2827,7 @@ ompi_osc_portals4_put(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d put_to_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) { @@ -2845,7 +2845,7 @@ ompi_osc_portals4_put(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d put_from_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else { @@ -2862,7 +2862,7 @@ ompi_osc_portals4_put(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)", - __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount)); + __FILE__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount)); ret = segmentedPut(&module->opcount, module->md_h, (ptl_size_t) origin_addr + origin_lb, @@ -2926,7 +2926,7 @@ ompi_osc_portals4_get(void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d get_from_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else if 
(!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) { @@ -2944,7 +2944,7 @@ ompi_osc_portals4_get(void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d get_to_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else { @@ -2960,7 +2960,7 @@ ompi_osc_portals4_get(void *origin_addr, length = size * origin_count; OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, - "%s,%d Get", __FUNCTION__, __LINE__)); + "%s,%d Get", __FILE__, __LINE__)); ret = segmentedGet(&module->opcount, module->md_h, (ptl_size_t) origin_addr + origin_lb, @@ -3027,7 +3027,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_put_to_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else { @@ -3047,7 +3047,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_to_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } @@ -3067,7 +3067,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_put_from_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else { @@ -3086,7 +3086,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_from_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } @@ -3109,7 +3109,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, if (MPI_REPLACE == op) { OPAL_OUTPUT_VERBOSE((90, 
ompi_osc_base_framework.framework_output, - "%s,%d Put", __FUNCTION__, __LINE__)); + "%s,%d Put", __FILE__, __LINE__)); ret = segmentedPut(&module->opcount, module->md_h, md_offset + origin_lb, @@ -3144,7 +3144,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, - "%s,%d Atomic", __FUNCTION__, __LINE__)); + "%s,%d Atomic", __FILE__, __LINE__)); ret = PtlAtomic(module->md_h, md_offset + sent + origin_lb, msg_length, @@ -3228,7 +3228,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d swap_from_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else if (MPI_NO_OP == op) { @@ -3249,7 +3249,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_get_from_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else { @@ -3275,7 +3275,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d fetch_atomic_from_noncontig() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } @@ -3301,7 +3301,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d swap_to_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else if (MPI_NO_OP == op) { @@ -3321,7 +3321,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d atomic_get_to_iovec() failed: 
ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } else { @@ -3345,7 +3345,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, if (PTL_OK != ret) { OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d fetch_atomic_to_iovec() failed: ret = %d", - __FUNCTION__, __LINE__, ret)); + __FILE__, __LINE__, ret)); return ret; } } @@ -3415,7 +3415,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, md_offset = (ptl_size_t) result_addr; OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, - "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__)); + "%s,%d MPI_Get_accumulate", __FILE__, __LINE__)); ret = segmentedGet(&module->opcount, module->md_h, (ptl_size_t) md_offset + result_lb, @@ -3536,7 +3536,7 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr, (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, - "%s,%d Swap", __FUNCTION__, __LINE__)); + "%s,%d Swap", __FILE__, __LINE__)); ret = PtlSwap(module->md_h, result_md_offset, module->md_h, @@ -3607,7 +3607,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, - "%s,%d Swap", __FUNCTION__, __LINE__)); + "%s,%d Swap", __FILE__, __LINE__)); ret = PtlSwap(module->md_h, result_md_offset, module->md_h, @@ -3629,7 +3629,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, - "%s,%d Get", __FUNCTION__, __LINE__)); + "%s,%d Get", __FILE__, __LINE__)); ret = PtlGet(module->md_h, md_offset, length, @@ -3653,7 +3653,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, origin_md_offset = (ptl_size_t) origin_addr; OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, - "%s,%d FetchAtomic", __FUNCTION__, 
__LINE__)); + "%s,%d FetchAtomic", __FILE__, __LINE__)); ret = PtlFetchAtomic(module->md_h, result_md_offset, module->md_h, diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index eb1f52659cd..55f15e4aca4 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -9,7 +9,9 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. - * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018-2022 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2020 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +22,9 @@ #include "ompi_config.h" #include "opal/util/printf.h" +#include "opal/include/opal/align.h" +#include "opal/mca/mpool/base/base.h" +#include "opal/opal_portable_platform.h" #include "ompi/mca/osc/base/base.h" #include "ompi/mca/osc/base/osc_base_obj_convert.h" @@ -205,7 +210,7 @@ progress_callback(void) } req = (ompi_osc_portals4_request_t*) ev.user_ptr; - opal_atomic_add_fetch_size_t(&req->super.req_status._ucount, ev.mlength); + req->super.req_status._ucount = opal_atomic_add_fetch_32(&req->bytes_committed, ev.mlength); ops = opal_atomic_add_fetch_32(&req->ops_committed, 1); if (ops == req->ops_expected) { ompi_request_complete(&req->super, true); @@ -384,13 +389,27 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit { ompi_osc_portals4_module_t *module = NULL; int ret = OMPI_ERROR; - int tmp; + int tmp, flag; ptl_md_t md; ptl_me_t me; char *name; + size_t memory_alignment = OPAL_ALIGN_MIN; if (MPI_WIN_FLAVOR_SHARED == flavor) return OMPI_ERR_NOT_SUPPORTED; + if (NULL != info) { + opal_cstring_t *align_info_str; + opal_info_get(info, "mpi_minimum_memory_alignment", + 
&align_info_str, &flag); + if (flag) { + size_t tmp_align = atoll(align_info_str->string); + OBJ_RELEASE(align_info_str); + if (OPAL_ALIGN_MIN < tmp_align) { + memory_alignment = tmp_align; + } + } + } + /* create module structure */ module = (ompi_osc_portals4_module_t*) calloc(1, sizeof(ompi_osc_portals4_module_t)); @@ -402,8 +421,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit /* fill in our part */ if (MPI_WIN_FLAVOR_ALLOCATE == flavor) { - module->free_after = *base = malloc(size); + *base = mca_mpool_base_default_module->mpool_alloc(mca_mpool_base_default_module, size, + memory_alignment, 0); if (NULL == *base) goto error; + module->free_after = *base; } else { module->free_after = NULL; } @@ -413,9 +434,9 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit opal_output_verbose(1, ompi_osc_base_framework.framework_output, "portals4 component creating window with id %d", - ompi_comm_get_cid(module->comm)); + ompi_comm_get_local_cid(module->comm)); - opal_asprintf(&name, "portals4 window %d", ompi_comm_get_cid(module->comm)); + opal_asprintf(&name, "portals4 window %d", ompi_comm_get_local_cid(module->comm)); ompi_win_set_name(win, name); free(name); @@ -569,7 +590,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->passive_target_access_epoch = false; -#if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 || OPAL_ASSEMBLY_ARCH == OPAL_IA32 +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) *model = MPI_WIN_UNIFIED; #else *model = MPI_WIN_SEPARATE; @@ -646,7 +667,8 @@ ompi_osc_portals4_free(struct ompi_win_t *win) PtlCTFree(module->ct_h); if (NULL != module->disp_units) free(module->disp_units); ompi_comm_free(&module->comm); - if (NULL != module->free_after) free(module->free_after); + mca_mpool_base_default_module->mpool_free(mca_mpool_base_default_module, + module->free_after); if (!opal_list_is_empty(&module->outstanding_locks)) { ret = 
OMPI_ERR_RMA_SYNC; diff --git a/ompi/mca/osc/portals4/osc_portals4_request.c b/ompi/mca/osc/portals4/osc_portals4_request.c index d270b0bd245..170a37e4819 100644 --- a/ompi/mca/osc/portals4/osc_portals4_request.c +++ b/ompi/mca/osc/portals4/osc_portals4_request.c @@ -29,7 +29,7 @@ request_free(struct ompi_request_t **ompi_req) ompi_osc_portals4_request_t *request = (ompi_osc_portals4_request_t*) *ompi_req; - if (true != request->super.req_complete) { + if (!REQUEST_COMPLETE(&request->super)) { return MPI_ERR_REQUEST; } diff --git a/ompi/mca/osc/portals4/osc_portals4_request.h b/ompi/mca/osc/portals4/osc_portals4_request.h index f00fed2fb57..0cade501217 100644 --- a/ompi/mca/osc/portals4/osc_portals4_request.h +++ b/ompi/mca/osc/portals4/osc_portals4_request.h @@ -21,6 +21,7 @@ struct ompi_osc_portals4_request_t { ompi_request_t super; int32_t ops_expected; opal_atomic_int32_t ops_committed; + opal_atomic_int32_t bytes_committed; }; typedef struct ompi_osc_portals4_request_t ompi_osc_portals4_request_t; @@ -33,11 +34,11 @@ OBJ_CLASS_DECLARATION(ompi_osc_portals4_request_t); req = (ompi_osc_portals4_request_t*) item; \ OMPI_REQUEST_INIT(&req->super, false); \ req->super.req_mpi_object.win = win; \ - req->super.req_complete = false; \ req->super.req_state = OMPI_REQUEST_ACTIVE; \ req->super.req_status.MPI_ERROR = MPI_SUCCESS; \ req->ops_expected = 0; \ req->ops_committed = 0; \ + req->bytes_committed = 0; \ } while (0) #define OMPI_OSC_PORTALS4_REQUEST_RETURN(req) \ diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h index 6a58c19f86c..2a8aeae156d 100644 --- a/ompi/mca/osc/rdma/osc_rdma.h +++ b/ompi/mca/osc/rdma/osc_rdma.h @@ -4,7 +4,7 @@ * All rights reserved. * Copyright (c) 2004-2006 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,6 +16,8 @@ * Copyright (c) 2019 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2020-2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -108,14 +110,14 @@ struct ompi_osc_rdma_component_t { /** Priority of the osc/rdma component */ unsigned int priority; - /** Priority of the osc/rdma component when using non-RDMA BTLs */ - unsigned int alternate_priority; - /** directory where to place backing files */ char *backing_directory; /** maximum count for network AMO usage */ unsigned long network_amo_max_count; + + /** memory alignment to be used for new windows */ + size_t memory_alignment; }; typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t; @@ -221,6 +223,9 @@ struct ompi_osc_rdma_module_t { /** offset in the shared memory segment where the state array starts */ size_t state_offset; + /** memory alignment to be used for new windows */ + size_t memory_alignment; + /* ********************* sync data ************************ */ /** global sync object (PSCW, fence, lock all) */ diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c index 15f0a80714e..ab0b21e539a 100644 --- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c @@ -9,6 +9,7 @@ * reserved. * Copyright (c) 2019-2021 Google, LLC. All rights reserved. * Copyright (c) 2021 IBM Corporation. All rights reserved. + * Copyright (c) 2022 Cisco Systems, Inc.
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -208,9 +209,9 @@ static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const voi new_value = old_value; if (&ompi_mpi_op_replace.op == op) { - memcpy ((void *)((intptr_t) &new_value + offset), (void *)((intptr_t) origin_addr + dt->super.true_lb), extent); + memcpy ((void *)((ptrdiff_t) &new_value + offset), (void *)((ptrdiff_t) origin_addr + dt->super.true_lb), extent); } else if (&ompi_mpi_op_no_op.op != op) { - ompi_op_reduce (op, (void *) ((intptr_t) origin_addr + dt->super.true_lb), (void*)((intptr_t) &new_value + offset), 1, dt); + ompi_op_reduce (op, (void *) ((ptrdiff_t) origin_addr + dt->super.true_lb), (void*)((ptrdiff_t) &new_value + offset), 1, dt); } ret = ompi_osc_rdma_btl_cswap (module, peer->data_btl_index, peer->data_endpoint, address, target_handle, diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c index c9b29636790..fdd3dd5c832 100644 --- a/ompi/mca/osc/rdma/osc_rdma_active_target.c +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c @@ -456,6 +456,11 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win) OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "complete: %s", win->w_name); OPAL_THREAD_LOCK(&module->lock); + if (0 == sync->num_peers) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_SUCCESS; + } + if (OMPI_OSC_RDMA_SYNC_TYPE_PSCW != sync->type) { OPAL_THREAD_UNLOCK(&module->lock); return OMPI_ERR_RMA_SYNC; diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 74398060081..3bf2a00d5f8 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -18,7 +18,8 @@ * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018-2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020-2021 Google, LLC. All rights reserved. @@ -48,12 +49,12 @@ #include "opal/util/arch.h" #include "opal/util/argv.h" #include "opal/util/printf.h" -#include "opal/align.h" #include "opal/util/sys_limits.h" #if OPAL_CUDA_SUPPORT #include "opal/mca/common/cuda/common_cuda.h" #endif /* OPAL_CUDA_SUPPORT */ #include "opal/util/info_subscriber.h" +#include "opal/mca/mpool/base/base.h" #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" @@ -77,14 +78,12 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model); -static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, ompi_osc_rdma_module_t *module); +static int ompi_osc_rdma_query_accelerated_btls (ompi_communicator_t *comm, ompi_osc_rdma_module_t *module); static int ompi_osc_rdma_query_alternate_btls (ompi_communicator_t *comm, ompi_osc_rdma_module_t *module); -static int ompi_osc_rdma_query_mtls (void); static const char* ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, const char *key, 
const char *value); -static char *ompi_osc_rdma_btl_names; -static char *ompi_osc_rdma_mtl_names; +static char *ompi_osc_rdma_full_connectivity_btls; static char *ompi_osc_rdma_btl_alternate_names; static const mca_base_var_enum_value_t ompi_osc_rdma_locking_modes[] = { @@ -239,14 +238,6 @@ static int ompi_osc_rdma_component_register (void) MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.priority); free(description_str); - mca_osc_rdma_component.alternate_priority = 37; - opal_asprintf(&description_str, "Priority of the osc/rdma component when using non-RDMA btls (default: %d)", - mca_osc_rdma_component.alternate_priority); - (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "alternate_priority", description_str, - MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.alternate_priority); - free(description_str); - (void) mca_base_var_enum_create ("osc_rdma_locking_mode", ompi_osc_rdma_locking_modes, &new_enum); mca_osc_rdma_component.locking_mode = OMPI_OSC_RDMA_LOCKING_TWO_LEVEL; @@ -256,14 +247,14 @@ static int ompi_osc_rdma_component_register (void) MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.locking_mode); OBJ_RELEASE(new_enum); - ompi_osc_rdma_btl_names = "ugni,uct"; + ompi_osc_rdma_full_connectivity_btls = "ugni,uct,ofi"; opal_asprintf(&description_str, "Comma-delimited list of BTL component names to allow without verifying " "connectivity. 
Do not add a BTL to to this list unless it can reach all " "processes in any communicator used with an MPI window (default: %s)", - ompi_osc_rdma_btl_names); + ompi_osc_rdma_full_connectivity_btls); (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "btls", description_str, MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_btl_names); + MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_full_connectivity_btls); free(description_str); ompi_osc_rdma_btl_alternate_names = "sm,tcp"; @@ -274,14 +265,6 @@ static int ompi_osc_rdma_component_register (void) MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_btl_alternate_names); free(description_str); - ompi_osc_rdma_mtl_names = "psm2"; - opal_asprintf(&description_str, "Comma-delimited list of MTL component names to lower the priority of rdma " - "osc component (default: %s)", ompi_osc_rdma_mtl_names); - (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "mtls", description_str, - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_mtl_names); - free(description_str); - if (0 == access ("/dev/shm", W_OK)) { mca_osc_rdma_component.backing_directory = "/dev/shm"; } else { @@ -305,6 +288,16 @@ static int ompi_osc_rdma_component_register (void) MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_osc_rdma_component.network_amo_max_count); + mca_osc_rdma_component.memory_alignment = opal_getpagesize(); + opal_asprintf(&description_str, "The minimum memory alignment used to allocate local window memory (default: %zu). " + "This is a best effort approach. 
Alignments larger than the page size may not be supported.", + mca_osc_rdma_component.memory_alignment); + (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "minimum_memory_alignment", + description_str, + MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_rdma_component.memory_alignment); + free(description_str); + /* register performance variables */ (void) mca_base_component_pvar_register (&mca_osc_rdma_component.super.osc_version, "put_retry_count", @@ -390,7 +383,7 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s { if (MPI_WIN_FLAVOR_SHARED == flavor) { - return -1; + return OMPI_ERR_RMA_SHARED; } #if OPAL_CUDA_SUPPORT @@ -402,19 +395,15 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s } #endif /* OPAL_CUDA_SUPPORT */ - if (OMPI_SUCCESS == ompi_osc_rdma_query_mtls ()) { - return 5; - } - - if (OMPI_SUCCESS == ompi_osc_rdma_query_btls (comm, NULL)) { + if (OMPI_SUCCESS == ompi_osc_rdma_query_accelerated_btls (comm, NULL)) { return mca_osc_rdma_component.priority; } if (OMPI_SUCCESS == ompi_osc_rdma_query_alternate_btls (comm, NULL)) { - return mca_osc_rdma_component.alternate_priority; + return mca_osc_rdma_component.priority; } - return mca_osc_rdma_component.priority; + return OMPI_ERROR; } static int ompi_osc_rdma_initialize_region (ompi_osc_rdma_module_t *module, void **base, size_t size) { @@ -448,9 +437,10 @@ static int ompi_osc_rdma_initialize_region (ompi_osc_rdma_module_t *module, void static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, size_t size) { - size_t total_size, local_rank_array_size, leader_peer_data_size; + size_t total_size, local_rank_array_size, leader_peer_data_size, base_data_size; ompi_osc_rdma_peer_t *my_peer; int ret, my_rank; + size_t memory_alignment = module->memory_alignment; OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocating private internal state"); @@ 
-463,32 +453,35 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s * registration handles needed to access this data. */ total_size = local_rank_array_size + module->region_size + module->state_size + leader_peer_data_size; - total_size += OPAL_ALIGN_PAD_AMOUNT(total_size, OPAL_ALIGN_MIN); + base_data_size = total_size; if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { - total_size += size; + base_data_size += OPAL_ALIGN_PAD_AMOUNT(base_data_size, memory_alignment); + total_size = base_data_size + size; } /* the local data is ordered as follows: rank array (leader, offset mapping), state, leader peer data, and base * (if using MPI_Win_allocate). In this case the leader peer data array does not need to be stored in the same * segment but placing it there simplifies the peer data fetch and cleanup code. */ - module->rank_array = calloc (total_size, 1); + module->rank_array = mca_mpool_base_default_module->mpool_alloc(mca_mpool_base_default_module, total_size, + memory_alignment, 0); if (OPAL_UNLIKELY(NULL == module->rank_array)) { return OMPI_ERR_OUT_OF_RESOURCE; } + memset(module->rank_array, 0, total_size); -// Note, the extra module->region_size space added after local_rank_array_size -// is unused but is there to match what happens in allocte_state_shared() -// This allows module->state_offset to be uniform across the ranks which -// is part of how they pull peer info from each other. + /* Note, the extra module->region_size space added after local_rank_array_size + * is unused but is there to match what happens in allocate_state_shared() + * This allows module->state_offset to be uniform across the ranks which + * is part of how they pull peer info from each other.
*/ module->state_offset = local_rank_array_size + module->region_size; module->state = (ompi_osc_rdma_state_t *) ((intptr_t) module->rank_array + module->state_offset); module->node_comm_info = (unsigned char *) ((intptr_t) module->state + module->state_size); if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { - *base = (void *) ((intptr_t) module->node_comm_info + leader_peer_data_size); + *base = (void *) ((intptr_t) module->rank_array + base_data_size); } /* just go ahead and register the whole segment */ @@ -583,7 +576,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s ompi_osc_rdma_region_t *state_region; struct _local_data *temp; char *data_file; - int page_size = opal_getpagesize(); + size_t memory_alignment = module->memory_alignment; shared_comm = module->shared_comm; @@ -605,10 +598,6 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s return allocate_state_single (module, base, size); } - if (local_size == global_size) { - module->use_memory_registration = false; - } - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocating shared internal state"); local_rank_array_size = sizeof (ompi_osc_rdma_rank_data_t) * RANK_ARRAY_COUNT (module); @@ -620,8 +609,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s /* ensure proper alignment */ if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { - data_base += OPAL_ALIGN_PAD_AMOUNT(data_base, page_size); - size += OPAL_ALIGN_PAD_AMOUNT(size, page_size); + data_base += OPAL_ALIGN_PAD_AMOUNT(data_base, memory_alignment); + size += OPAL_ALIGN_PAD_AMOUNT(size, memory_alignment); } do { @@ -649,23 +638,24 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s my_base_offset = total_size; } total_size += temp[i].size; + total_size += OPAL_ALIGN_PAD_AMOUNT(total_size, memory_alignment); } } if (0 == local_rank) { /* allocate the shared memory segment */ - ret = opal_asprintf (&data_file, "%s" OPAL_PATH_SEP 
"osc_rdma.%s.%x.%d", + ret = opal_asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_rdma.%s.%x.%s.%d", mca_osc_rdma_component.backing_directory, ompi_process_info.nodename, - OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm)); + OMPI_PROC_MY_NAME->jobid, ompi_comm_print_cid(module->comm), getpid()); if (0 > ret) { ret = OMPI_ERR_OUT_OF_RESOURCE; } else { - /* allocate enough space for the state + data for all local ranks */ - ret = opal_shmem_segment_create (&module->seg_ds, data_file, total_size); - free (data_file); - if (OPAL_SUCCESS != ret) { - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create shared memory segment"); - } + /* allocate enough space for the state + data for all local ranks */ + ret = opal_shmem_segment_create (&module->seg_ds, data_file, total_size); + free (data_file); + if (OPAL_SUCCESS != ret) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create shared memory segment"); + } } } @@ -692,6 +682,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s } if (size && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + size_t page_size = opal_getpagesize(); char *baseptr = (char *)((intptr_t) module->segment_base + my_base_offset); *base = (void *)baseptr; // touch each page to force allocation on local NUMA node @@ -788,14 +779,17 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s peer->state_handle = (mca_btl_base_registration_handle_t *) state_region->btl_handle_data; } peer->state = (osc_rdma_counter_t) ((uintptr_t) state_region->base + state_base + module->state_size * i); - if (i > 0) { + if (i==0) { + peer->state_endpoint = peer->data_endpoint; + peer->state_btl_index = peer->data_btl_index; + } else { peer->state_endpoint = local_leader->state_endpoint; peer->state_btl_index = local_leader->state_btl_index; } } if (my_rank == peer_rank) { - module->my_peer = peer; + module->my_peer = peer; } if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor && MPI_WIN_FLAVOR_CREATE 
!= module->flavor && @@ -849,23 +843,6 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s return ret; } -static int ompi_osc_rdma_query_mtls (void) -{ - char **mtls_to_use; - - mtls_to_use = opal_argv_split (ompi_osc_rdma_mtl_names, ','); - if (mtls_to_use && ompi_mtl_base_selected_component) { - for (int i = 0 ; mtls_to_use[i] ; ++i) { - if (0 == strcmp (mtls_to_use[i], ompi_mtl_base_selected_component->mtl_version.mca_component_name)) { - opal_argv_free(mtls_to_use); - return OMPI_SUCCESS; - } - } - } - opal_argv_free(mtls_to_use); - return -1; -} - /** * @brief ensure that all local procs are added to the bml * @@ -904,12 +881,14 @@ static void ompi_osc_rdma_ensure_local_add_procs (void) * @return OMPI_SUCCESS if BTLs can be found * @return OMPI_ERR_UNREACH if no BTLs can be found that match * - * In this case an "alternate" BTL is a BTL that does not provide true RDMA but - * can use active messages using the BTL base AM RDMA/atomics. Since more than - * one BTL may be needed for this support the OSC component will disable the - * use of registration-based RDMA (these BTLs will not be used) and will use - * any remaining BTL. By default the BTLs used will be tcp and sm but any single - * (or pair) of BTLs may be used. + * In this case an "alternate" BTL is a BTL that does not meet the + * requirements of a BTL outlined in ompi_osc_rdma_query_accelerated_btls(). + * Either it does not provide connectivity to all peers, provide + * remote completion, or natively support put/get/atomic. Since more + * than one BTL may be needed for this support the OSC component will + * disable the use of registration-based RDMA (these BTLs will not be + * used) and will use any remaining BTL. By default the BTLs used will + * be tcp and sm but any single (or pair) of BTLs may be used.
*/ static int ompi_osc_rdma_query_alternate_btls (ompi_communicator_t *comm, ompi_osc_rdma_module_t *module) { @@ -958,20 +937,46 @@ static int ompi_osc_rdma_query_alternate_btls (ompi_communicator_t *comm, ompi_o return btls_found > 0 ? OMPI_SUCCESS : OMPI_ERR_UNREACH; } -static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, ompi_osc_rdma_module_t *module) +/* Check for BTL requirements: + * 1) RDMA (put/get) and ATOMIC operations. We only require cswap + * and fetch and add and will emulate other operations with those + * two as necessary. + * 2) Remote Completion + */ +static bool ompi_osc_rdma_check_accelerated_btl(struct mca_btl_base_module_t *btl) +{ + return ((btl->btl_flags & MCA_BTL_FLAGS_RDMA) && + (btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_FOPS) && + (btl->btl_flags & MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION) && + (btl->btl_atomic_flags & MCA_BTL_ATOMIC_SUPPORTS_ADD)); +} + +/* + * Attempt to find a BTL that can be used for native RDMA + * + * Attempt to find an "accelerated" BTL that can be used directly, as + * opposed to emulated rdma semantics with the alternate BTLs. To be + * an accelerated BTL, four conditions must be true: + * + * 1) The BTL must be able to communicate with all peers in the + * Window + * 2) The BTL must provide remote completion + * 3) The BTL must be able to register the entire target window + * 4) The BTL must natively support put/get/atomic operations + * + * Testing (1) is expensive, so as an optimization, the + * ompi_osc_rdma_full_connectivity_btls list contains the list of BTL + * components we know can achieve (1) in almost all usage scenarios. + * + * If module is NULL, the code acts as a query mechanism to find any + * potential BTLs, and is used to implement osc_rdma_query().
+ */ +static int ompi_osc_rdma_query_accelerated_btls (ompi_communicator_t *comm, ompi_osc_rdma_module_t *module) { - struct mca_btl_base_module_t **possible_btls = NULL; int comm_size = ompi_comm_size (comm); - int comm_rank = ompi_comm_rank (comm); - int rc = OMPI_SUCCESS, max_btls = 0; - unsigned int selected_latency = INT_MAX; - struct mca_btl_base_module_t *selected_btl = NULL; - mca_btl_base_selected_module_t *item; - int *btl_counts = NULL; + struct mca_btl_base_module_t *selected_btl; + mca_bml_base_endpoint_t *base_endpoint; char **btls_to_use; - void *tmp; - - btls_to_use = opal_argv_split (ompi_osc_rdma_btl_names, ','); if (module) { ompi_osc_rdma_selected_btl_insert(module, NULL, 0); @@ -979,7 +984,14 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, ompi_osc_rdma_mo module->use_memory_registration = false; } + /* Check for BTLs in the list of BTLs we know can reach all peers + in general usage. */ + btls_to_use = opal_argv_split (ompi_osc_rdma_full_connectivity_btls, ','); if (btls_to_use) { + mca_btl_base_selected_module_t *item; + + selected_btl = NULL; + /* rdma and atomics are only supported with BTLs at the moment */ OPAL_LIST_FOREACH(item, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) { for (int i = 0 ; btls_to_use[i] ; ++i) { @@ -987,9 +999,8 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, ompi_osc_rdma_mo continue; } - if ((item->btl_module->btl_flags & (MCA_BTL_FLAGS_RDMA)) == MCA_BTL_FLAGS_RDMA && - (item->btl_module->btl_flags & (MCA_BTL_FLAGS_ATOMIC_FOPS | MCA_BTL_FLAGS_ATOMIC_OPS))) { - if (!selected_btl || item->btl_module->btl_latency < selected_btl->btl_latency) { + if (ompi_osc_rdma_check_accelerated_btl(item->btl_module)) { + if (NULL == selected_btl || item->btl_module->btl_latency < selected_btl->btl_latency) { selected_btl = item->btl_module; } } @@ -997,126 +1008,92 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, ompi_osc_rdma_mo } opal_argv_free 
(btls_to_use); - } - if (NULL != selected_btl) { - if (module) { - ompi_osc_rdma_selected_btl_insert(module, selected_btl, 0); - module->btls_in_use = 1; - module->use_memory_registration = selected_btl->btl_register_mem != NULL; + if (NULL != selected_btl) { + goto btl_selection_complete; } - - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "selected btl: %s", - selected_btl->btl_component->btl_version.mca_component_name); - return OMPI_SUCCESS; } /* if osc/rdma gets selected we need to ensure that all local procs have been added */ ompi_osc_rdma_ensure_local_add_procs (); - - for (int rank = 0 ; rank < comm_size ; ++rank) { - ompi_proc_t *proc = ompi_comm_peer_lookup (comm, rank); - mca_bml_base_endpoint_t *endpoint; - int num_btls, prev_max; - bool found_btl = false; - - endpoint = mca_bml_base_get_endpoint (proc); - if (NULL == endpoint) { - /* can't continue if some peer is unreachable */ - rc = OMPI_ERR_UNREACH; - break; - } - - num_btls = mca_bml_base_btl_array_get_size (&endpoint->btl_rdma); - if (0 == num_btls) { - rc = OMPI_ERR_NOT_AVAILABLE; - /* at least one rank doesn't have an RDMA capable btl */ - break; - } - - prev_max = max_btls; - max_btls = (max_btls > num_btls) ? max_btls : num_btls; - - tmp = realloc (possible_btls, sizeof (void *) * max_btls); - if (NULL == tmp) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - break; - } - possible_btls = tmp; + /* + * A BTL that is known to reach all peers and that meets our + * other requirements was not found. Look for BTLs that may be + * able to talk to all peers. This is obviously more expensive + * than the check above. + * + * This algorithm skips a potential use case: it requires + * reachability to self, which is not strictly needed if BTL and + * CPU atomics are atomic with each other. However, the set of + * BTLs which can not send to self, which have RDMA semantics, and + * which have the required atomicity is currently the null set and + * almost certain to remain the null set, so we keep it simple.
+ * + * We only want BTLs that can reach all peers, so use rank 0's BTL + * list as the list of all available BTLs. Any BTL that cannot + * be used to communicate with rank 0 necessarily is not in the + * list of all available BTLs for this algorithm. + */ + base_endpoint = mca_bml_base_get_endpoint(ompi_comm_peer_lookup(comm, 0)); + if (NULL == base_endpoint) { + return OMPI_ERR_UNREACH; + } - for (int j = prev_max ; j < max_btls ; ++j) { - possible_btls[j] = NULL; - } + selected_btl = NULL; + for (size_t i_btl = 0 ; + i_btl < mca_bml_base_btl_array_get_size(&base_endpoint->btl_rdma); + ++i_btl) { + bool have_connectivity = true; + struct mca_bml_base_btl_t *examine_bml_btl; + struct mca_btl_base_module_t *examine_btl; - tmp = realloc (btl_counts, sizeof (int) * max_btls); - if (NULL == tmp) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - break; - } - btl_counts = tmp; - - for (int i_btl = 0 ; i_btl < num_btls ; ++i_btl) { - /* for this implementation we need only compare-and-swap and fetch-and-add */ - if ((endpoint->btl_rdma.bml_btls[i_btl].btl->btl_flags & (MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS)) == - (MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS) && (endpoint->btl_rdma.bml_btls[i_btl].btl->btl_atomic_flags & - MCA_BTL_ATOMIC_SUPPORTS_ADD)) { - for (int j = 0 ; j < max_btls ; ++j) { - if (endpoint->btl_rdma.bml_btls[i_btl].btl == possible_btls[j]) { - ++btl_counts[j]; - found_btl = true; - break; - } else if (NULL == possible_btls[j]) { - possible_btls[j] = endpoint->btl_rdma.bml_btls[i_btl].btl; - btl_counts[j] = 1; - found_btl = true; - break; - } - } - } + examine_bml_btl = mca_bml_base_btl_array_get_index(&base_endpoint->btl_rdma, i_btl); + if (NULL == examine_bml_btl) { + return OMPI_ERR_NOT_FOUND; } + examine_btl = examine_bml_btl->btl; - /* any non-local rank must have a usable btl */ - if (!found_btl && comm_rank != rank) { - /* no btl = no rdma/atomics */ - rc = OMPI_ERR_UNREACH; - break; + /* skip any BTL which doesn't meet our requirements */ + if 
(!ompi_osc_rdma_check_accelerated_btl(examine_btl)) { + continue; } - } - if (OMPI_SUCCESS != rc) { - free (possible_btls); - free (btl_counts); - return rc; - } - - for (int i = 0 ; i < max_btls ; ++i) { - int btl_count = btl_counts[i]; + /* check connectivity across all ranks */ + for (int rank = 0 ; rank < comm_size ; ++rank) { + ompi_proc_t *proc = ompi_comm_peer_lookup(comm, rank); + mca_bml_base_endpoint_t *endpoint; - if (NULL == possible_btls[i]) { - break; - } + endpoint = mca_bml_base_get_endpoint(proc); + if (NULL == endpoint) { + have_connectivity = false; + break; + } - if (possible_btls[i]->btl_atomic_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB) { - /* do not need to use the btl for self communication */ - btl_count++; + if (NULL == mca_bml_base_btl_array_find(&endpoint->btl_rdma, + examine_btl)) { + have_connectivity = false; + break; + } } - if (btl_count >= comm_size && possible_btls[i]->btl_latency < selected_latency) { - selected_btl = possible_btls[i]; - selected_latency = possible_btls[i]->btl_latency; + /* if we have connectivity, displace currently selected btl if + * this one has lower latency; we prioritize latency over all + * other parameters + */ + if (have_connectivity) { + if (NULL == selected_btl || examine_btl->btl_latency < selected_btl->btl_latency) { + selected_btl = examine_btl; + } } } - free (possible_btls); - free (btl_counts); - if (NULL == selected_btl) { OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no suitable btls found"); - /* no btl = no rdma/atomics */ return OMPI_ERR_NOT_AVAILABLE; } +btl_selection_complete: if (module) { ompi_osc_rdma_selected_btl_insert(module, selected_btl, 0); module->btls_in_use = 1; @@ -1343,6 +1320,10 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, module->win = win; module->disp_unit = disp_unit; module->size = size; + module->memory_alignment = mca_osc_rdma_component.memory_alignment; + if (NULL != info) { + ompi_osc_base_set_memory_alignment(info, 
&module->memory_alignment); + } /* set the module so we properly cleanup */ win->w_osc_module = (ompi_osc_base_module_t*) module; @@ -1369,8 +1350,8 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, return ret; } - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "creating osc/rdma window of flavor %d with id %d", - flavor, ompi_comm_get_cid(module->comm)); + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "creating osc/rdma window of flavor %d with id %s", + flavor, ompi_comm_print_cid (module->comm)); /* peer data */ if (world_size > init_limit) { @@ -1389,7 +1370,7 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, } /* find rdma capable endpoints */ - ret = ompi_osc_rdma_query_btls (module->comm, module); + ret = ompi_osc_rdma_query_accelerated_btls (module->comm, module); if (OMPI_SUCCESS != ret) { OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_WARN, "could not find a suitable btl. falling back on " "active-message BTLs"); @@ -1415,15 +1396,16 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, } else { module->state_size += mca_osc_rdma_component.max_attach * module->region_size; } -/* - * These are the info's that this module is interested in - */ + + /* + * These are the info's that this module is interested in + */ opal_infosubscribe_subscribe(&win->super, "no_locks", "false", ompi_osc_rdma_set_no_lock_info); -/* - * TODO: same_size, same_disp_unit have w_flag entries, but do not appear - * to be used anywhere. If that changes, they should be subscribed - */ + /* + * TODO: same_size, same_disp_unit have w_flag entries, but do not appear + * to be used anywhere. 
If that changes, they should be subscribed + */ /* fill in the function pointer part */ memcpy(&module->super, &ompi_osc_rdma_module_rdma_template, sizeof(module->super)); @@ -1477,7 +1459,7 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, /* update component data */ OPAL_THREAD_LOCK(&mca_osc_rdma_component.lock); ret = opal_hash_table_set_value_uint32(&mca_osc_rdma_component.modules, - ompi_comm_get_cid(module->comm), + ompi_comm_get_local_cid(module->comm), module); OPAL_THREAD_UNLOCK(&mca_osc_rdma_component.lock); if (OMPI_SUCCESS != ret) { @@ -1488,7 +1470,7 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, /* fill in window information */ *model = MPI_WIN_UNIFIED; win->w_osc_module = (ompi_osc_base_module_t*) module; - opal_asprintf(&name, "rdma window %d", ompi_comm_get_cid(module->comm)); + opal_asprintf(&name, "rdma window %s", ompi_comm_print_cid(module->comm)); ompi_win_set_name(win, name); free(name); @@ -1503,8 +1485,8 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, /* for now the leader is always rank 0 in the communicator */ module->leader = ompi_osc_rdma_module_peer (module, 0); - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %d", - ompi_comm_get_cid(module->comm)); + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %s", + ompi_comm_print_cid(module->comm)); } return ret; @@ -1541,8 +1523,8 @@ ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, const char *key, cons } /* enforce collectiveness... */ module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module); -/* - * Accept any value - */ + /* + * Accept any value + */ return module->no_locks ? 
"true" : "false"; } diff --git a/ompi/mca/osc/rdma/osc_rdma_module.c b/ompi/mca/osc/rdma/osc_rdma_module.c index cda38baaa24..933baf00694 100644 --- a/ompi/mca/osc/rdma/osc_rdma_module.c +++ b/ompi/mca/osc/rdma/osc_rdma_module.c @@ -4,7 +4,7 @@ * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -22,6 +22,8 @@ * $HEADER$ */ +#include "opal/mca/mpool/base/base.h" + #include "osc_rdma.h" #include "osc_rdma_lock.h" @@ -58,8 +60,8 @@ int ompi_osc_rdma_free(ompi_win_t *win) if (NULL != module->comm) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, - "rdma component destroying window with id %d", - ompi_comm_get_cid(module->comm)); + "rdma component destroying window with id %s", + ompi_comm_print_cid(module->comm)); /* finish with a barrier */ if (ompi_group_size(win->w_group) > 1) { @@ -70,7 +72,7 @@ int ompi_osc_rdma_free(ompi_win_t *win) /* remove from component information */ OPAL_THREAD_LOCK(&mca_osc_rdma_component.lock); opal_hash_table_remove_value_uint32(&mca_osc_rdma_component.modules, - ompi_comm_get_cid(module->comm)); + ompi_comm_get_local_cid(module->comm)); OPAL_THREAD_UNLOCK(&mca_osc_rdma_component.lock); } @@ -140,7 +142,8 @@ int ompi_osc_rdma_free(ompi_win_t *win) free (module->peer_array); free (module->outstanding_lock_array); - free (module->free_after); + mca_mpool_base_default_module->mpool_free(mca_mpool_base_default_module, + module->free_after); free (module->selected_btls); free (module); diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 99486725ba6..6b51c6d9403 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ 
b/ompi/mca/osc/sm/osc_sm_component.c @@ -13,6 +13,8 @@ * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2020 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2020 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,9 +29,10 @@ #include "ompi/mca/osc/base/osc_base_obj_convert.h" #include "ompi/request/request.h" #include "opal/util/sys_limits.h" -#include "opal/include/opal/align.h" +#include "opal/align.h" #include "opal/util/info_subscriber.h" #include "opal/util/printf.h" +#include "opal/mca/mpool/base/base.h" #include "osc_sm.h" @@ -193,6 +196,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit int comm_size = ompi_comm_size (comm); bool unlink_needed = false; int ret = OMPI_ERROR; + size_t memory_alignment = OPAL_ALIGN_MIN; if (OMPI_SUCCESS != (ret = check_win_ok(comm, flavor))) { return ret; @@ -208,9 +212,12 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit OBJ_CONSTRUCT(&module->lock, opal_mutex_t); ret = opal_infosubscribe_subscribe(&(win->super), "alloc_shared_noncontig", "false", component_set_alloc_shared_noncontig_info); - if (OPAL_SUCCESS != ret) goto error; + if (NULL != info) { + ompi_osc_base_set_memory_alignment(info, &memory_alignment); + } + /* fill in the function pointer part */ memcpy(module, &ompi_osc_sm_module_template, sizeof(ompi_osc_base_module_t)); @@ -230,7 +237,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit if (NULL == module->bases) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; module->sizes[0] = size; - module->bases[0] = malloc(size); + module->bases[0] = mca_mpool_base_default_module->mpool_alloc(mca_mpool_base_default_module, size, + memory_alignment, 0); if (NULL == module->bases[0]) return 
OMPI_ERR_TEMP_OUT_OF_RESOURCE; module->global_state = malloc(sizeof(ompi_osc_sm_global_state_t)); @@ -246,6 +254,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit size_t pagesize; size_t state_size; size_t posts_size, post_size = (comm_size + OSC_SM_POST_MASK) / (OSC_SM_POST_MASK + 1); + size_t data_base_size; OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output, "allocating shared memory region of size %ld\n", (long) size)); @@ -283,17 +292,20 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit state_size += OPAL_ALIGN_PAD_AMOUNT(state_size, 64); posts_size = comm_size * post_size * sizeof (module->posts[0][0]); posts_size += OPAL_ALIGN_PAD_AMOUNT(posts_size, 64); + data_base_size = state_size + posts_size; + data_base_size += OPAL_ALIGN_PAD_AMOUNT(data_base_size, pagesize); if (0 == ompi_comm_rank (module->comm)) { char *data_file; - ret = opal_asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_sm.%s.%x.%d.%d", - mca_osc_sm_component.backing_directory, ompi_process_info.nodename, - OMPI_PROC_MY_NAME->jobid, (int) OMPI_PROC_MY_NAME->vpid, ompi_comm_get_cid(module->comm)); + ret = opal_asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_sm.%s.%x.%d.%s", + mca_osc_sm_component.backing_directory, ompi_process_info.nodename, + OMPI_PROC_MY_NAME->jobid, (int) OMPI_PROC_MY_NAME->vpid, + ompi_comm_print_cid(module->comm)); if (ret < 0) { free(rbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - ret = opal_shmem_segment_create (&module->seg_ds, data_file, total + pagesize + state_size + posts_size); + ret = opal_shmem_segment_create (&module->seg_ds, data_file, total + data_base_size); free(data_file); if (OPAL_SUCCESS != ret) { free(rbuf); @@ -340,7 +352,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->global_state = (ompi_osc_sm_global_state_t *) (module->posts[0] + comm_size * post_size); module->node_states = (ompi_osc_sm_node_state_t *) (module->global_state + 1); 
- for (i = 0, total = state_size + posts_size ; i < comm_size ; ++i) { + for (i = 0, total = data_base_size ; i < comm_size ; ++i) { if (i > 0) { module->posts[i] = module->posts[i - 1] + post_size; } @@ -523,12 +535,13 @@ ompi_osc_sm_free(struct ompi_win_t *win) module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module); - opal_shmem_segment_detach (&module->seg_ds); + opal_shmem_segment_detach (&module->seg_ds); } else { free(module->node_states); free(module->global_state); if (NULL != module->bases) { - free(module->bases[0]); + mca_mpool_base_default_module->mpool_free(mca_mpool_base_default_module, + module->bases[0]); } } free(module->disp_units); @@ -565,9 +578,9 @@ static const char* component_set_blocking_fence_info(opal_infosubscriber_t *obj, const char *key, const char *val) { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) ((struct ompi_win_t*) obj)->w_osc_module; -/* - * Assuming that you can't change the default. - */ + /* + * Assuming that you can't change the default. + */ return module->global_state->use_barrier_for_fence ? "true" : "false"; } @@ -577,9 +590,9 @@ component_set_alloc_shared_noncontig_info(opal_infosubscriber_t *obj, const char { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) ((struct ompi_win_t*) obj)->w_osc_module; -/* - * Assuming that you can't change the default. - */ + /* + * Assuming that you can't change the default. + */ return module->noncontig ? 
"true" : "false"; } diff --git a/ompi/mca/osc/ucx/osc_ucx_comm.c b/ompi/mca/osc/ucx/osc_ucx_comm.c index b4b99018334..efeea95c347 100644 --- a/ompi/mca/osc/ucx/osc_ucx_comm.c +++ b/ompi/mca/osc/ucx/osc_ucx_comm.c @@ -325,18 +325,18 @@ static inline int end_atomicity( } static inline int get_dynamic_win_info(uint64_t remote_addr, ompi_osc_ucx_module_t *module, - int target) { + int target, int *win_idx) { uint64_t remote_state_addr = (module->state_addrs)[target] + OSC_UCX_STATE_DYNAMIC_WIN_CNT_OFFSET; - size_t len = sizeof(uint64_t) + sizeof(ompi_osc_dynamic_win_info_t) * OMPI_OSC_UCX_ATTACH_MAX; - char *temp_buf = malloc(len); + size_t remote_state_len = sizeof(uint64_t) + sizeof(ompi_osc_dynamic_win_info_t) * OMPI_OSC_UCX_ATTACH_MAX; + char *temp_buf = calloc(remote_state_len, 1); ompi_osc_dynamic_win_info_t *temp_dynamic_wins; uint64_t win_count; - int contain, insert = -1; + int insert = -1; int ret; ret = opal_common_ucx_wpmem_putget(module->state_mem, OPAL_COMMON_UCX_GET, target, (void *)((intptr_t)temp_buf), - len, remote_state_addr); + remote_state_len, remote_state_addr); if (OPAL_SUCCESS != ret) { OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_putget failed: %d", ret); ret = OMPI_ERROR; @@ -350,23 +350,27 @@ static inline int get_dynamic_win_info(uint64_t remote_addr, ompi_osc_ucx_module } memcpy(&win_count, temp_buf, sizeof(uint64_t)); - assert(win_count > 0 && win_count <= OMPI_OSC_UCX_ATTACH_MAX); + if (win_count > OMPI_OSC_UCX_ATTACH_MAX) { + return MPI_ERR_RMA_RANGE; + } temp_dynamic_wins = (ompi_osc_dynamic_win_info_t *)(temp_buf + sizeof(uint64_t)); - contain = ompi_osc_find_attached_region_position(temp_dynamic_wins, 0, win_count, + *win_idx = ompi_osc_find_attached_region_position(temp_dynamic_wins, 0, win_count, remote_addr, 1, &insert); - assert(contain >= 0 && (uint64_t)contain < win_count); + if (*win_idx < 0 || (uint64_t)*win_idx >= win_count) { + return MPI_ERR_RMA_RANGE; + } - if (module->local_dynamic_win_info[contain].mem->mem_addrs == NULL) { 
- module->local_dynamic_win_info[contain].mem->mem_addrs = calloc(ompi_comm_size(module->comm), + if (module->local_dynamic_win_info[*win_idx].mem->mem_addrs == NULL) { + module->local_dynamic_win_info[*win_idx].mem->mem_addrs = calloc(ompi_comm_size(module->comm), OMPI_OSC_UCX_MEM_ADDR_MAX_LEN); - module->local_dynamic_win_info[contain].mem->mem_displs =calloc(ompi_comm_size(module->comm), + module->local_dynamic_win_info[*win_idx].mem->mem_displs = calloc(ompi_comm_size(module->comm), sizeof(int)); } - memcpy(module->local_dynamic_win_info[contain].mem->mem_addrs + target * OMPI_OSC_UCX_MEM_ADDR_MAX_LEN, - temp_dynamic_wins[contain].mem_addr, OMPI_OSC_UCX_MEM_ADDR_MAX_LEN); - module->local_dynamic_win_info[contain].mem->mem_displs[target] = target * OMPI_OSC_UCX_MEM_ADDR_MAX_LEN; + memcpy(module->local_dynamic_win_info[*win_idx].mem->mem_addrs + target * OMPI_OSC_UCX_MEM_ADDR_MAX_LEN, + temp_dynamic_wins[*win_idx].mem_addr, OMPI_OSC_UCX_MEM_ADDR_MAX_LEN); + module->local_dynamic_win_info[*win_idx].mem->mem_displs[target] = target * OMPI_OSC_UCX_MEM_ADDR_MAX_LEN; cleanup: free(temp_buf); @@ -416,17 +420,20 @@ static int do_atomic_op_intrinsic( void *result_addr, ompi_osc_ucx_request_t *ucx_req) { - int ret = OMPI_SUCCESS; + int ret = OMPI_SUCCESS, + win_idx = -1; size_t origin_dt_bytes; + opal_common_ucx_wpmem_t *mem = module->mem; ompi_datatype_type_size(dt, &origin_dt_bytes); uint64_t remote_addr = (module->addrs[target]) + target_disp * OSC_UCX_GET_DISP(module, target); if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { - ret = get_dynamic_win_info(remote_addr, module, target); + ret = get_dynamic_win_info(remote_addr, module, target, &win_idx); if (ret != OMPI_SUCCESS) { return ret; } + mem = module->local_dynamic_win_info[win_idx].mem; } ucp_atomic_fetch_op_t opcode; @@ -454,7 +461,7 @@ static int do_atomic_op_intrinsic( user_req_ptr = ucx_req; // issue a fence if this is the last but not the only element if (0 < i) { - ret = 
opal_common_ucx_wpmem_fence(module->mem); + ret = opal_common_ucx_wpmem_fence(mem); if (ret != OMPI_SUCCESS) { OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_fence failed: %d", ret); return OMPI_ERROR; @@ -466,7 +473,7 @@ static int do_atomic_op_intrinsic( } else { value = opal_common_ucx_load_uint64(origin_addr, origin_dt_bytes); } - ret = opal_common_ucx_wpmem_fetch_nb(module->mem, opcode, value, target, + ret = opal_common_ucx_wpmem_fetch_nb(mem, opcode, value, target, output_addr, origin_dt_bytes, remote_addr, user_req_cb, user_req_ptr); @@ -485,10 +492,11 @@ int ompi_osc_ucx_put(const void *origin_addr, int origin_count, struct ompi_data int target, ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win) { ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; + opal_common_ucx_wpmem_t *mem = module->mem; uint64_t remote_addr = (module->addrs[target]) + target_disp * OSC_UCX_GET_DISP(module, target); bool is_origin_contig = false, is_target_contig = false; ptrdiff_t origin_lb, origin_extent, target_lb, target_extent; - int ret = OMPI_SUCCESS; + int ret = OMPI_SUCCESS, win_idx = -1; ret = check_sync_state(module, target, false); if (ret != OMPI_SUCCESS) { @@ -496,10 +504,11 @@ int ompi_osc_ucx_put(const void *origin_addr, int origin_count, struct ompi_data } if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { - ret = get_dynamic_win_info(remote_addr, module, target); + ret = get_dynamic_win_info(remote_addr, module, target, &win_idx); if (ret != OMPI_SUCCESS) { return ret; } + mem = module->local_dynamic_win_info[win_idx].mem; } if (!target_count) { @@ -519,7 +528,7 @@ int ompi_osc_ucx_put(const void *origin_addr, int origin_count, struct ompi_data ompi_datatype_type_size(origin_dt, &origin_len); origin_len *= origin_count; - ret = opal_common_ucx_wpmem_putget(module->mem, OPAL_COMMON_UCX_PUT, target, + ret = opal_common_ucx_wpmem_putget(mem, OPAL_COMMON_UCX_PUT, target, (void *)((intptr_t)origin_addr + 
origin_lb), origin_len, remote_addr + target_lb); if (OPAL_SUCCESS != ret) { @@ -539,10 +548,11 @@ int ompi_osc_ucx_get(void *origin_addr, int origin_count, int target, ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win) { ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; + opal_common_ucx_wpmem_t *mem = module->mem; uint64_t remote_addr = (module->addrs[target]) + target_disp * OSC_UCX_GET_DISP(module, target); ptrdiff_t origin_lb, origin_extent, target_lb, target_extent; bool is_origin_contig = false, is_target_contig = false; - int ret = OMPI_SUCCESS; + int ret = OMPI_SUCCESS, win_idx = -1; ret = check_sync_state(module, target, false); if (ret != OMPI_SUCCESS) { @@ -550,10 +560,11 @@ int ompi_osc_ucx_get(void *origin_addr, int origin_count, } if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { - ret = get_dynamic_win_info(remote_addr, module, target); + ret = get_dynamic_win_info(remote_addr, module, target, &win_idx); if (ret != OMPI_SUCCESS) { return ret; } + mem = module->local_dynamic_win_info[win_idx].mem; } if (!target_count) { @@ -574,7 +585,7 @@ int ompi_osc_ucx_get(void *origin_addr, int origin_count, ompi_datatype_type_size(origin_dt, &origin_len); origin_len *= origin_count; - ret = opal_common_ucx_wpmem_putget(module->mem, OPAL_COMMON_UCX_GET, target, + ret = opal_common_ucx_wpmem_putget(mem, OPAL_COMMON_UCX_GET, target, (void *)((intptr_t)origin_addr + origin_lb), origin_len, remote_addr + target_lb); if (OPAL_SUCCESS != ret) { @@ -771,9 +782,10 @@ int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_a int target, ptrdiff_t target_disp, struct ompi_win_t *win) { ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t *)win->w_osc_module; + opal_common_ucx_wpmem_t *mem = module->mem; uint64_t remote_addr = (module->addrs[target]) + target_disp * OSC_UCX_GET_DISP(module, target); size_t dt_bytes; - int ret = OMPI_SUCCESS; + int ret = OMPI_SUCCESS, win_idx = 
-1; bool lock_acquired = false; ret = check_sync_state(module, target, false); @@ -782,10 +794,11 @@ int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_a } if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { - ret = get_dynamic_win_info(remote_addr, module, target); + ret = get_dynamic_win_info(remote_addr, module, target, &win_idx); if (ret != OMPI_SUCCESS) { return ret; } + mem = module->local_dynamic_win_info[win_idx].mem; } ompi_datatype_type_size(dt, &dt_bytes); @@ -803,21 +816,21 @@ int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_a return ret; } - ret = opal_common_ucx_wpmem_putget(module->mem, OPAL_COMMON_UCX_GET, target, + ret = opal_common_ucx_wpmem_putget(mem, OPAL_COMMON_UCX_GET, target, &result_addr, dt_bytes, remote_addr); if (OPAL_SUCCESS != ret) { OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_putget failed: %d", ret); return OMPI_ERROR; } - ret = opal_common_ucx_wpmem_flush(module->mem, OPAL_COMMON_UCX_SCOPE_EP, target); + ret = opal_common_ucx_wpmem_flush(mem, OPAL_COMMON_UCX_SCOPE_EP, target); if (ret != OPAL_SUCCESS) { return ret; } if (0 == memcmp(result_addr, compare_addr, dt_bytes)) { // write the new value - ret = opal_common_ucx_wpmem_putget(module->mem, OPAL_COMMON_UCX_PUT, target, + ret = opal_common_ucx_wpmem_putget(mem, OPAL_COMMON_UCX_PUT, target, (void*)origin_addr, dt_bytes, remote_addr); if (OPAL_SUCCESS != ret) { OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_putget failed: %d", ret); @@ -834,7 +847,8 @@ int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_win_t *win) { size_t dt_bytes; ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; - int ret = OMPI_SUCCESS; + opal_common_ucx_wpmem_t *mem = module->mem; + int ret = OMPI_SUCCESS, win_idx = -1; ret = check_sync_state(module, target, false); if (ret != OMPI_SUCCESS) { @@ -860,10 +874,11 @@ int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr, } if (module->flavor 
== MPI_WIN_FLAVOR_DYNAMIC) { - ret = get_dynamic_win_info(remote_addr, module, target); + ret = get_dynamic_win_info(remote_addr, module, target, &win_idx); if (ret != OMPI_SUCCESS) { return ret; } + mem = module->local_dynamic_win_info[win_idx].mem; } value = origin_addr ? opal_common_ucx_load_uint64(origin_addr, dt_bytes) : 0; @@ -877,7 +892,7 @@ int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr, } } - ret = opal_common_ucx_wpmem_fetch_nb(module->mem, opcode, value, target, + ret = opal_common_ucx_wpmem_fetch_nb(mem, opcode, value, target, (void *)result_addr, dt_bytes, remote_addr, NULL, NULL); @@ -1049,9 +1064,10 @@ int ompi_osc_ucx_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, struct ompi_request_t **request) { ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; + opal_common_ucx_wpmem_t *mem = module->mem; uint64_t remote_addr = (module->addrs[target]) + target_disp * OSC_UCX_GET_DISP(module, target); ompi_osc_ucx_request_t *ucx_req = NULL; - int ret = OMPI_SUCCESS; + int ret = OMPI_SUCCESS, win_idx = -1; ret = check_sync_state(module, target, true); if (ret != OMPI_SUCCESS) { @@ -1059,10 +1075,11 @@ int ompi_osc_ucx_rput(const void *origin_addr, int origin_count, } if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { - ret = get_dynamic_win_info(remote_addr, module, target); + ret = get_dynamic_win_info(remote_addr, module, target, &win_idx); if (ret != OMPI_SUCCESS) { return ret; } + mem = module->local_dynamic_win_info[win_idx].mem; } OMPI_OSC_UCX_REQUEST_ALLOC(win, ucx_req); @@ -1074,7 +1091,7 @@ int ompi_osc_ucx_rput(const void *origin_addr, int origin_count, return ret; } - ret = opal_common_ucx_wpmem_fence(module->mem); + ret = opal_common_ucx_wpmem_fence(mem); if (ret != OMPI_SUCCESS) { OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_fence failed: %d", ret); return OMPI_ERROR; @@ -1082,7 +1099,7 @@ int ompi_osc_ucx_rput(const void *origin_addr, int 
origin_count, mca_osc_ucx_component.num_incomplete_req_ops++; /* TODO: investigate whether ucp_worker_flush_nb is a better choice here */ - ret = opal_common_ucx_wpmem_fetch_nb(module->mem, UCP_ATOMIC_FETCH_OP_FADD, + ret = opal_common_ucx_wpmem_fetch_nb(mem, UCP_ATOMIC_FETCH_OP_FADD, 0, target, &(module->req_result), sizeof(uint64_t), remote_addr & (~0x7), req_completion, ucx_req); @@ -1102,9 +1119,10 @@ int ompi_osc_ucx_rget(void *origin_addr, int origin_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, struct ompi_request_t **request) { ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; + opal_common_ucx_wpmem_t *mem = module->mem; uint64_t remote_addr = (module->addrs[target]) + target_disp * OSC_UCX_GET_DISP(module, target); ompi_osc_ucx_request_t *ucx_req = NULL; - int ret = OMPI_SUCCESS; + int ret = OMPI_SUCCESS, win_idx = -1; ret = check_sync_state(module, target, true); if (ret != OMPI_SUCCESS) { @@ -1112,10 +1130,11 @@ int ompi_osc_ucx_rget(void *origin_addr, int origin_count, } if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { - ret = get_dynamic_win_info(remote_addr, module, target); + ret = get_dynamic_win_info(remote_addr, module, target, &win_idx); if (ret != OMPI_SUCCESS) { return ret; } + mem = module->local_dynamic_win_info[win_idx].mem; } OMPI_OSC_UCX_REQUEST_ALLOC(win, ucx_req); @@ -1127,7 +1146,7 @@ int ompi_osc_ucx_rget(void *origin_addr, int origin_count, return ret; } - ret = opal_common_ucx_wpmem_fence(module->mem); + ret = opal_common_ucx_wpmem_fence(mem); if (ret != OMPI_SUCCESS) { OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_fence failed: %d", ret); return OMPI_ERROR; @@ -1135,7 +1154,7 @@ int ompi_osc_ucx_rget(void *origin_addr, int origin_count, mca_osc_ucx_component.num_incomplete_req_ops++; /* TODO: investigate whether ucp_worker_flush_nb is a better choice here */ - ret = opal_common_ucx_wpmem_fetch_nb(module->mem, UCP_ATOMIC_FETCH_OP_FADD, + ret = opal_common_ucx_wpmem_fetch_nb(mem, 
UCP_ATOMIC_FETCH_OP_FADD, 0, target, &(module->req_result), sizeof(uint64_t), remote_addr & (~0x7), req_completion, ucx_req); diff --git a/ompi/mca/osc/ucx/osc_ucx_component.c b/ompi/mca/osc/ucx/osc_ucx_component.c index a9db0952776..95c18c117d0 100644 --- a/ompi/mca/osc/ucx/osc_ucx_component.c +++ b/ompi/mca/osc/ucx/osc_ucx_component.c @@ -1,6 +1,9 @@ /* * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -392,7 +395,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in } *model = MPI_WIN_UNIFIED; - opal_asprintf(&name, "ucx window %d", ompi_comm_get_cid(module->comm)); + opal_asprintf(&name, "ucx window %s", ompi_comm_print_cid(module->comm)); ompi_win_set_name(win, name); free(name); @@ -451,22 +454,24 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in ret = opal_common_ucx_wpmem_create(module->ctx, base, size, mem_type, &exchange_len_info, + OPAL_COMMON_UCX_WPMEM_ADDR_EXCHANGE_FULL, (void *)module->comm, &my_mem_addr, &my_mem_addr_size, &module->mem); if (ret != OMPI_SUCCESS) { goto error; } - } state_base = (void *)&(module->state); ret = opal_common_ucx_wpmem_create(module->ctx, &state_base, sizeof(ompi_osc_ucx_state_t), - OPAL_COMMON_UCX_MEM_MAP, &exchange_len_info, + OPAL_COMMON_UCX_MEM_MAP, + &exchange_len_info, + OPAL_COMMON_UCX_WPMEM_ADDR_EXCHANGE_FULL, (void *)module->comm, - &my_mem_addr, &my_mem_addr_size, - &module->state_mem); + &my_mem_addr, &my_mem_addr_size, + &module->state_mem); if (ret != OMPI_SUCCESS) { goto error; } @@ -567,6 +572,10 @@ int ompi_osc_find_attached_region_position(ompi_osc_dynamic_win_info_t *dynamic_ uint64_t base, size_t len, int *insert) { int mid_index = (max_index + min_index) >> 1; + if (dynamic_wins[mid_index].size == 1) { 
+ len = 0; + } + if (min_index > max_index) { (*insert) = min_index; return -1; @@ -615,6 +624,7 @@ int ompi_osc_ucx_win_attach(struct ompi_win_t *win, void *base, size_t len) { ret = opal_common_ucx_wpmem_create(module->ctx, &base, len, OPAL_COMMON_UCX_MEM_MAP, &exchange_len_info, + OPAL_COMMON_UCX_WPMEM_ADDR_EXCHANGE_DIRECT, (void *)module->comm, &(module->local_dynamic_win_info[insert_index].my_mem_addr), &(module->local_dynamic_win_info[insert_index].my_mem_addr_size), @@ -680,7 +690,9 @@ int ompi_osc_ucx_free(struct ompi_win_t *win) { } OBJ_DESTRUCT(&module->pending_posts); - opal_common_ucx_wpmem_flush(module->mem, OPAL_COMMON_UCX_SCOPE_WORKER, 0); + if (NULL != module->mem) { + opal_common_ucx_wpmem_flush(module->mem, OPAL_COMMON_UCX_SCOPE_WORKER, 0); + } ret = module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module); @@ -699,7 +711,9 @@ int ompi_osc_ucx_free(struct ompi_win_t *win) { free(module->state_addrs); opal_common_ucx_wpmem_free(module->state_mem); - opal_common_ucx_wpmem_free(module->mem); + if (NULL != module->mem) { + opal_common_ucx_wpmem_free(module->mem); + } opal_common_ucx_wpctx_release(module->ctx); diff --git a/ompi/mca/osc/ucx/osc_ucx_passive_target.c b/ompi/mca/osc/ucx/osc_ucx_passive_target.c index 5df360f0464..d25514a6a57 100644 --- a/ompi/mca/osc/ucx/osc_ucx_passive_target.c +++ b/ompi/mca/osc/ucx/osc_ucx_passive_target.c @@ -216,8 +216,8 @@ int ompi_osc_ucx_lock_all(int mpi_assert, struct ompi_win_t *win) { int ompi_osc_ucx_unlock_all(struct ompi_win_t *win) { ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*)win->w_osc_module; - int comm_size = ompi_comm_size(module->comm); - int ret = OMPI_SUCCESS; + int comm_size = ompi_comm_size(module->comm), + i = 0, ret = OMPI_SUCCESS; if (module->epoch_type.access != PASSIVE_ALL_EPOCH) { return OMPI_ERR_RMA_SYNC; @@ -225,9 +225,19 @@ int ompi_osc_ucx_unlock_all(struct ompi_win_t *win) { assert(module->lock_count == 0); - ret = 
opal_common_ucx_wpmem_flush(module->mem, OPAL_COMMON_UCX_SCOPE_WORKER, 0); - if (ret != OMPI_SUCCESS) { - return ret; + if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { + for (i = 0; i < module->state.dynamic_win_count; i++) { + ret = opal_common_ucx_wpmem_flush(module->local_dynamic_win_info[i].mem , OPAL_COMMON_UCX_SCOPE_WORKER, 0); + if (ret != OMPI_SUCCESS) { + return ret; + } + } + } + else { + ret = opal_common_ucx_wpmem_flush(module->mem, OPAL_COMMON_UCX_SCOPE_WORKER, 0); + if (ret != OMPI_SUCCESS) { + return ret; + } } if (!module->lock_all_is_nocheck) { diff --git a/ompi/mca/part/part.h b/ompi/mca/part/part.h index df0ee7ace51..aa3977bc84c 100644 --- a/ompi/mca/part/part.h +++ b/ompi/mca/part/part.h @@ -15,6 +15,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2011-2020 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2021 Bull S.A.S. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -141,6 +142,7 @@ typedef int (*mca_part_base_module_precv_init_fn_t)( int src, int tag, struct ompi_communicator_t* comm, + struct ompi_info_t * info, struct ompi_request_t **request ); @@ -166,6 +168,7 @@ typedef int (*mca_part_base_module_psend_init_fn_t)( int dst, int tag, struct ompi_communicator_t* comm, + struct ompi_info_t * info, struct ompi_request_t **request ); diff --git a/ompi/mca/part/persist/part_persist.h b/ompi/mca/part/persist/part_persist.h index febc1385376..939aae99696 100644 --- a/ompi/mca/part/persist/part_persist.h +++ b/ompi/mca/part/persist/part_persist.h @@ -13,6 +13,7 @@ * Copyright (c) 2021 University of Alabama at Birmingham. All rights reserved. * Copyright (c) 2021 Tennessee Technological University. All rights reserved. * Copyright (c) 2021 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2021 Bull S.A.S. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -331,6 +332,7 @@ mca_part_persist_precv_init(void *buf, int src, int tag, struct ompi_communicator_t *comm, + struct ompi_info_t * info, struct ompi_request_t **request) { int err = OMPI_SUCCESS; @@ -398,6 +400,7 @@ mca_part_persist_psend_init(const void* buf, int dst, int tag, ompi_communicator_t* comm, + struct ompi_info_t * info, ompi_request_t** request) { int err = OMPI_SUCCESS; diff --git a/ompi/mca/pml/base/base.h b/ompi/mca/pml/base/base.h index 8eb37e48448..433c7a60833 100644 --- a/ompi/mca/pml/base/base.h +++ b/ompi/mca/pml/base/base.h @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +11,9 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ * * Additional copyrights may follow @@ -60,8 +63,6 @@ OMPI_DECLSPEC int mca_pml_base_pml_check_selected(const char *my_pml, struct ompi_proc_t **procs, size_t nprocs); -OMPI_DECLSPEC int mca_pml_base_finalize(void); - /* not #if conditional on OPAL_ENABLE_FT_MPI for ABI */ OMPI_DECLSPEC int mca_pml_base_revoke_comm(struct ompi_communicator_t *comm, bool coll_only); diff --git a/ompi/mca/pml/base/pml_base_bsend.c b/ompi/mca/pml/base/pml_base_bsend.c index 3826253e2ae..b17d03a2387 100644 --- a/ompi/mca/pml/base/pml_base_bsend.c +++ b/ompi/mca/pml/base/pml_base_bsend.c @@ -16,6 +16,8 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -34,6 +36,7 @@ #include "ompi/mca/pml/base/pml_base_sendreq.h" #include "ompi/mca/pml/base/pml_base_bsend.h" #include "opal/mca/mpool/mpool.h" +#include "ompi/runtime/mpiruntime.h" #ifdef HAVE_UNISTD_H #include @@ -56,6 +59,8 @@ static opal_atomic_int32_t mca_pml_bsend_init = 0; /* defined in pml_base_open.c */ extern char *ompi_pml_base_bsend_allocator_name; +static int mca_pml_base_bsend_fini (void); + /* * Routine to return pages to sub-allocator as needed */ @@ -77,7 +82,7 @@ static void* mca_pml_bsend_alloc_segment(void *ctx, size_t *size_inout) /* * One time initialization at startup */ -int mca_pml_base_bsend_init(bool thread_safe) +int mca_pml_base_bsend_init (void) { size_t tmp; @@ -100,6 +105,9 @@ int mca_pml_base_bsend_init(bool thread_safe) tmp >>= 1; mca_pml_bsend_pagebits++; } + + ompi_mpi_instance_append_finalize (mca_pml_base_bsend_fini); + return OMPI_SUCCESS; } @@ -107,7 +115,7 @@ int mca_pml_base_bsend_init(bool thread_safe) /* * One-time cleanup at shutdown - release any resources. */ -int mca_pml_base_bsend_fini(void) +static int mca_pml_base_bsend_fini (void) { if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init,-1) > 0) return OMPI_SUCCESS; diff --git a/ompi/mca/pml/base/pml_base_bsend.h b/ompi/mca/pml/base/pml_base_bsend.h index e50bdc7b5e8..725427e27f1 100644 --- a/ompi/mca/pml/base/pml_base_bsend.h +++ b/ompi/mca/pml/base/pml_base_bsend.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,6 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,8 +28,7 @@ BEGIN_C_DECLS -OMPI_DECLSPEC int mca_pml_base_bsend_init(bool enable_mpi_threads); -OMPI_DECLSPEC int mca_pml_base_bsend_fini(void); +OMPI_DECLSPEC int mca_pml_base_bsend_init (void); int mca_pml_base_bsend_attach(void* addr, int size); int mca_pml_base_bsend_detach(void* addr, int* size); diff --git a/ompi/mca/pml/base/pml_base_frame.c b/ompi/mca/pml/base/pml_base_frame.c index 5481095d486..d33b4f2cab0 100644 --- a/ompi/mca/pml/base/pml_base_frame.c +++ b/ompi/mca/pml/base/pml_base_frame.c @@ -126,14 +126,6 @@ static int mca_pml_base_register(mca_base_register_flag_t flags) return OMPI_SUCCESS; } -int mca_pml_base_finalize(void) { - if (NULL != mca_pml_base_selected_component.pmlm_finalize) { - return mca_pml_base_selected_component.pmlm_finalize(); - } - return OMPI_SUCCESS; -} - - static int mca_pml_base_close(void) { int i, j; diff --git a/ompi/mca/pml/base/pml_base_select.c b/ompi/mca/pml/base/pml_base_select.c index 1b9c1de13d4..301b51d8889 100644 --- a/ompi/mca/pml/base/pml_base_select.c +++ b/ompi/mca/pml/base/pml_base_select.c @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology @@ -15,6 +15,7 @@ * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights + * Copyright (c) 2018-2020 Triad National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -37,6 +38,7 @@ #include "opal/mca/pmix/pmix-internal.h" #include "ompi/constants.h" +#include "ompi/instance/instance.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" #include "ompi/proc/proc.h" @@ -46,6 +48,15 @@ typedef struct opened_component_t { mca_pml_base_component_t *om_component; } opened_component_t; + +static int mca_pml_base_finalize (void) { + if (NULL != mca_pml_base_selected_component.pmlm_finalize) { + return mca_pml_base_selected_component.pmlm_finalize(); + } + + return OMPI_SUCCESS; +} + /** * Function for selecting one component from all those that are * available. @@ -229,6 +240,7 @@ int mca_pml_base_select(bool enable_progress_threads, ret = mca_pml_base_pml_selected(best_component->pmlm_version.mca_component_name); /* All done */ + ompi_mpi_instance_append_finalize (mca_pml_base_finalize); return ret; } diff --git a/ompi/mca/pml/cm/pml_cm.c b/ompi/mca/pml/cm/pml_cm.c index 567b00bc331..51a63fdba4c 100644 --- a/ompi/mca/pml/cm/pml_cm.c +++ b/ompi/mca/pml/cm/pml_cm.c @@ -93,7 +93,7 @@ int mca_pml_cm_add_comm(ompi_communicator_t* comm) { /* should never happen, but it was, so check */ - if (comm->c_contextid > ompi_pml_cm.super.pml_max_contextid) { + if (comm->c_index > ompi_pml_cm.super.pml_max_contextid) { return OMPI_ERR_OUT_OF_RESOURCE; } diff --git a/ompi/mca/pml/cm/pml_cm.h b/ompi/mca/pml/cm/pml_cm.h index fa563e0b313..6440545acf9 100644 --- a/ompi/mca/pml/cm/pml_cm.h +++ b/ompi/mca/pml/cm/pml_cm.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2015 Research Organization for Information Science @@ -212,8 +212,8 @@ mca_pml_cm_recv(void *addr, ompi_request_wait_completion(&req.req_ompi); - if (NULL != status) { /* return status */ - *status = req.req_ompi.req_status; + if (MPI_STATUS_IGNORE != status) { + OMPI_COPY_STATUS(status, req.req_ompi.req_status, false); } ret = req.req_ompi.req_status.MPI_ERROR; OBJ_DESTRUCT(&convertor); @@ -548,8 +548,8 @@ mca_pml_cm_mrecv(void *buf, ompi_request_wait_completion(&recvreq->req_base.req_ompi); - if (NULL != status) { /* return status */ - *status = recvreq->req_base.req_ompi.req_status; + if (MPI_STATUS_IGNORE != status) { + OMPI_COPY_STATUS(status, recvreq->req_base.req_ompi.req_status, false); } ret = recvreq->req_base.req_ompi.req_status.MPI_ERROR; ompi_request_free( (ompi_request_t**)&recvreq ); diff --git a/ompi/mca/pml/cm/pml_cm_component.c b/ompi/mca/pml/cm/pml_cm_component.c index 40def94feb2..cd5808e43f9 100644 --- a/ompi/mca/pml/cm/pml_cm_component.c +++ b/ompi/mca/pml/cm/pml_cm_component.c @@ -151,8 +151,10 @@ mca_pml_cm_component_init(int* priority, ompi_pml_cm.super.pml_flags |= MCA_PML_BASE_FLAG_REQUIRE_WORLD; } - /* update our tag / context id max values based on MTL - information */ + if (ompi_mtl->mtl_flags & MCA_MTL_BASE_FLAG_SUPPORTS_EXT_CID) { + ompi_pml_cm.super.pml_flags |= MCA_PML_BASE_FLAG_SUPPORTS_EXT_CID; + } + ompi_pml_cm.super.pml_max_contextid = ompi_mtl->mtl_max_contextid; ompi_pml_cm.super.pml_max_tag = ompi_mtl->mtl_max_tag; diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index a25caf46d35..4ead13a1f91 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -22,6 +22,9 @@ * All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. + * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reseved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -60,32 +63,34 @@ mca_pml_ob1_t mca_pml_ob1 = { { - mca_pml_ob1_add_procs, - mca_pml_ob1_del_procs, - mca_pml_ob1_enable, - NULL, /* mca_pml_ob1_progress, */ - mca_pml_ob1_add_comm, - mca_pml_ob1_del_comm, + .pml_add_procs = mca_pml_ob1_add_procs, + .pml_del_procs = mca_pml_ob1_del_procs, + .pml_enable = mca_pml_ob1_enable, + .pml_progress = NULL, /* mca_pml_ob1_progress, */ + .pml_add_comm = mca_pml_ob1_add_comm, + .pml_del_comm = mca_pml_ob1_del_comm, #if OPAL_ENABLE_FT_MPI - mca_pml_ob1_revoke_comm, + .pml_revoke_comm = mca_pml_ob1_revoke_comm, +#else + .pml_revoke_comm = NULL, #endif - mca_pml_ob1_irecv_init, - mca_pml_ob1_irecv, - mca_pml_ob1_recv, - mca_pml_ob1_isend_init, - mca_pml_ob1_isend, - mca_pml_ob1_send, - mca_pml_ob1_iprobe, - mca_pml_ob1_probe, - mca_pml_ob1_start, - mca_pml_ob1_improbe, - mca_pml_ob1_mprobe, - mca_pml_ob1_imrecv, - mca_pml_ob1_mrecv, - mca_pml_ob1_dump, - 65535, - INT_MAX, - 0 /* flags */ + .pml_irecv_init = mca_pml_ob1_irecv_init, + .pml_irecv = mca_pml_ob1_irecv, + .pml_recv = mca_pml_ob1_recv, + .pml_isend_init = mca_pml_ob1_isend_init, + .pml_isend = mca_pml_ob1_isend, + .pml_send = mca_pml_ob1_send, + .pml_iprobe = mca_pml_ob1_iprobe, + .pml_probe = mca_pml_ob1_probe, + .pml_start = mca_pml_ob1_start, + .pml_improbe = mca_pml_ob1_improbe, + .pml_mprobe = mca_pml_ob1_mprobe, + .pml_imrecv = mca_pml_ob1_imrecv, + .pml_mrecv = mca_pml_ob1_mrecv, + .pml_dump = mca_pml_ob1_dump, + .pml_max_contextid = 65535, + .pml_max_tag = INT_MAX, + .pml_flags = 0 /* flags */ } }; @@ -193,6 +198,7 @@ int mca_pml_ob1_enable(bool enable) NULL, 0, NULL, NULL, NULL); mca_pml_ob1.enabled = true; + return OMPI_SUCCESS; } @@ -209,7 +215,7 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) } /* should never happen, but it was, so check */ - if (comm->c_contextid > mca_pml_ob1.super.pml_max_contextid) { + if (comm->c_index > mca_pml_ob1.super.pml_max_contextid) { OBJ_RELEASE(pml_comm); return 
OMPI_ERR_OUT_OF_RESOURCE; } @@ -224,9 +230,25 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) OPAL_LIST_FOREACH_SAFE(frag, next_frag, &mca_pml_ob1.non_existing_communicator_pending, mca_pml_ob1_recv_frag_t) { hdr = &frag->hdr.hdr_match; + if (MCA_PML_OB1_HDR_TYPE_CID == frag->hdr.hdr_common.hdr_type) { + if (!ompi_comm_cid_compare (comm, frag->hdr.hdr_cid.hdr_cid)) { + continue; + } + + /* handle this CID*/ + mca_pml_ob1_handle_cid (comm, frag->hdr.hdr_ext_match.hdr_match.hdr_src, &frag->hdr.hdr_cid); + + hdr = &frag->hdr.hdr_ext_match.hdr_match; + hdr->hdr_ctx = comm->c_index; + + /* NTH: this is ok because the pointer that will be freed is stored in frag->addr[] */ + frag->segments[0].seg_addr.pval = (void *)((uintptr_t) frag->segments[0].seg_addr.pval + sizeof (frag->hdr.hdr_cid)); + } + /* Is this fragment for the current communicator ? */ - if( frag->hdr.hdr_match.hdr_ctx != comm->c_contextid ) + if (hdr->hdr_ctx != comm->c_index) { continue; + } /* As we now know we work on a fragment for this communicator * we should remove it from the @@ -330,11 +352,17 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) return rc; } - rc = mca_bml.bml_add_procs( nprocs, - procs, - &reachable ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + OBJ_CONSTRUCT(&reachable, opal_bitmap_t); + rc = opal_bitmap_init(&reachable, (int)nprocs); + if (OMPI_SUCCESS != rc) { + return rc; + } + + rc = mca_bml.bml_add_procs (nprocs, procs, &reachable); + OBJ_DESTRUCT(&reachable); + if (OMPI_SUCCESS != rc) { + return rc; + } /* Check that values supplied by all initialized btls will work for us. 
Note that this is the list of all initialized BTLs, @@ -358,8 +386,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) sm->btl_component->btl_version.mca_component_name, sizeof(mca_pml_ob1_hdr_t), sm->btl_component->btl_version.mca_component_name); - rc = OMPI_ERR_BAD_PARAM; - goto cleanup_and_return; + return OMPI_ERR_BAD_PARAM; } #if OPAL_CUDA_GDR_SUPPORT /* If size is SIZE_MAX, then we know we want to set this to the minimum possible @@ -380,8 +407,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) sm->btl_component->btl_version.mca_component_name, sizeof(mca_pml_ob1_hdr_t), sm->btl_component->btl_version.mca_component_name); - rc = OMPI_ERR_BAD_PARAM; - goto cleanup_and_return; + return OMPI_ERR_BAD_PARAM; } } if (0 == sm->btl_module->btl_cuda_rdma_limit) { @@ -398,8 +424,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) sm->btl_component->btl_version.mca_component_name, sm->btl_module->btl_cuda_eager_limit, sm->btl_component->btl_version.mca_component_name); - rc = OMPI_ERR_BAD_PARAM; - goto cleanup_and_return; + return OMPI_ERR_BAD_PARAM; } } #endif /* OPAL_CUDA_GDR_SUPPORT */ @@ -410,54 +435,61 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_MATCH, mca_pml_ob1_recv_frag_callback_match, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_RNDV, mca_pml_ob1_recv_frag_callback_rndv, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_RGET, mca_pml_ob1_recv_frag_callback_rget, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_ACK, mca_pml_ob1_recv_frag_callback_ack, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { 
+ return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_FRAG, mca_pml_ob1_recv_frag_callback_frag, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_PUT, mca_pml_ob1_recv_frag_callback_put, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_FIN, mca_pml_ob1_recv_frag_callback_fin, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - /* register error handlers */ - rc = mca_bml.bml_register_error(mca_pml_ob1_error_handler); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } - cleanup_and_return: - OBJ_DESTRUCT(&reachable); + rc = mca_bml.bml_register (MCA_PML_OB1_HDR_TYPE_CID, + mca_pml_ob1_recv_frag_callback_cid, + NULL); + if (OMPI_SUCCESS != rc) { + return rc; + } - return rc; + /* register error handlers */ + return mca_bml.bml_register_error(mca_pml_ob1_error_handler); } /* @@ -594,8 +626,8 @@ int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose) /* TODO: don't forget to dump mca_pml_ob1.non_existing_communicator_pending */ - opal_output(0, "Communicator %s [%p](%d) rank %d recv_seq %d num_procs %lu last_probed %lu\n", - comm->c_name, (void*) comm, comm->c_contextid, comm->c_my_rank, + opal_output(0, "Communicator %s [%p](%s) rank %d recv_seq %d num_procs %lu last_probed %lu\n", + comm->c_name, (void*) comm, ompi_comm_print_cid (comm), comm->c_my_rank, pml_comm->recv_sequence, pml_comm->num_procs, pml_comm->last_probed); #if !MCA_PML_OB1_CUSTOM_MATCH @@ -653,10 +685,8 @@ int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose) return OMPI_SUCCESS; } -static void mca_pml_ob1_fin_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) +static void mca_pml_ob1_control_completion 
(mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t *endpoint, + mca_btl_base_descriptor_t *des, int status) { mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; @@ -665,40 +695,36 @@ static void mca_pml_ob1_fin_completion( mca_btl_base_module_t* btl, MCA_PML_OB1_PROGRESS_PENDING(bml_btl); } -/** - * Send an FIN to the peer. If we fail to send this ack (no more available - * fragments or the send failed) this function automatically add the FIN - * to the list of pending FIN, Which guarantee that the FIN will be sent - * later. - */ -int mca_pml_ob1_send_fin( ompi_proc_t* proc, - mca_bml_base_btl_t* bml_btl, - opal_ptr_t hdr_frag, - uint64_t rdma_size, - uint8_t order, - int status ) + +int mca_pml_ob1_send_control_btl (mca_bml_base_btl_t *bml_btl, int order, mca_pml_ob1_hdr_t *hdr, size_t hdr_size, + bool add_to_pending) { - mca_btl_base_descriptor_t* fin; + int des_flags = MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL; + mca_btl_base_descriptor_t *des; int rc; - mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_ob1_fin_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL); + if (NULL != bml_btl->btl->btl_sendi) { + rc = mca_bml_base_sendi (bml_btl, NULL, hdr, hdr_size, 0, order, des_flags, hdr->hdr_common.hdr_type, &des); + if (OPAL_LIKELY(OPAL_SUCCESS == rc)) { + return rc; + } + } else { + (void) mca_bml_base_alloc (bml_btl, &des, order, hdr_size, des_flags); + } - if(NULL == fin) { - MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_frag, rdma_size, bml_btl, order, status); + if (OPAL_UNLIKELY(NULL == des)) { + if (add_to_pending) { + mca_pml_ob1_add_to_pending (NULL, bml_btl, order, hdr, hdr_size); + } return OMPI_ERR_OUT_OF_RESOURCE; } - fin->des_cbfunc = mca_pml_ob1_fin_completion; - fin->des_cbdata = NULL; - /* fill in header */ - mca_pml_ob1_fin_hdr_prepare ((mca_pml_ob1_fin_hdr_t *) fin->des_segments->seg_addr.pval, - 0, hdr_frag.lval, status ? 
status : (int64_t) rdma_size); + des->des_cbfunc = mca_pml_ob1_control_completion; - ob1_hdr_hton((mca_pml_ob1_hdr_t *) fin->des_segments->seg_addr.pval, MCA_PML_OB1_HDR_TYPE_FIN, proc); + memcpy (des->des_segments->seg_addr.pval, hdr, hdr_size); /* queue request */ - rc = mca_bml_base_send( bml_btl, fin, MCA_PML_OB1_HDR_TYPE_FIN ); + rc = mca_bml_base_send (bml_btl, des, hdr->hdr_common.hdr_type); if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { MCA_PML_OB1_PROGRESS_PENDING(bml_btl); @@ -706,76 +732,98 @@ int mca_pml_ob1_send_fin( ompi_proc_t* proc, SPC_RECORD(OMPI_SPC_BYTES_SENT_MPI, (ompi_spc_value_t)sizeof(mca_pml_ob1_fin_hdr_t)); return OMPI_SUCCESS; } - mca_bml_base_free(bml_btl, fin); - MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_frag, rdma_size, bml_btl, order, status); + + mca_bml_base_free(bml_btl, des); + if (add_to_pending) { + mca_pml_ob1_add_to_pending (NULL, bml_btl, order, hdr, hdr_size); + } + + return OMPI_ERR_OUT_OF_RESOURCE; +} + +int mca_pml_ob1_send_control_any (ompi_proc_t *proc, int order, mca_pml_ob1_hdr_t *hdr, size_t hdr_size, + bool add_to_pending) +{ + mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (proc); + int rc; + + assert (NULL != endpoint); + + for (size_t i = 0 ; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager) ; ++i) { + mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_get_next (&endpoint->btl_eager); + + rc = mca_pml_ob1_send_control_btl (bml_btl, order, hdr, hdr_size, false); + if (OMPI_SUCCESS == rc) { + return OMPI_SUCCESS; + } + } + + if (add_to_pending) { + mca_pml_ob1_add_to_pending (proc, NULL, order, hdr, hdr_size); + } + return OMPI_ERR_OUT_OF_RESOURCE; } +/** + * Send an FIN to the peer. If we fail to send this ack (no more available + * fragments or the send failed) this function automatically add the FIN + * to the list of pending FIN, Which guarantee that the FIN will be sent + * later. 
+ */ +int mca_pml_ob1_send_fin (ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, opal_ptr_t hdr_frag, uint64_t rdma_size, + uint8_t order, int status) +{ + mca_pml_ob1_fin_hdr_t fin; + + /* fill in header */ + mca_pml_ob1_fin_hdr_prepare (&fin, 0, hdr_frag.lval, status ? status : (int64_t) rdma_size); + + ob1_hdr_hton((mca_pml_ob1_hdr_t *) &fin, MCA_PML_OB1_HDR_TYPE_FIN, proc); + + return mca_pml_ob1_send_control_btl (bml_btl, order, (mca_pml_ob1_hdr_t *) &fin, sizeof (fin), true); +} + +int mca_pml_ob1_send_cid (ompi_proc_t *proc, ompi_communicator_t *comm) +{ + mca_pml_ob1_cid_hdr_t cid; + + mca_pml_ob1_cid_hdr_prepare (&cid, comm); + ob1_hdr_hton ((mca_pml_ob1_hdr_t *) &cid, cid->hdr_common.hdr_type, proc); + + return mca_pml_ob1_send_control_any (proc, MCA_BTL_NO_ORDER, (mca_pml_ob1_hdr_t *) &cid, sizeof (cid), true); +} + void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl) { mca_pml_ob1_pckt_pending_t *pckt; - int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_ob1.pckt_pending); - - for(i = 0; i < s; i++) { - mca_bml_base_btl_t *send_dst = NULL; - OPAL_THREAD_LOCK(&mca_pml_ob1.lock); - pckt = (mca_pml_ob1_pckt_pending_t*) - opal_list_remove_first(&mca_pml_ob1.pckt_pending); - OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); - if(NULL == pckt) + int32_t rc, max = (int32_t) opal_list_get_size (&mca_pml_ob1.pckt_pending); + + for (int32_t i = 0; i < max ; ++i) { + OPAL_THREAD_SCOPED_LOCK(&mca_pml_ob1.lock, { + pckt = (mca_pml_ob1_pckt_pending_t*) + opal_list_remove_first(&mca_pml_ob1.pckt_pending); + }); + if (NULL == pckt) { break; - if(pckt->bml_btl != NULL && - pckt->bml_btl->btl == bml_btl->btl) { - send_dst = pckt->bml_btl; - } else { - mca_bml_base_endpoint_t* endpoint = - (mca_bml_base_endpoint_t*) pckt->proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - send_dst = mca_bml_base_btl_array_find( - &endpoint->btl_eager, bml_btl->btl); } - if(NULL == send_dst) { - OPAL_THREAD_LOCK(&mca_pml_ob1.lock); - opal_list_append(&mca_pml_ob1.pckt_pending, - 
(opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); - continue; + + if (pckt->bml_btl) { + rc = mca_pml_ob1_send_control_btl (pckt->bml_btl, pckt->order, &pckt->hdr, pckt->hdr_size, false); + } else { + rc = mca_pml_ob1_send_control_any (pckt->proc, pckt->order, &pckt->hdr, pckt->hdr_size, false); } - switch(pckt->hdr.hdr_common.hdr_type) { - case MCA_PML_OB1_HDR_TYPE_ACK: - rc = mca_pml_ob1_recv_request_ack_send_btl(pckt->proc, - send_dst, - pckt->hdr.hdr_ack.hdr_src_req.lval, - pckt->hdr.hdr_ack.hdr_dst_req.pval, - pckt->hdr.hdr_ack.hdr_send_offset, - pckt->hdr.hdr_ack.hdr_send_size, - pckt->hdr.hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA); - if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { - OPAL_THREAD_LOCK(&mca_pml_ob1.lock); + if (OPAL_SUCCESS != rc) { + /* could not send the packet. readd it to the pending list */ + OPAL_THREAD_SCOPED_LOCK(&mca_pml_ob1.lock, { opal_list_append(&mca_pml_ob1.pckt_pending, (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); - return; - } - break; - case MCA_PML_OB1_HDR_TYPE_FIN: - rc = mca_pml_ob1_send_fin(pckt->proc, send_dst, - pckt->hdr.hdr_fin.hdr_frag, - pckt->hdr.hdr_fin.hdr_size, - pckt->order, - pckt->status); - if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { - MCA_PML_OB1_PCKT_PENDING_RETURN(pckt); - return; - } - break; - default: - opal_output(0, "[%s:%d] wrong header type\n", - __FILE__, __LINE__); - break; + }); + } else { + /* We're done with this packet, return it back to the free list */ + MCA_PML_OB1_PCKT_PENDING_RETURN(pckt); } - /* We're done with this packet, return it back to the free list */ - MCA_PML_OB1_PCKT_PENDING_RETURN(pckt); } } diff --git a/ompi/mca/pml/ob1/pml_ob1.h b/ompi/mca/pml/ob1/pml_ob1.h index f425e98d6ff..726791bfeba 100644 --- a/ompi/mca/pml/ob1/pml_ob1.h +++ b/ompi/mca/pml/ob1/pml_ob1.h @@ -12,10 +12,12 @@ * All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. 
All rights reserved * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,6 +43,7 @@ #include "ompi/mca/bml/base/base.h" #include "ompi/proc/proc.h" #include "opal/mca/allocator/base/base.h" +#include "ompi/runtime/mpiruntime.h" BEGIN_C_DECLS @@ -226,11 +229,11 @@ END_C_DECLS struct mca_pml_ob1_pckt_pending_t { opal_free_list_item_t super; - ompi_proc_t* proc; + ompi_proc_t *proc; mca_pml_ob1_hdr_t hdr; + size_t hdr_size; struct mca_bml_base_btl_t *bml_btl; uint8_t order; - int status; }; typedef struct mca_pml_ob1_pckt_pending_t mca_pml_ob1_pckt_pending_t; OBJ_CLASS_DECLARATION(mca_pml_ob1_pckt_pending_t); @@ -248,22 +251,22 @@ do { \ (opal_free_list_item_t*)pckt); \ } while(0) -#define MCA_PML_OB1_ADD_FIN_TO_PENDING(P, D, Sz, B, O, S) \ - do { \ - mca_pml_ob1_pckt_pending_t *_pckt; \ - \ - MCA_PML_OB1_PCKT_PENDING_ALLOC(_pckt); \ - mca_pml_ob1_fin_hdr_prepare (&_pckt->hdr.hdr_fin, 0, \ - (D).lval, (Sz)); \ - _pckt->proc = (P); \ - _pckt->bml_btl = (B); \ - _pckt->order = (O); \ - _pckt->status = (S); \ - OPAL_THREAD_LOCK(&mca_pml_ob1.lock); \ - opal_list_append(&mca_pml_ob1.pckt_pending, \ - (opal_list_item_t*)_pckt); \ - OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); \ - } while(0) +static inline void mca_pml_ob1_add_to_pending (ompi_proc_t *proc, mca_bml_base_btl_t *bml_btl, + int order, mca_pml_ob1_hdr_t *hdr, size_t hdr_size) +{ + mca_pml_ob1_pckt_pending_t *pckt; + + MCA_PML_OB1_PCKT_PENDING_ALLOC(pckt); + assert (sizeof (pckt->hdr) >= hdr_size); + pckt->proc = proc; + pckt->order = order; + pckt->hdr_size = hdr_size; + pckt->bml_btl = 
bml_btl; + memcpy (&pckt->hdr, hdr, hdr_size); + OPAL_THREAD_SCOPED_LOCK(&mca_pml_ob1.lock, { + opal_list_append(&mca_pml_ob1.pckt_pending, &pckt->super.super); + }); +} #define OB1_MATCHING_LOCK(lock) \ do { \ @@ -287,6 +290,8 @@ do { \ int mca_pml_ob1_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, opal_ptr_t hdr_frag, uint64_t size, uint8_t order, int status); +int mca_pml_ob1_send_cid (ompi_proc_t *proc, ompi_communicator_t *comm); + /* This function tries to resend FIN/ACK packets from pckt_pending queue. * Packets are added to the queue when sending of FIN or ACK is failed due to * resource unavailability. bml_btl passed to the function doesn't represents @@ -408,4 +413,9 @@ mca_pml_ob1_calc_weighted_length( mca_pml_ob1_com_btl_t *btls, int num_btls, siz */ int mca_pml_ob1_enable_progress(int32_t count); +int mca_pml_ob1_send_control_any (ompi_proc_t *proc, int order, mca_pml_ob1_hdr_t *hdr, size_t hdr_size, + bool add_to_pending); +int mca_pml_ob1_send_control_btl (mca_bml_base_btl_t *bml_btl, int order, mca_pml_ob1_hdr_t *hdr, size_t hdr_size, + bool add_to_pending); + #endif diff --git a/ompi/mca/pml/ob1/pml_ob1_comm.c b/ompi/mca/pml/ob1/pml_ob1_comm.c index 9eeedd6b05d..aa0f2046638 100644 --- a/ompi/mca/pml/ob1/pml_ob1_comm.c +++ b/ompi/mca/pml/ob1/pml_ob1_comm.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,9 @@ * * Copyright (c) 2018 Sandia National Laboratories * All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights + * reserved. 
+ * * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,6 +37,8 @@ static void mca_pml_ob1_comm_proc_construct(mca_pml_ob1_comm_proc_t* proc) proc->expected_sequence = 1; proc->send_sequence = 0; proc->frags_cant_match = NULL; + /* don't know the index of this communicator yet */ + proc->comm_index = -1; #if !MCA_PML_OB1_CUSTOM_MATCH OBJ_CONSTRUCT(&proc->specific_receives, opal_list_t); OBJ_CONSTRUCT(&proc->unexpected_frags, opal_list_t); @@ -84,7 +90,7 @@ static void mca_pml_ob1_comm_destruct(mca_pml_ob1_comm_t* comm) } } - free(comm->procs); + free ((void *) comm->procs); } #if !MCA_PML_OB1_CUSTOM_MATCH @@ -116,4 +122,26 @@ int mca_pml_ob1_comm_init_size (mca_pml_ob1_comm_t* comm, size_t size) return OMPI_SUCCESS; } +mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_create (ompi_communicator_t *comm, mca_pml_ob1_comm_t *pml_comm, int rank) +{ + mca_pml_ob1_comm_proc_t *proc = OBJ_NEW(mca_pml_ob1_comm_proc_t); + uintptr_t old_proc = 0; + proc->ompi_proc = ompi_comm_peer_lookup (comm, rank); + if (OMPI_COMM_IS_GLOBAL_INDEX (comm)) { + /* the index is global so we can save it on the proc now */ + proc->comm_index = comm->c_index; + } + OBJ_RETAIN(proc->ompi_proc); + /* make sure proc structure is filled in before adding it to the array */ + opal_atomic_wmb (); + + if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR((opal_atomic_intptr_t *) pml_comm->procs + rank, &old_proc, + (uintptr_t) proc)) { + /* proc was created by a competing thread. go ahead and throw this one away. 
*/ + OBJ_RELEASE(proc); + return (mca_pml_ob1_comm_proc_t *) old_proc; + } + + return proc; +} diff --git a/ompi/mca/pml/ob1/pml_ob1_comm.h b/ompi/mca/pml/ob1/pml_ob1_comm.h index 25313b4d204..7b7e978ec96 100644 --- a/ompi/mca/pml/ob1/pml_ob1_comm.h +++ b/ompi/mca/pml/ob1/pml_ob1_comm.h @@ -44,6 +44,7 @@ struct mca_pml_ob1_comm_proc_t { opal_object_t super; struct ompi_proc_t* ompi_proc; uint16_t expected_sequence; /**< send message sequence number - receiver side */ + int16_t comm_index; /**< index of this communicator on the receiver size (-1 - not set) */ opal_atomic_int32_t send_sequence; /**< send side sequence number */ struct mca_pml_ob1_recv_frag_t* frags_cant_match; /**< out-of-order fragment queues */ #if !MCA_PML_OB1_CUSTOM_MATCH @@ -54,6 +55,8 @@ struct mca_pml_ob1_comm_proc_t { OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_proc_t); +#define MCA_PML_OB1_PROC_REQUIRES_EXT_MATCH(proc) (-1 == (proc)->comm_index) + /** * Cached on ompi_communicator_t to hold queues/state * used by the PML<->PTL interface for matching logic. @@ -66,7 +69,7 @@ struct mca_pml_comm_t { opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */ #endif opal_mutex_t proc_lock; - mca_pml_ob1_comm_proc_t **procs; + mca_pml_ob1_comm_proc_t * volatile * procs; size_t num_procs; size_t last_probed; #if MCA_PML_OB1_CUSTOM_MATCH @@ -78,6 +81,11 @@ typedef struct mca_pml_comm_t mca_pml_ob1_comm_t; OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_t); +/** + * @brief Helper function to allocate/fill in ob1 proc for a comm/rank + */ +mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_create (ompi_communicator_t *comm, mca_pml_ob1_comm_t *pml_comm, int rank); + static inline mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_lookup (struct ompi_communicator_t *comm, int rank) { mca_pml_ob1_comm_t *pml_comm = (mca_pml_ob1_comm_t *)comm->c_pml_comm; @@ -93,15 +101,7 @@ static inline mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_lookup (struct ompi_comm " valid range of the communicator. 
Please submit a bug request!"); } if (OPAL_UNLIKELY(NULL == pml_comm->procs[rank])) { - OPAL_THREAD_LOCK(&pml_comm->proc_lock); - if (NULL == pml_comm->procs[rank]) { - mca_pml_ob1_comm_proc_t* proc = OBJ_NEW(mca_pml_ob1_comm_proc_t); - proc->ompi_proc = ompi_comm_peer_lookup (comm, rank); - OBJ_RETAIN(proc->ompi_proc); - opal_atomic_wmb (); - pml_comm->procs[rank] = proc; - } - OPAL_THREAD_UNLOCK(&pml_comm->proc_lock); + mca_pml_ob1_peer_create (comm, pml_comm, rank); } return pml_comm->procs[rank]; diff --git a/ompi/mca/pml/ob1/pml_ob1_component.c b/ompi/mca/pml/ob1/pml_ob1_component.c index 0feb982ae60..57fccd643ac 100644 --- a/ompi/mca/pml/ob1/pml_ob1_component.c +++ b/ompi/mca/pml/ob1/pml_ob1_component.c @@ -314,6 +314,9 @@ mca_pml_ob1_component_init( int* priority, } + /** this pml supports the extended CID space */ + mca_pml_ob1.super.pml_flags |= MCA_PML_BASE_FLAG_SUPPORTS_EXT_CID; + return &mca_pml_ob1.super; } diff --git a/ompi/mca/pml/ob1/pml_ob1_hdr.h b/ompi/mca/pml/ob1/pml_ob1_hdr.h index 716dd841511..4ce0d84e150 100644 --- a/ompi/mca/pml/ob1/pml_ob1_hdr.h +++ b/ompi/mca/pml/ob1/pml_ob1_hdr.h @@ -49,13 +49,14 @@ #define MCA_PML_OB1_HDR_TYPE_GET (MCA_BTL_TAG_PML + 7) #define MCA_PML_OB1_HDR_TYPE_PUT (MCA_BTL_TAG_PML + 8) #define MCA_PML_OB1_HDR_TYPE_FIN (MCA_BTL_TAG_PML + 9) +#define MCA_PML_OB1_HDR_TYPE_CID (MCA_BTL_TAG_PML + 10) -#define MCA_PML_OB1_HDR_FLAGS_ACK 1 /* is an ack required */ -#define MCA_PML_OB1_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */ -#define MCA_PML_OB1_HDR_FLAGS_PIN 4 /* is user buffer pinned */ -#define MCA_PML_OB1_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */ -#define MCA_PML_OB1_HDR_FLAGS_NORDMA 16 /* rest will be send by copy-in-out */ -#define MCA_PML_OB1_HDR_FLAGS_SIGNAL 32 /* message can be optionally signalling */ +#define MCA_PML_OB1_HDR_FLAGS_ACK 0x01 /* is an ack required */ +#define MCA_PML_OB1_HDR_FLAGS_NBO 0x02 /* is the hdr in network byte order */ +#define MCA_PML_OB1_HDR_FLAGS_PIN 0x04 /* is user 
buffer pinned */ +#define MCA_PML_OB1_HDR_FLAGS_CONTIG 0x08 /* is user buffer contiguous */ +#define MCA_PML_OB1_HDR_FLAGS_NORDMA 0x10 /* rest will be send by copy-in-out */ +#define MCA_PML_OB1_HDR_FLAGS_SIGNAL 0x20 /* message can be optionally signalling */ /** * Common hdr attributes - must be first element in each hdr type @@ -76,6 +77,41 @@ static inline void mca_pml_ob1_common_hdr_prepare (mca_pml_ob1_common_hdr_t *hdr #define MCA_PML_OB1_COMMON_HDR_NTOH(h) #define MCA_PML_OB1_COMMON_HDR_HTON(h) +/** + * Header definition for sending a CID/local comm index combo + */ +struct mca_pml_ob1_cid_hdr_t { + mca_pml_ob1_common_hdr_t hdr_common; + ompi_comm_extended_cid_t hdr_cid; + int16_t hdr_src_comm_index; + int32_t hdr_src; +}; + +typedef struct mca_pml_ob1_cid_hdr_t mca_pml_ob1_cid_hdr_t; + +static inline void mca_pml_ob1_cid_hdr_prepare (mca_pml_ob1_cid_hdr_t *hdr, ompi_communicator_t *comm) +{ + mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, MCA_PML_OB1_HDR_TYPE_CID, 0); + hdr->hdr_cid = ompi_comm_get_extended_cid (comm); + hdr->hdr_src_comm_index = comm->c_index; + hdr->hdr_src = ompi_comm_rank (comm); +} + +#define MCA_PML_OB1_EXT_CID_HDR_HTON(h) \ + do { \ + MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \ + (h).hdr_src_comm_index = htons((h).hdr_src_comm_index); \ + ompi_comm_cid_hton(&(h).hdr_cid); \ + } while (0) + +#define MCA_PML_OB1_EXT_CID_HDR_NTOH(h) \ + do { \ + MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \ + (h).hdr_src_comm_index = ntonh((h).hdr_src_comm_index); \ + ompi_comm_cid_ntoh(&(h).hdr_cid); \ + } while (0) + + /** * Header definition for the first fragment, contains the * attributes required to match the corresponding posted receive. 
@@ -130,7 +166,17 @@ do { \ (h).hdr_seq = htons((h).hdr_seq); \ } while (0) -/** +struct mca_pml_ob1_ext_match_hdr_t { + mca_pml_ob1_cid_hdr_t hdr_ext_cid; + + /* actual match */ + mca_pml_ob1_match_hdr_t hdr_match; +}; + +typedef struct mca_pml_ob1_ext_match_hdr_t mca_pml_ob1_ext_match_hdr_t; + +/* +* * Header definition for the first fragment when an acknowledgment * is required. This could be the first fragment of a large message * or a short message that requires an ack (synchronous). @@ -142,6 +188,14 @@ struct mca_pml_ob1_rendezvous_hdr_t { }; typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t; +struct mca_pml_ob1_ext_rendezvous_hdr_t { + mca_pml_ob1_cid_hdr_t hdr_ext_cid; + + /* actual match */ + mca_pml_ob1_rendezvous_hdr_t hdr_rndv; +}; +typedef struct mca_pml_ob1_ext_rendezvous_hdr_t mca_pml_ob1_ext_rendezvous_hdr_t; + static inline void mca_pml_ob1_rendezvous_hdr_prepare (mca_pml_ob1_rendezvous_hdr_t *hdr, uint8_t hdr_type, uint8_t hdr_flags, uint16_t hdr_ctx, int32_t hdr_src, int32_t hdr_tag, uint16_t hdr_seq, uint64_t hdr_msg_length, void *hdr_src_req) @@ -180,6 +234,15 @@ struct mca_pml_ob1_rget_hdr_t { }; typedef struct mca_pml_ob1_rget_hdr_t mca_pml_ob1_rget_hdr_t; +struct mca_pml_ob1_ext_rget_hdr_t { + mca_pml_ob1_cid_hdr_t hdr_ext_cid; + + /* actual match */ + mca_pml_ob1_rget_hdr_t hdr_rget; +}; + +typedef struct mca_pml_ob1_ext_rget_hdr_t mca_pml_ob1_ext_rget_hdr_t; + static inline void mca_pml_ob1_rget_hdr_prepare (mca_pml_ob1_rget_hdr_t *hdr, uint8_t hdr_flags, uint16_t hdr_ctx, int32_t hdr_src, int32_t hdr_tag, uint16_t hdr_seq, uint64_t hdr_msg_length, void *hdr_src_req, void *hdr_frag, @@ -425,6 +488,11 @@ union mca_pml_ob1_hdr_t { mca_pml_ob1_ack_hdr_t hdr_ack; mca_pml_ob1_rdma_hdr_t hdr_rdma; mca_pml_ob1_fin_hdr_t hdr_fin; + /* extended CID support */ + mca_pml_ob1_cid_hdr_t hdr_cid; + mca_pml_ob1_ext_match_hdr_t hdr_ext_match; + mca_pml_ob1_ext_rendezvous_hdr_t hdr_ext_rndv; + mca_pml_ob1_ext_rget_hdr_t 
hdr_ext_rget; }; typedef union mca_pml_ob1_hdr_t mca_pml_ob1_hdr_t; @@ -457,6 +525,15 @@ ob1_hdr_ntoh(mca_pml_ob1_hdr_t *hdr, const uint8_t hdr_type) case MCA_PML_OB1_HDR_TYPE_FIN: MCA_PML_OB1_FIN_HDR_NTOH(hdr->hdr_fin); break; + case MCA_PML_OB1_HDR_TYPE_CID: + { + mca_pml_ob1_hdr_t *next_hdr = (mca_pml_ob1_hdr_t *) ((uintptr_t) hdr + sizeof (hdr->hdr_cid)); + + MCA_PML_OB1_EXT_CID_HDR_NTOH(hdr->hdr_cid); + /* now swap the real header */ + ob1_hdr_ntoh (next_hdr, next_hdr->hdr_common.hdr_type); + break; + } default: assert(0); break; @@ -503,6 +580,15 @@ ob1_hdr_hton_intr(mca_pml_ob1_hdr_t *hdr, const uint8_t hdr_type, case MCA_PML_OB1_HDR_TYPE_FIN: MCA_PML_OB1_FIN_HDR_HTON(hdr->hdr_fin); break; + case MCA_PML_OB1_HDR_TYPE_CID: + { + mca_pml_ob1_hdr_t *next_hdr = (mca_pml_ob1_hdr_t *) ((uintptr_t) hdr + sizeof (hdr->hdr_cid)); + + MCA_PML_OB1_EXT_CID_HDR_HTON(hdr->hdr_cid); + /* now swap the real header */ + ob1_hdr_hton (next_hdr, next_hdr->hdr_common.hdr_type, proc); + break; + } default: assert(0); break; @@ -516,7 +602,8 @@ ob1_hdr_hton_intr(mca_pml_ob1_hdr_t *hdr, const uint8_t hdr_type, static inline __opal_attribute_always_inline__ void ob1_hdr_copy(mca_pml_ob1_hdr_t *src, mca_pml_ob1_hdr_t *dst) { - switch(src->hdr_common.hdr_type) { + do { + switch(src->hdr_common.hdr_type) { case MCA_PML_OB1_HDR_TYPE_MATCH: memcpy( &(dst->hdr_match), &(src->hdr_match), sizeof(mca_pml_ob1_match_hdr_t) ); break; @@ -538,10 +625,24 @@ ob1_hdr_copy(mca_pml_ob1_hdr_t *src, mca_pml_ob1_hdr_t *dst) case MCA_PML_OB1_HDR_TYPE_FIN: memcpy( &(dst->hdr_fin), &(src->hdr_fin), sizeof(mca_pml_ob1_fin_hdr_t) ); break; + case MCA_PML_OB1_HDR_TYPE_CID: + { + mca_pml_ob1_hdr_t *next_src = (mca_pml_ob1_hdr_t *) ((uintptr_t) src + sizeof (src->hdr_cid)); + mca_pml_ob1_hdr_t *next_dst = (mca_pml_ob1_hdr_t *) ((uintptr_t) dst + sizeof (dst->hdr_cid)); + + memcpy (&dst->hdr_cid, &src->hdr_cid, sizeof (src->hdr_cid)); + /* can't call recursively and expect inlining */ + src = next_src; + dst
= next_dst; + continue; + } default: memcpy( &(dst->hdr_common), &(src->hdr_common), sizeof(mca_pml_ob1_common_hdr_t) ); break; - } + } + + break; + } while (1); } #endif /* MCA_PML_OB1_HEADER_H */ diff --git a/ompi/mca/pml/ob1/pml_ob1_iprobe.c b/ompi/mca/pml/ob1/pml_ob1_iprobe.c index 9175a6cc4d5..4d6a0eb8dfd 100644 --- a/ompi/mca/pml/ob1/pml_ob1_iprobe.c +++ b/ompi/mca/pml/ob1/pml_ob1_iprobe.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -40,10 +40,10 @@ int mca_pml_ob1_iprobe(int src, MCA_PML_OB1_RECV_REQUEST_START(&recvreq); if( REQUEST_COMPLETE( &(recvreq.req_recv.req_base.req_ompi)) ) { - if( NULL != status ) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; - } rc = recvreq.req_recv.req_base.req_ompi.req_status.MPI_ERROR; + if( MPI_STATUS_IGNORE != status ) { + OMPI_COPY_STATUS(status, recvreq.req_recv.req_base.req_ompi.req_status, false); + } *matched = 1; } else { *matched = 0; @@ -71,8 +71,8 @@ int mca_pml_ob1_probe(int src, ompi_request_wait_completion(&recvreq.req_recv.req_base.req_ompi); rc = recvreq.req_recv.req_base.req_ompi.req_status.MPI_ERROR; - if (NULL != status) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; + if( MPI_STATUS_IGNORE != status ) { + OMPI_COPY_STATUS(status, recvreq.req_recv.req_base.req_ompi.req_status, false); } MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv ); @@ -107,8 +107,9 @@ mca_pml_ob1_improbe(int src, MCA_PML_OB1_RECV_REQUEST_START(recvreq); if( REQUEST_COMPLETE( &(recvreq->req_recv.req_base.req_ompi)) ) { - if( NULL != status ) { - *status = recvreq->req_recv.req_base.req_ompi.req_status; + rc = 
recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; + if( MPI_STATUS_IGNORE != status ) { + OMPI_COPY_STATUS(status, recvreq->req_recv.req_base.req_ompi.req_status, false); } *matched = 1; @@ -116,8 +117,6 @@ mca_pml_ob1_improbe(int src, (*message)->req_ptr = recvreq; (*message)->peer = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; (*message)->count = recvreq->req_recv.req_base.req_ompi.req_status._ucount; - - rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; } else { *matched = 0; @@ -162,9 +161,8 @@ mca_pml_ob1_mprobe(int src, ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi); rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; - - if( NULL != status ) { - *status = recvreq->req_recv.req_base.req_ompi.req_status; + if( MPI_STATUS_IGNORE != status ) { + OMPI_COPY_STATUS(status, recvreq->req_recv.req_base.req_ompi.req_status, false); } if( OMPI_SUCCESS == rc ) { diff --git a/ompi/mca/pml/ob1/pml_ob1_irecv.c b/ompi/mca/pml/ob1/pml_ob1_irecv.c index 3497074743d..ab16a4776e5 100644 --- a/ompi/mca/pml/ob1/pml_ob1_irecv.c +++ b/ompi/mca/pml/ob1/pml_ob1_irecv.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -145,8 +145,8 @@ int mca_pml_ob1_recv(void *addr, ); } - if (NULL != status) { /* return status */ - *status = recvreq->req_recv.req_base.req_ompi.req_status; + if (MPI_STATUS_IGNORE != status) { + OMPI_COPY_STATUS(status, recvreq->req_recv.req_base.req_ompi.req_status, false); } rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; @@ -362,8 +362,8 @@ mca_pml_ob1_mrecv( void *buf, MCA_PML_OB1_RECV_FRAG_RETURN(frag); - if (NULL != status) { /* return status */ - *status = recvreq->req_recv.req_base.req_ompi.req_status; + if (MPI_STATUS_IGNORE != status) { + OMPI_COPY_STATUS(status, recvreq->req_recv.req_base.req_ompi.req_status, false); } rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; #if OPAL_ENABLE_FT_MPI diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c index 2b7e7047708..8355a6e9237 100644 --- a/ompi/mca/pml/ob1/pml_ob1_isend.c +++ b/ompi/mca/pml/ob1/pml_ob1_isend.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014-2021 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -48,17 +48,14 @@ int mca_pml_ob1_isend_init(const void *buf, ompi_communicator_t * comm, ompi_request_t ** request) { + mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst); mca_pml_ob1_send_request_t *sendreq = NULL; MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq); if (NULL == sendreq) return OMPI_ERR_OUT_OF_RESOURCE; - MCA_PML_OB1_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, true); + MCA_PML_OB1_SEND_REQUEST_INIT(sendreq, buf, count, datatype, dst, tag, + comm, sendmode, true, ob1_proc); PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, &(sendreq)->req_send.req_base, @@ -78,7 +75,8 @@ int mca_pml_ob1_isend_init(const void *buf, static inline int mca_pml_ob1_send_inline (const void *buf, size_t count, ompi_datatype_t * datatype, int dst, int tag, int16_t seqn, - ompi_proc_t *dst_proc, mca_bml_base_endpoint_t* endpoint, + ompi_proc_t *dst_proc, mca_pml_ob1_comm_proc_t *ob1_proc, + mca_bml_base_endpoint_t* endpoint, ompi_communicator_t * comm) { mca_pml_ob1_match_hdr_t match; @@ -92,7 +90,10 @@ static inline int mca_pml_ob1_send_inline (const void *buf, size_t count, return OMPI_ERR_NOT_AVAILABLE; ompi_datatype_type_size (datatype, &size); - if ((size * count) > 256) { /* some random number */ + + /* the size used here was picked based on performance on a Cray XE-6. 
it should probably + * be provided by the btl module */ + if ((size * count) > 256 || -1 == ob1_proc->comm_index) { return OMPI_ERR_NOT_AVAILABLE; } @@ -111,7 +112,7 @@ static inline int mca_pml_ob1_send_inline (const void *buf, size_t count, } mca_pml_ob1_match_hdr_prepare (&match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, - comm->c_contextid, comm->c_my_rank, + ob1_proc->comm_index, comm->c_my_rank, tag, seqn); ob1_hdr_hton(&match, MCA_PML_OB1_HDR_TYPE_MATCH, dst_proc); @@ -174,7 +175,7 @@ int mca_pml_ob1_isend(const void *buf, } if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) { - rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc, + rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc, ob1_proc, endpoint, comm); if (OPAL_LIKELY(0 <= rc)) { /* NTH: it is legal to return ompi_request_empty since the only valid @@ -194,7 +195,7 @@ int mca_pml_ob1_isend(const void *buf, count, datatype, dst, tag, - comm, sendmode, false); + comm, sendmode, false, ob1_proc); PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, &(sendreq)->req_send.req_base, @@ -206,7 +207,6 @@ int mca_pml_ob1_isend(const void *buf, #if OPAL_ENABLE_FT_MPI alloc_ft_req: -#endif /* OPAL_ENABLE_FT_MPI */ MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq); if (NULL == sendreq) return OMPI_ERR_OUT_OF_RESOURCE; @@ -216,7 +216,7 @@ int mca_pml_ob1_isend(const void *buf, count, datatype, dst, tag, - comm, sendmode, false); + comm, sendmode, false, ob1_proc); PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, &(sendreq)->req_send.req_base, @@ -224,10 +224,16 @@ int mca_pml_ob1_isend(const void *buf, /* No point in starting the request, it won't go through, mark completed * in error for collection in future wait */ - sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR = MPI_ERR_PROC_FAILED; + sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR = ompi_comm_is_revoked(comm)? 
MPI_ERR_REVOKED: MPI_ERR_PROC_FAILED; MCA_PML_OB1_SEND_REQUEST_MPI_COMPLETE(sendreq, false); + OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, "Allocating request in error %p (peer %d, seq %" PRIu64 ") with error code %d", + (void*) sendreq, + dst, + sendreq->req_send.req_base.req_sequence, + sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR)); *request = (ompi_request_t *) sendreq; return OMPI_SUCCESS; +#endif /* OPAL_ENABLE_FT_MPI */ } int mca_pml_ob1_send(const void *buf, @@ -279,7 +285,7 @@ int mca_pml_ob1_send(const void *buf, */ if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) { rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc, - endpoint, comm); + ob1_proc, endpoint, comm); if (OPAL_LIKELY(0 <= rc)) { return OMPI_SUCCESS; } @@ -299,12 +305,8 @@ int mca_pml_ob1_send(const void *buf, sendreq->req_send.req_base.req_proc = dst_proc; sendreq->rdma_frag = NULL; - MCA_PML_OB1_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, false); + MCA_PML_OB1_SEND_REQUEST_INIT(sendreq, buf, count, datatype, dst, tag, + comm, sendmode, false, ob1_proc); PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, &sendreq->req_send.req_base, diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 3c1e2762e85..ed7ec7d4360 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -20,6 +20,10 @@ * Copyright (c) 2018 Sandia National Laboratories * All rights reserved. * Copyright (c) 2020 Google, LLC. All rights reserved. + * Copyright (c) 2020-2021 Triad National Security, LLC. All rights + * reserved. + * Copyright (c) 2021 Cisco Systems, Inc. 
All rights reserved + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -404,8 +408,8 @@ int mca_pml_ob1_revoke_comm( struct ompi_communicator_t* ompi_comm, bool coll_on #if OPAL_ENABLE_DEBUG if( opal_list_get_size(&nack_list) ) { OPAL_OUTPUT_VERBOSE((15, ompi_ftmpi_output_handle, - "ob1_revoke_comm: purging unexpected and cantmatch frags for in comm %d (%s): nacking %zu frags", - ompi_comm->c_contextid, coll_only ? "collective frags only" : "all revoked", + "ob1_revoke_comm: purging unexpected and cantmatch frags for in comm %s (%s): nacking %zu frags", + ompi_comm_print_cid(ompi_comm), coll_only ? "collective frags only" : "all revoked", opal_list_get_size(&nack_list))); if( verbose > 15) mca_pml_ob1_dump(ompi_comm, verbose); } @@ -419,7 +423,7 @@ int mca_pml_ob1_revoke_comm( struct ompi_communicator_t* ompi_comm, bool coll_on assert( MCA_PML_OB1_HDR_TYPE_RGET == hdr->hdr_common.hdr_type || MCA_PML_OB1_HDR_TYPE_RNDV == hdr->hdr_common.hdr_type ); OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, - "ob1_revoke_comm: sending NACK to %d", hdr->hdr_rndv.hdr_match.hdr_src)); + "ob1_revoke_comm: sending NACK to %d for seq %d", hdr->hdr_rndv.hdr_match.hdr_src, hdr->hdr_rndv.hdr_match.hdr_seq)); /* Send a ACK with a NULL request to signify revocation */ proc = mca_pml_ob1_peer_lookup(ompi_comm, hdr->hdr_rndv.hdr_match.hdr_src); mca_pml_ob1_recv_request_ack_send(NULL, proc->ompi_proc, hdr->hdr_rndv.hdr_src_req.lval, NULL, 0, 0, false); @@ -428,7 +432,7 @@ int mca_pml_ob1_revoke_comm( struct ompi_communicator_t* ompi_comm, bool coll_on /* if it's a TYPE_MATCH, the sender is not expecting anything * from us. So we are done. 
*/ OPAL_OUTPUT_VERBOSE((15, ompi_ftmpi_output_handle, - "ob1_revoke_comm: dropping silently frag from %d", hdr->hdr_rndv.hdr_match.hdr_src)); + "ob1_revoke_comm: dropping silently frag from %d for seq %d", hdr->hdr_rndv.hdr_match.hdr_src, hdr->hdr_rndv.hdr_match.hdr_seq)); } MCA_PML_OB1_RECV_FRAG_RETURN(frag); } @@ -476,8 +480,8 @@ void mca_pml_ob1_recv_frag_callback_match (mca_btl_base_module_t *btl, * this pending queue will be searched and all matching fragments * moved to the right communicator. */ - append_frag_to_list( &mca_pml_ob1.non_existing_communicator_pending, - btl, hdr, segments, num_segments, NULL ); + append_frag_to_list( &mca_pml_ob1.non_existing_communicator_pending, btl, + hdr, segments, num_segments, NULL ); return; } comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm; @@ -681,7 +685,7 @@ void mca_pml_ob1_recv_frag_callback_ack (mca_btl_base_module_t *btl, #if OPAL_ENABLE_FT_MPI /* if the req_recv is NULL, the comm has been revoked at the receiver */ if( OPAL_UNLIKELY(NULL == sendreq->req_recv.pval) ) { - OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, "Recvfrag: Received a NACK to the RDV/RGET match to %d on comm %d\n", sendreq->req_send.req_base.req_peer, sendreq->req_send.req_base.req_comm->c_contextid)); + OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, "Recvfrag: Received a NACK to the RDV/RGET match to %d for seq %" PRIu64 " on comm %s\n", sendreq->req_send.req_base.req_peer, sendreq->req_send.req_base.req_sequence, ompi_comm_print_cid(sendreq->req_send.req_base.req_comm))); if (NULL != sendreq->rdma_frag) { MCA_PML_OB1_RDMA_FRAG_RETURN(sendreq->rdma_frag); sendreq->rdma_frag = NULL; @@ -1037,8 +1041,8 @@ static int mca_pml_ob1_recv_frag_match (mca_btl_base_module_t *btl, * this pending queue will be searched and all matching fragments * moved to the right communicator. 
*/ - append_frag_to_list( &mca_pml_ob1.non_existing_communicator_pending, - btl, hdr, segments, num_segments, NULL ); + append_frag_to_list( &mca_pml_ob1.non_existing_communicator_pending, btl, + hdr, segments, num_segments, NULL ); return OMPI_SUCCESS; } comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm; @@ -1074,7 +1078,7 @@ static int mca_pml_ob1_recv_frag_match (mca_btl_base_module_t *btl, /* Send a ACK with a NULL request to signify revocation */ mca_pml_ob1_rendezvous_hdr_t* hdr_rndv = (mca_pml_ob1_rendezvous_hdr_t*) hdr; mca_pml_ob1_recv_request_ack_send(NULL, proc->ompi_proc, hdr_rndv->hdr_src_req.lval, NULL, 0, 0, false); - OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, "Recvfrag: comm %d is revoked or collectives force errors, sending a NACK to the RDV/RGET match from %d\n", hdr->hdr_ctx, hdr->hdr_src)); + OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, "Recvfrag: comm %d is revoked or collectives force errors, sending a NACK to the RDV/RGET match from %d for seq %d\n", hdr->hdr_ctx, hdr->hdr_src, hdr->hdr_seq)); } else { OPAL_OUTPUT_VERBOSE((15, ompi_ftmpi_output_handle, @@ -1201,3 +1205,71 @@ mca_pml_ob1_recv_frag_match_proc (mca_btl_base_module_t *btl, return OMPI_SUCCESS; } +void mca_pml_ob1_handle_cid (ompi_communicator_t *comm, int src, mca_pml_ob1_cid_hdr_t *hdr_cid) +{ + mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, src); + bool had_comm_index = (-1 != ob1_proc->comm_index); + + if (!had_comm_index) { + /* avoid sending too many extra packets. 
if this doesn't work well then a flag can be added to + * the proc to indicate that this packet has been sent */ + ob1_proc->comm_index = hdr_cid->hdr_src_comm_index; + + /* + * if the proc to send to is myself, no need to do the send + */ + if(ob1_proc->ompi_proc != ompi_proc_local()) { + (void) mca_pml_ob1_send_cid (ob1_proc->ompi_proc, comm); + } + } +} + +void mca_pml_ob1_recv_frag_callback_cid (mca_btl_base_module_t* btl, + const mca_btl_base_receive_descriptor_t* des) +{ + mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS]; + mca_pml_ob1_hdr_t *hdr = (mca_pml_ob1_hdr_t *) des->des_segments[0].seg_addr.pval; + mca_pml_ob1_match_hdr_t *hdr_match = &hdr->hdr_ext_match.hdr_match; + size_t num_segments = des->des_segment_count; + ompi_communicator_t *comm; + + memcpy (segments, des->des_segments, num_segments * sizeof (segments[0])); + assert (segments->seg_len >= sizeof (hdr->hdr_cid)); + + ob1_hdr_ntoh (hdr, hdr->hdr_common.hdr_type); + + /* NTH: this should be ok as all BTLs create a dummy segment */ + segments->seg_len -= offsetof (mca_pml_ob1_ext_match_hdr_t, hdr_match); + segments->seg_addr.pval = (void *) hdr_match; + + /* find the communicator with this extended CID */ + comm = ompi_comm_lookup_cid (hdr->hdr_cid.hdr_cid); + if (OPAL_UNLIKELY(NULL == comm)) { + if (segments->seg_len > 0) { + /* This is a special case. A message for a not yet existing + * communicator can happen. Instead of doing a matching we + * will temporarily add it to a pending queue in the PML. + * Later on, when the communicator is completely instantiated, + * this pending queue will be searched and all matching fragments + * moved to the right communicator.
+ */ + append_frag_to_list (&mca_pml_ob1.non_existing_communicator_pending, + btl, (const mca_pml_ob1_match_hdr_t *)hdr, des->des_segments, + num_segments, NULL); + } + + /* nothing more to do */ + return; + } + + mca_pml_ob1_handle_cid (comm, hdr->hdr_cid.hdr_src, &hdr->hdr_cid); + hdr_match->hdr_ctx = comm->c_index; + + if (segments->seg_len == 0) { + /* just a response */ + return; + } + + mca_pml_ob1_recv_frag_match (btl, hdr_match, segments, des->des_segment_count, + hdr_match->hdr_common.hdr_type); +} diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.h b/ompi/mca/pml/ob1/pml_ob1_recvfrag.h index d058a113612..95f57a66b6e 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.h +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.h @@ -159,6 +159,12 @@ extern void mca_pml_ob1_recv_frag_callback_put (mca_btl_base_module_t *btl, extern void mca_pml_ob1_recv_frag_callback_fin (mca_btl_base_module_t *btl, const mca_btl_base_receive_descriptor_t *descriptor); +/** + * Callback from BTL on receipt of an extended CID header + */ +extern void mca_pml_ob1_recv_frag_callback_cid( mca_btl_base_module_t *btl, + const mca_btl_base_receive_descriptor_t* descriptor); + /** * Extract the next fragment from the cant_match ordered list. This fragment * will be the next in sequence. @@ -170,6 +176,8 @@ void append_frag_to_ordered_list(mca_pml_ob1_recv_frag_t** queue, mca_pml_ob1_recv_frag_t* frag, uint16_t seq); +void mca_pml_ob1_handle_cid (ompi_communicator_t *comm, int src, mca_pml_ob1_cid_hdr_t *hdr_cid); + extern void mca_pml_ob1_dump_cant_match(mca_pml_ob1_recv_frag_t* queue); END_C_DECLS diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index cd089c01db3..64910b72c40 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -21,6 +21,8 @@ * Copyright (c) 2018 Sandia National Laboratories * All rights reserved. * Copyright (c) 2020 Google, LLC. All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. 
All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -282,6 +284,10 @@ int mca_pml_ob1_recv_request_ack_send_btl( return OMPI_ERR_OUT_OF_RESOURCE; } +/* + * + */ + static int mca_pml_ob1_recv_request_ack( mca_pml_ob1_recv_request_t* recvreq, mca_btl_base_module_t* btl, @@ -1198,8 +1204,8 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req, mca_pml_ob1_comm_proc_t **p) #endif { - mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; - mca_pml_ob1_comm_proc_t **procp = comm->procs; + mca_pml_ob1_comm_t *comm = (mca_pml_ob1_comm_t *) req->req_recv.req_base.req_comm->c_pml_comm; + mca_pml_ob1_comm_proc_t **procp = (mca_pml_ob1_comm_proc_t **) comm->procs; #if MCA_PML_OB1_CUSTOM_MATCH mca_pml_ob1_recv_frag_t* frag; @@ -1303,8 +1309,8 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) ompi_communicator_t* comm_ptr = req->req_recv.req_base.req_comm; if( ((ompi_comm_is_revoked(comm_ptr) && !ompi_request_tag_is_ft(req->req_recv.req_base.req_tag) ) || (ompi_comm_coll_revoked(comm_ptr) && ompi_request_tag_is_collective(req->req_recv.req_base.req_tag)))) { - OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, "Recvreq: Posting a new recv req peer %d, tag %d on a revoked/coll_revoked communicator %d, discarding it.\n", - req->req_recv.req_base.req_peer, req->req_recv.req_base.req_tag, comm_ptr->c_contextid)); + OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, "Recvreq: Posting a new recv req peer %d, tag %d on a revoked/coll_revoked communicator %s, discarding it.\n", + req->req_recv.req_base.req_peer, req->req_recv.req_base.req_tag, ompi_comm_print_cid(comm_ptr))); req->req_recv.req_base.req_ompi.req_status.MPI_ERROR = ompi_comm_is_revoked(comm_ptr)? 
MPI_ERR_REVOKED: MPI_ERR_PROC_FAILED; recv_request_pml_complete( req ); PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_END, diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index bae8fc10bc9..d1f37a73b20 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -19,6 +19,8 @@ * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -487,18 +489,23 @@ int mca_pml_ob1_send_request_start_buffered( mca_bml_base_btl_t* bml_btl, size_t size) { + const bool need_ext_match = MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq); + size_t hdr_size = sizeof (mca_pml_ob1_rendezvous_hdr_t); mca_btl_base_descriptor_t* des; mca_btl_base_segment_t* segment; mca_pml_ob1_hdr_t* hdr; + mca_pml_ob1_rendezvous_hdr_t *hdr_rndv; struct iovec iov; unsigned int iov_count; size_t max_data, req_bytes_delivered; int rc; + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_size = sizeof (hdr->hdr_ext_rndv); + } + /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_ob1_rendezvous_hdr_t) + size, + mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, hdr_size + size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL); if( OPAL_UNLIKELY(NULL == des) ) { @@ -507,8 +514,7 @@ int mca_pml_ob1_send_request_start_buffered( segment = des->des_segments; /* pack the data into the BTL supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + - sizeof(mca_pml_ob1_rendezvous_hdr_t)); + iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + hdr_size); iov.iov_len = size; iov_count = 1; max_data = size; @@ -523,17 +529,24 @@ int 
mca_pml_ob1_send_request_start_buffered( /* build rendezvous header */ hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; - mca_pml_ob1_rendezvous_hdr_prepare (&hdr->hdr_rndv, MCA_PML_OB1_HDR_TYPE_RNDV, 0, - sendreq->req_send.req_base.req_comm->c_contextid, + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_rndv = &hdr->hdr_ext_rndv.hdr_rndv; + mca_pml_ob1_cid_hdr_prepare (&hdr->hdr_cid, sendreq->req_send.req_base.req_comm); + } else { + hdr_rndv = &hdr->hdr_rndv; + } + + mca_pml_ob1_rendezvous_hdr_prepare (hdr_rndv, MCA_PML_OB1_HDR_TYPE_RNDV, 0, + sendreq->ob1_proc->comm_index, sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_send.req_bytes_packed, sendreq); - ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV, sendreq->req_send.req_base.req_proc); + ob1_hdr_hton(hdr, hdr->hdr_common.hdr_type, sendreq->req_send.req_base.req_proc); /* update lengths */ - segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t) + max_data; + segment->seg_len = hdr_size + max_data; des->des_cbfunc = mca_pml_ob1_rndv_completion; des->des_cbdata = sendreq; @@ -571,7 +584,7 @@ int mca_pml_ob1_send_request_start_buffered( MCA_PML_OB1_SEND_REQUEST_MPI_COMPLETE(sendreq, true); /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RNDV); + rc = mca_bml_base_send (bml_btl, des, hdr->hdr_common.hdr_type); if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered); @@ -593,18 +606,22 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, mca_bml_base_btl_t* bml_btl, size_t size ) { + const bool need_ext_match = MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq); + size_t hdr_size = OMPI_PML_OB1_MATCH_HDR_LEN; mca_btl_base_descriptor_t* des = NULL; mca_btl_base_segment_t* segment; mca_pml_ob1_hdr_t* hdr; + mca_pml_ob1_match_hdr_t *hdr_match; struct iovec iov; unsigned int iov_count; size_t 
max_data = size; int rc; - if(NULL != bml_btl->btl->btl_sendi) { + if(NULL != bml_btl->btl->btl_sendi && !need_ext_match) { mca_pml_ob1_match_hdr_t match; + mca_pml_ob1_match_hdr_prepare (&match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, - sendreq->req_send.req_base.req_comm->c_contextid, + sendreq->ob1_proc->comm_index, sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence); @@ -632,9 +649,11 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, } } else { /* allocate descriptor */ - mca_bml_base_alloc( bml_btl, &des, - MCA_BTL_NO_ORDER, - OMPI_PML_OB1_MATCH_HDR_LEN + size, + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_size += sizeof (hdr->hdr_cid); + } + + mca_bml_base_alloc (bml_btl, &des, MCA_BTL_NO_ORDER, hdr_size + size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); } if( OPAL_UNLIKELY(NULL == des) ) { @@ -645,8 +664,7 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, if(size > 0) { /* pack the data into the supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + - OMPI_PML_OB1_MATCH_HDR_LEN); + iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + hdr_size); iov.iov_len = size; iov_count = 1; /* @@ -672,26 +690,32 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, ); } - /* build match header */ hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; - mca_pml_ob1_match_hdr_prepare (&hdr->hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, - sendreq->req_send.req_base.req_comm->c_contextid, + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_match = &hdr->hdr_ext_match.hdr_match; + mca_pml_ob1_cid_hdr_prepare (&hdr->hdr_cid, sendreq->req_send.req_base.req_comm); + } else { + hdr_match = &hdr->hdr_match; + } + + mca_pml_ob1_match_hdr_prepare (hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, + sendreq->ob1_proc->comm_index, 
sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence); - ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH, sendreq->req_send.req_base.req_proc); + ob1_hdr_hton(hdr, hdr->hdr_common.hdr_type, sendreq->req_send.req_base.req_proc); /* update lengths */ - segment->seg_len = OMPI_PML_OB1_MATCH_HDR_LEN + max_data; + segment->seg_len = hdr_size + max_data; /* short message */ des->des_cbdata = sendreq; des->des_cbfunc = mca_pml_ob1_match_completion_free; /* send */ - rc = mca_bml_base_send_status(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); + rc = mca_bml_base_send_status(bml_btl, des, hdr->hdr_common.hdr_type); SPC_USER_OR_MPI(sendreq->req_send.req_base.req_ompi.req_status.MPI_TAG, (ompi_spc_value_t)size, OMPI_SPC_BYTES_SENT_USER, OMPI_SPC_BYTES_SENT_MPI); if( OPAL_LIKELY( rc >= OPAL_SUCCESS ) ) { @@ -720,19 +744,23 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq, mca_bml_base_btl_t* bml_btl, size_t size ) { + const bool need_ext_match = MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq); + size_t hdr_size = OMPI_PML_OB1_MATCH_HDR_LEN; mca_btl_base_descriptor_t* des; mca_btl_base_segment_t* segment; mca_pml_ob1_hdr_t* hdr; + mca_pml_ob1_match_hdr_t *hdr_match; int rc; + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_size += sizeof (hdr->hdr_cid); + } + /* prepare descriptor */ - mca_bml_base_prepare_src( bml_btl, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - OMPI_PML_OB1_MATCH_HDR_LEN, - &size, + mca_bml_base_prepare_src (bml_btl, &sendreq->req_send.req_base.req_convertor, + MCA_BTL_NO_ORDER, hdr_size, &size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, - &des ); + &des); if( OPAL_UNLIKELY(NULL == des) ) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -740,20 +768,27 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq, /* build match header */ hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; - 
mca_pml_ob1_match_hdr_prepare (&hdr->hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, - sendreq->req_send.req_base.req_comm->c_contextid, + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_match = &hdr->hdr_ext_match.hdr_match; + mca_pml_ob1_cid_hdr_prepare (&hdr->hdr_cid, sendreq->req_send.req_base.req_comm); + } else { + hdr_match = &hdr->hdr_match; + } + + mca_pml_ob1_match_hdr_prepare (hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, + sendreq->ob1_proc->comm_index, sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence); - ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH, sendreq->req_send.req_base.req_proc); + ob1_hdr_hton(hdr, hdr->hdr_common.hdr_type, sendreq->req_send.req_base.req_proc); /* short message */ des->des_cbfunc = mca_pml_ob1_match_completion_free; des->des_cbdata = sendreq; /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); + rc = mca_bml_base_send(bml_btl, des, hdr->hdr_common.hdr_type); SPC_USER_OR_MPI(sendreq->req_send.req_base.req_ompi.req_status.MPI_TAG, (ompi_spc_value_t)size, OMPI_SPC_BYTES_SENT_USER, OMPI_SPC_BYTES_SENT_MPI); if( OPAL_LIKELY( rc >= OPAL_SUCCESS ) ) { @@ -782,11 +817,13 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, * one RDMA capable BTLs). This way round robin distribution of RDMA * operation is achieved. */ + const bool need_ext_match = MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq); + size_t reg_size, hdr_size = sizeof (mca_pml_ob1_rget_hdr_t); mca_btl_base_registration_handle_t *local_handle; mca_btl_base_descriptor_t *des; mca_pml_ob1_rdma_frag_t *frag; - mca_pml_ob1_rget_hdr_t *hdr; - size_t reg_size; + mca_pml_ob1_hdr_t *hdr; + mca_pml_ob1_rget_hdr_t *hdr_rget; void *data_ptr; int rc; @@ -818,10 +855,15 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, frag->cbfunc = mca_pml_ob1_rget_completion; /* do not store the local handle in the fragment. 
it will be released by mca_pml_ob1_free_rdma_resources */ + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_size = sizeof (hdr->hdr_ext_rget); + } + reg_size = bml_btl->btl->btl_registration_handle_size; + hdr_size += reg_size; /* allocate space for get hdr + segment list */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof (*hdr) + reg_size, + mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, hdr_size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL); if( OPAL_UNLIKELY(NULL == des) ) { @@ -834,17 +876,24 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, sendreq->rdma_frag = frag; /* build match header */ - hdr = (mca_pml_ob1_rget_hdr_t *) des->des_segments->seg_addr.pval; + hdr = (mca_pml_ob1_hdr_t *) des->des_segments->seg_addr.pval; + if (need_ext_match) { + hdr_rget = &hdr->hdr_ext_rget.hdr_rget; + mca_pml_ob1_cid_hdr_prepare (&hdr->hdr_cid, sendreq->req_send.req_base.req_comm); + } else { + hdr_rget = &hdr->hdr_rget; + } + /* TODO -- Add support for multiple segments for get */ - mca_pml_ob1_rget_hdr_prepare (hdr, MCA_PML_OB1_HDR_FLAGS_CONTIG | MCA_PML_OB1_HDR_FLAGS_PIN, - sendreq->req_send.req_base.req_comm->c_contextid, + mca_pml_ob1_rget_hdr_prepare (hdr_rget, MCA_PML_OB1_HDR_FLAGS_CONTIG | MCA_PML_OB1_HDR_FLAGS_PIN, + sendreq->ob1_proc->comm_index, sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_send.req_bytes_packed, sendreq, frag, data_ptr, local_handle, reg_size); - ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RGET, sendreq->req_send.req_base.req_proc); + ob1_hdr_hton(hdr, hdr->hdr_common.hdr_type, sendreq->req_send.req_base.req_proc); des->des_cbfunc = mca_pml_ob1_send_ctl_completion; des->des_cbdata = sendreq; @@ -860,7 +909,7 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, } /* send */ - rc = mca_bml_base_send(bml_btl, des, 
MCA_PML_OB1_HDR_TYPE_RGET); + rc = mca_bml_base_send(bml_btl, des, hdr->hdr_common.hdr_type); if (OPAL_UNLIKELY(rc < 0)) { MCA_PML_OB1_RDMA_FRAG_RETURN(frag); sendreq->rdma_frag = NULL; @@ -882,18 +931,22 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, size_t size, int flags ) { + const bool need_ext_match = MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq); + size_t hdr_size = sizeof (mca_pml_ob1_rendezvous_hdr_t); mca_btl_base_descriptor_t* des; mca_btl_base_segment_t* segment; mca_pml_ob1_hdr_t* hdr; + mca_pml_ob1_rendezvous_hdr_t *hdr_rndv; int rc; + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_size = sizeof (hdr->hdr_ext_rndv); + } + /* prepare descriptor */ if(size == 0) { - mca_bml_base_alloc( bml_btl, - &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_ob1_rendezvous_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ); + mca_bml_base_alloc (bml_btl, &des, MCA_BTL_NO_ORDER, hdr_size, MCA_BTL_DES_FLAGS_PRIORITY | + MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); } else { MEMCHECKER( memchecker_call(&opal_memchecker_base_mem_defined, @@ -901,14 +954,10 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, sendreq->req_send.req_base.req_count, sendreq->req_send.req_base.req_datatype); ); - mca_bml_base_prepare_src( bml_btl, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_ob1_rendezvous_hdr_t), - &size, + mca_bml_base_prepare_src (bml_btl, &sendreq->req_send.req_base.req_convertor, + MCA_BTL_NO_ORDER, hdr_size, &size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_FLAGS_SIGNAL, - &des ); + MCA_BTL_DES_FLAGS_SIGNAL, &des); MEMCHECKER( memchecker_call(&opal_memchecker_base_mem_noaccess, sendreq->req_send.req_base.req_addr, @@ -924,15 +973,23 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, /* build hdr */ hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; - mca_pml_ob1_rendezvous_hdr_prepare (&hdr->hdr_rndv, 
MCA_PML_OB1_HDR_TYPE_RNDV, flags | + + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_rndv = &hdr->hdr_ext_rndv.hdr_rndv; + mca_pml_ob1_cid_hdr_prepare (&hdr->hdr_cid, sendreq->req_send.req_base.req_comm); + } else { + hdr_rndv = &hdr->hdr_rndv; + } + + mca_pml_ob1_rendezvous_hdr_prepare (hdr_rndv, MCA_PML_OB1_HDR_TYPE_RNDV, flags | MCA_PML_OB1_HDR_FLAGS_SIGNAL, - sendreq->req_send.req_base.req_comm->c_contextid, + sendreq->ob1_proc->comm_index, sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_send.req_bytes_packed, sendreq); - ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV, sendreq->req_send.req_base.req_proc); + ob1_hdr_hton(hdr, hdr->hdr_common.hdr_type, sendreq->req_send.req_base.req_proc); /* first fragment of a long message */ des->des_cbdata = sendreq; @@ -942,7 +999,7 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, sendreq->req_state = 2; /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RNDV); + rc = mca_bml_base_send(bml_btl, des, hdr->hdr_common.hdr_type); if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, size ); diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 80a4ae2f6b4..07e9899fd30 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -46,6 +46,7 @@ typedef enum { struct mca_pml_ob1_send_request_t { mca_pml_base_send_request_t req_send; mca_bml_base_endpoint_t* req_endpoint; + mca_pml_ob1_comm_proc_t *ob1_proc; opal_ptr_t req_recv; opal_atomic_int32_t req_state; opal_atomic_int32_t req_lock; @@ -143,7 +144,8 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type) tag, \ comm, \ sendmode, \ - persistent) \ + persistent, \ + ob1_proc) \ { \ MCA_PML_BASE_SEND_REQUEST_INIT(&(sendreq)->req_send, \ buf, \ @@ -156,11 +158,14 @@ 
get_request_from_send_pending(mca_pml_ob1_send_pending_t *type) persistent, \ 0); /* convertor_flags */ \ (sendreq)->req_recv.pval = NULL; \ + (sendreq)->ob1_proc = ob1_proc; \ } #define MCA_PML_OB1_SEND_REQUEST_RESET(sendreq) \ MCA_PML_BASE_SEND_REQUEST_RESET(&(sendreq)->req_send) +#define MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq) (-1 == sendreq->ob1_proc->comm_index) + static inline void mca_pml_ob1_free_rdma_resources (mca_pml_ob1_send_request_t* sendreq) { size_t r; diff --git a/ompi/mca/pml/pml.h b/ompi/mca/pml/pml.h index b356d224a22..6614cf4d5f6 100644 --- a/ompi/mca/pml/pml.h +++ b/ompi/mca/pml/pml.h @@ -489,7 +489,12 @@ typedef int (*mca_pml_base_module_dump_fn_t)( */ /** PML requires requires all procs in the job on the first call to * add_procs */ -#define MCA_PML_BASE_FLAG_REQUIRE_WORLD 0x00000001 +#define MCA_PML_BASE_FLAG_REQUIRE_WORLD 0x00000001 + +/** + * PML supports the extended CID space (doesn't need a global communicator index) + */ +#define MCA_PML_BASE_FLAG_SUPPORTS_EXT_CID 0x00000002 /** * PML instance. @@ -560,5 +565,10 @@ static inline bool mca_pml_base_requires_world (void) return !!(mca_pml.pml_flags & MCA_PML_BASE_FLAG_REQUIRE_WORLD); } +static inline bool mca_pml_base_supports_extended_cid (void) +{ + return !!(mca_pml.pml_flags & MCA_PML_BASE_FLAG_SUPPORTS_EXT_CID); +} + END_C_DECLS #endif /* MCA_PML_H */ diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index 6bee4dea29f..8f237a1c3f6 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2011 Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. - * Copyright (c) 2016-2020 The University of Tennessee and The University + * Copyright (c) 2016-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2018-2019 Research Organization for Information Science @@ -35,18 +35,18 @@ PML_UCX_VERBOSE(8, _msg " buf %p count %zu type '%s' dst %d tag %d mode %s comm %d '%s'", \ __VA_ARGS__, \ (_buf), (_count), (_datatype)->name, (_dst), (_tag), \ - mca_pml_ucx_send_mode_name(_mode), (_comm)->c_contextid, \ + mca_pml_ucx_send_mode_name(_mode), (_comm)->c_index, \ (_comm)->c_name); #define PML_UCX_TRACE_RECV(_msg, _buf, _count, _datatype, _src, _tag, _comm, ...) \ PML_UCX_VERBOSE(8, _msg " buf %p count %zu type '%s' src %d tag %d comm %d '%s'", \ __VA_ARGS__, \ (_buf), (_count), (_datatype)->name, (_src), (_tag), \ - (_comm)->c_contextid, (_comm)->c_name); + (_comm)->c_index, (_comm)->c_name); #define PML_UCX_TRACE_PROBE(_msg, _src, _tag, _comm) \ PML_UCX_VERBOSE(8, _msg " src %d tag %d comm %d '%s'", \ - _src, (_tag), (_comm)->c_contextid, (_comm)->c_name); + _src, (_tag), (_comm)->c_index, (_comm)->c_name); #define PML_UCX_TRACE_MRECV(_msg, _buf, _count, _datatype, _message) \ PML_UCX_VERBOSE(8, _msg " buf %p count %zu type '%s' msg *%p=%p (%p)", \ @@ -550,7 +550,9 @@ int mca_pml_ucx_irecv_init(void *buf, size_t count, ompi_datatype_t *datatype, req->flags = 0; req->buffer = buf; req->count = count; - req->datatype.datatype = mca_pml_ucx_get_datatype(datatype); + req->ompi_datatype = datatype; + req->datatype = mca_pml_ucx_get_datatype(datatype); + OMPI_DATATYPE_RETAIN(datatype); PML_UCX_MAKE_RECV_TAG(req->tag, req->recv.tag_mask, tag, src, comm); @@ -564,7 +566,7 @@ int mca_pml_ucx_irecv(void *buf, size_t count, ompi_datatype_t *datatype, { #if HAVE_DECL_UCP_TAG_RECV_NBX pml_ucx_datatype_t *op_data = mca_pml_ucx_get_op_data(datatype); - ucp_request_param_t *param = &op_data->op_param.recv; + ucp_request_param_t *param = &op_data->op_param.irecv; #endif ucp_tag_t ucp_tag, ucp_tag_mask; @@ -631,7 +633,7 @@ int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src MCA_COMMON_UCX_PROGRESS_LOOP(ompi_pml_ucx.ucp_worker) { status = 
ucp_request_test(req, &info); if (status != UCS_INPROGRESS) { - result = mca_pml_ucx_set_recv_status_safe(mpi_status, status, &info); + result = mca_pml_ucx_set_recv_status_public(mpi_status, status, &info); #if SPC_ENABLE == 1 size_t dt_size; @@ -694,12 +696,13 @@ int mca_pml_ucx_isend_init(const void *buf, size_t count, ompi_datatype_t *datat req->tag = PML_UCX_MAKE_SEND_TAG(tag, comm); req->send.mode = mode; req->send.ep = ep; + req->ompi_datatype = datatype; + OMPI_DATATYPE_RETAIN(datatype); if (MCA_PML_BASE_SEND_BUFFERED == mode) { - req->datatype.ompi_datatype = datatype; - OBJ_RETAIN(datatype); + req->datatype = NULL; } else { - req->datatype.datatype = mca_pml_ucx_get_datatype(datatype); + req->datatype = mca_pml_ucx_get_datatype(datatype); } *request = &req->ompi; @@ -831,7 +834,7 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, #if HAVE_DECL_UCP_TAG_SEND_NBX req = (ompi_request_t*)mca_pml_ucx_common_send_nbx(ep, buf, count, datatype, PML_UCX_MAKE_SEND_TAG(tag, comm), mode, - &mca_pml_ucx_get_op_data(datatype)->op_param.send); + &mca_pml_ucx_get_op_data(datatype)->op_param.isend); #else req = (ompi_request_t*)mca_pml_ucx_common_send(ep, buf, count, datatype, mca_pml_ucx_get_datatype(datatype), @@ -864,19 +867,19 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, static inline __opal_attribute_always_inline__ int mca_pml_ucx_send_nb(ucp_ep_h ep, const void *buf, size_t count, ompi_datatype_t *datatype, ucp_datatype_t ucx_datatype, - ucp_tag_t tag, mca_pml_base_send_mode_t mode, - ucp_send_callback_t cb) + ucp_tag_t tag, mca_pml_base_send_mode_t mode) { ompi_request_t *req; req = (ompi_request_t*)mca_pml_ucx_common_send(ep, buf, count, datatype, mca_pml_ucx_get_datatype(datatype), - tag, mode, cb); + tag, mode, + mca_pml_ucx_send_completion_empty); if (OPAL_LIKELY(req == NULL)) { return OMPI_SUCCESS; } else if (!UCS_PTR_IS_ERR(req)) { PML_UCX_VERBOSE(8, "got request %p", (void*)req); - 
MCA_COMMON_UCX_WAIT_LOOP(req, ompi_pml_ucx.ucp_worker, "ucx send", ompi_request_free(&req)); + MCA_COMMON_UCX_WAIT_LOOP(req, ompi_pml_ucx.ucp_worker, "ucx send", ucp_request_free(req)); } else { PML_UCX_ERROR("ucx send failed: %s", ucs_status_string(UCS_PTR_STATUS(req))); return OMPI_ERROR; @@ -954,8 +957,7 @@ int mca_pml_ucx_send(const void *buf, size_t count, ompi_datatype_t *datatype, i return mca_pml_ucx_send_nb(ep, buf, count, datatype, mca_pml_ucx_get_datatype(datatype), - PML_UCX_MAKE_SEND_TAG(tag, comm), mode, - mca_pml_ucx_send_completion); + PML_UCX_MAKE_SEND_TAG(tag, comm), mode); } int mca_pml_ucx_iprobe(int src, int tag, struct ompi_communicator_t* comm, @@ -974,7 +976,7 @@ int mca_pml_ucx_iprobe(int src, int tag, struct ompi_communicator_t* comm, 0, &info); if (ucp_msg != NULL) { *matched = 1; - mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); + mca_pml_ucx_set_recv_status_public(mpi_status, UCS_OK, &info); } else { (++progress_count % opal_common_ucx.progress_iterations) ? (void)ucp_worker_progress(ompi_pml_ucx.ucp_worker) : opal_progress(); @@ -998,7 +1000,7 @@ int mca_pml_ucx_probe(int src, int tag, struct ompi_communicator_t* comm, ucp_msg = ucp_tag_probe_nb(ompi_pml_ucx.ucp_worker, ucp_tag, ucp_tag_mask, 0, &info); if (ucp_msg != NULL) { - mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); + mca_pml_ucx_set_recv_status_public(mpi_status, UCS_OK, &info); return OMPI_SUCCESS; } } @@ -1023,7 +1025,7 @@ int mca_pml_ucx_improbe(int src, int tag, struct ompi_communicator_t* comm, PML_UCX_MESSAGE_NEW(comm, ucp_msg, &info, message); PML_UCX_VERBOSE(8, "got message %p (%p)", (void*)*message, (void*)ucp_msg); *matched = 1; - mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); + mca_pml_ucx_set_recv_status_public(mpi_status, UCS_OK, &info); } else { (++progress_count % opal_common_ucx.progress_iterations) ? 
(void)ucp_worker_progress(ompi_pml_ucx.ucp_worker) : opal_progress(); @@ -1049,7 +1051,7 @@ int mca_pml_ucx_mprobe(int src, int tag, struct ompi_communicator_t* comm, if (ucp_msg != NULL) { PML_UCX_MESSAGE_NEW(comm, ucp_msg, &info, message); PML_UCX_VERBOSE(8, "got message %p (%p)", (void*)*message, (void*)ucp_msg); - mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); + mca_pml_ucx_set_recv_status_public(mpi_status, UCS_OK, &info); return OMPI_SUCCESS; } } @@ -1122,8 +1124,8 @@ int mca_pml_ucx_start(size_t count, ompi_request_t** requests) tmp_req = (ompi_request_t*)mca_pml_ucx_common_send(preq->send.ep, preq->buffer, preq->count, - preq->datatype.ompi_datatype, - preq->datatype.datatype, + preq->ompi_datatype, + preq->datatype, preq->tag, preq->send.mode, mca_pml_ucx_psend_completion); @@ -1131,7 +1133,7 @@ int mca_pml_ucx_start(size_t count, ompi_request_t** requests) PML_UCX_VERBOSE(8, "start recv request %p", (void*)preq); tmp_req = (ompi_request_t*)ucp_tag_recv_nb(ompi_pml_ucx.ucp_worker, preq->buffer, preq->count, - preq->datatype.datatype, + preq->datatype, preq->tag, preq->recv.tag_mask, mca_pml_ucx_precv_completion); diff --git a/ompi/mca/pml/ucx/pml_ucx.h b/ompi/mca/pml/ucx/pml_ucx.h index 7ad9f646d1d..f452d071d75 100644 --- a/ompi/mca/pml/ucx/pml_ucx.h +++ b/ompi/mca/pml/ucx/pml_ucx.h @@ -59,6 +59,7 @@ struct mca_pml_ucx_module { int priority; bool cuda_initialized; bool request_leak_check; + uint32_t op_attr_nonblocking; }; extern mca_pml_base_component_2_1_0_t mca_pml_ucx_component; diff --git a/ompi/mca/pml/ucx/pml_ucx_component.c b/ompi/mca/pml/ucx/pml_ucx_component.c index 0ffe641ca80..93ecc3888b3 100644 --- a/ompi/mca/pml/ucx/pml_ucx_component.c +++ b/ompi/mca/pml/ucx/pml_ucx_component.c @@ -49,6 +49,8 @@ mca_pml_base_component_2_1_0_t mca_pml_ucx_component = { static int mca_pml_ucx_component_register(void) { + int multi_send_op_attr_enable; + ompi_pml_ucx.priority = 51; (void) 
mca_base_component_var_register(&mca_pml_ucx_component.pmlm_version, "priority", "Priority of the UCX component", @@ -79,6 +81,20 @@ static int mca_pml_ucx_component_register(void) ompi_pml_ucx.request_leak_check = true; #endif + ompi_pml_ucx.op_attr_nonblocking = 0; +#if HAVE_DECL_UCP_OP_ATTR_FLAG_MULTI_SEND + multi_send_op_attr_enable = 0; + (void) mca_base_component_var_register(&mca_pml_ucx_component.pmlm_version, "multi_send_nb", + "Enable passing multi-send optimization flag for nonblocking operations", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &multi_send_op_attr_enable); + if (multi_send_op_attr_enable) { + ompi_pml_ucx.op_attr_nonblocking = UCP_OP_ATTR_FLAG_MULTI_SEND; + } +#endif + opal_common_ucx_mca_var_register(&mca_pml_ucx_component.pmlm_version); return 0; } diff --git a/ompi/mca/pml/ucx/pml_ucx_datatype.c b/ompi/mca/pml/ucx/pml_ucx_datatype.c index 5a4dfe80037..31694bf8653 100644 --- a/ompi/mca/pml/ucx/pml_ucx_datatype.c +++ b/ompi/mca/pml/ucx/pml_ucx_datatype.c @@ -24,7 +24,6 @@ #ifdef HAVE_UCP_REQUEST_PARAM_T #define PML_UCX_DATATYPE_SET_VALUE(_datatype, _val) \ (_datatype)->op_param.send._val; \ - (_datatype)->op_param.bsend._val; \ (_datatype)->op_param.recv._val; #endif @@ -190,8 +189,6 @@ pml_ucx_datatype_t *mca_pml_ucx_init_nbx_datatype(ompi_datatype_t *datatype, pml_datatype->datatype = ucp_datatype; pml_datatype->op_param.send.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK; pml_datatype->op_param.send.cb.send = mca_pml_ucx_send_nbx_completion; - pml_datatype->op_param.bsend.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK; - pml_datatype->op_param.bsend.cb.send = mca_pml_ucx_bsend_nbx_completion; pml_datatype->op_param.recv.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK | UCP_OP_ATTR_FLAG_NO_IMM_CMPL; pml_datatype->op_param.recv.cb.recv = mca_pml_ucx_recv_nbx_completion; @@ -206,6 +203,11 @@ pml_ucx_datatype_t *mca_pml_ucx_init_nbx_datatype(ompi_datatype_t *datatype, PML_UCX_DATATYPE_SET_VALUE(pml_datatype, 
datatype = ucp_datatype); } + pml_datatype->op_param.isend = pml_datatype->op_param.send; + pml_datatype->op_param.irecv = pml_datatype->op_param.recv; + pml_datatype->op_param.isend.op_attr_mask |= ompi_pml_ucx.op_attr_nonblocking; + pml_datatype->op_param.irecv.op_attr_mask |= ompi_pml_ucx.op_attr_nonblocking; + return pml_datatype; } #endif diff --git a/ompi/mca/pml/ucx/pml_ucx_datatype.h b/ompi/mca/pml/ucx/pml_ucx_datatype.h index 921aab0d399..8e1fbbad006 100644 --- a/ompi/mca/pml/ucx/pml_ucx_datatype.h +++ b/ompi/mca/pml/ucx/pml_ucx_datatype.h @@ -21,8 +21,9 @@ typedef struct { int size_shift; struct { ucp_request_param_t send; - ucp_request_param_t bsend; + ucp_request_param_t isend; ucp_request_param_t recv; + ucp_request_param_t irecv; } op_param; } pml_ucx_datatype_t; #endif diff --git a/ompi/mca/pml/ucx/pml_ucx_request.c b/ompi/mca/pml/ucx/pml_ucx_request.c index 744d5803587..51946db832c 100644 --- a/ompi/mca/pml/ucx/pml_ucx_request.c +++ b/ompi/mca/pml/ucx/pml_ucx_request.c @@ -85,6 +85,11 @@ void mca_pml_ucx_send_completion(void *request, ucs_status_t status) mca_pml_ucx_send_completion_internal(request, status); } +void mca_pml_ucx_send_completion_empty(void *request, ucs_status_t status) +{ + /* empty */ +} + void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status) { mca_pml_ucx_bsend_completion_internal(request, status); @@ -216,10 +221,7 @@ static int mca_pml_ucx_persistent_request_free(ompi_request_t **rptr) mca_pml_ucx_persistent_request_detach(preq, tmp_req); ucp_request_free(tmp_req); } - if ((preq->flags & MCA_PML_UCX_REQUEST_FLAG_SEND) && - (MCA_PML_BASE_SEND_BUFFERED == preq->send.mode)) { - OBJ_RELEASE(preq->datatype.ompi_datatype); - } + OMPI_DATATYPE_RELEASE(preq->ompi_datatype); PML_UCX_FREELIST_RETURN(&ompi_pml_ucx.persistent_reqs, &preq->ompi.super); *rptr = MPI_REQUEST_NULL; return OMPI_SUCCESS; diff --git a/ompi/mca/pml/ucx/pml_ucx_request.h b/ompi/mca/pml/ucx/pml_ucx_request.h index bfa3d6c858c..d8fe6144a69 100644 --- 
a/ompi/mca/pml/ucx/pml_ucx_request.h +++ b/ompi/mca/pml/ucx/pml_ucx_request.h @@ -1,6 +1,6 @@ /* * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. - * Copyright (c) 2016 The University of Tennessee and The University + * Copyright (c) 2016-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ @@ -42,7 +42,7 @@ enum { #define PML_UCX_MAKE_SEND_TAG(_tag, _comm) \ ((((uint64_t) (_tag) ) << (PML_UCX_RANK_BITS + PML_UCX_CONTEXT_BITS)) | \ (((uint64_t)(_comm)->c_my_rank ) << PML_UCX_CONTEXT_BITS) | \ - ((uint64_t)(_comm)->c_contextid)) + ((uint64_t)(_comm)->c_index)) #define PML_UCX_MAKE_RECV_TAG(_ucp_tag, _ucp_tag_mask, _tag, _src, _comm) \ @@ -54,7 +54,7 @@ enum { } \ \ _ucp_tag = (((uint64_t)(_src) & UCS_MASK(PML_UCX_RANK_BITS)) << PML_UCX_CONTEXT_BITS) | \ - (_comm)->c_contextid; \ + (_comm)->c_index; \ \ if ((_tag) != MPI_ANY_TAG) { \ _ucp_tag_mask |= PML_UCX_TAG_MASK; \ @@ -99,10 +99,8 @@ struct pml_ucx_persistent_request { unsigned flags; void *buffer; size_t count; - union { - ucp_datatype_t datatype; - ompi_datatype_t *ompi_datatype; - } datatype; + ucp_datatype_t datatype; + ompi_datatype_t *ompi_datatype; ucp_tag_t tag; struct { mca_pml_base_send_mode_t mode; @@ -119,6 +117,8 @@ void mca_pml_ucx_send_completion(void *request, ucs_status_t status); void mca_pml_ucx_recv_completion(void *request, ucs_status_t status, ucp_tag_recv_info_t *info); +void mca_pml_ucx_send_completion_empty(void *request, ucs_status_t status); + void mca_pml_ucx_psend_completion(void *request, ucs_status_t status); void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status); @@ -151,6 +151,12 @@ static inline void mca_pml_ucx_request_reset(ompi_request_t *req) req->req_complete = REQUEST_PENDING; } +/* Use when setting a request's status field. 
+ * Note that a new function 'mca_mpl_ucx_set_send_status_public' shall + * be created and used instead if updating a publicly visible status becomes + * necessary (i.e., the status argument in an user-visible procedure), see the + * recv_status case below for rationale. + */ __opal_attribute_always_inline__ static inline void mca_pml_ucx_set_send_status(ompi_status_public_t* mpi_status, ucs_status_t status) @@ -165,6 +171,11 @@ static inline void mca_pml_ucx_set_send_status(ompi_status_public_t* mpi_status, } } +/* Use when setting a request's status field. + * Note that the next function 'mca_mpl_ucx_set_recv_status_public' shall + * be used instead when updating a publicly visible status (i.e., the + * status argument in an user-visible procedure). + */ static inline int mca_pml_ucx_set_recv_status(ompi_status_public_t* mpi_status, ucs_status_t ucp_status, const ucp_tag_recv_info_t *info) @@ -180,6 +191,10 @@ static inline int mca_pml_ucx_set_recv_status(ompi_status_public_t* mpi_status, mpi_status->_ucount = info->length; } else if (ucp_status == UCS_ERR_MESSAGE_TRUNCATED) { mpi_status->MPI_ERROR = MPI_ERR_TRUNCATE; + mpi_status->MPI_SOURCE = PML_UCX_TAG_GET_SOURCE(tag); + mpi_status->MPI_TAG = PML_UCX_TAG_GET_MPI_TAG(tag); + mpi_status->_cancelled = false; + mpi_status->_ucount = info->length; } else if (ucp_status == UCS_ERR_CANCELED) { mpi_status->MPI_ERROR = MPI_SUCCESS; mpi_status->_cancelled = true; @@ -190,16 +205,41 @@ static inline int mca_pml_ucx_set_recv_status(ompi_status_public_t* mpi_status, return mpi_status->MPI_ERROR; } -static inline int mca_pml_ucx_set_recv_status_safe(ompi_status_public_t* mpi_status, +/* Use when setting a publicly visible status (i.e., the status argument in an + * user-visible procedure). 
+ * Except in procedures that return MPI_ERR_IN_STATUS, the MPI_ERROR + * field of a status object shall never be modified + * See MPI-1.1 doc, sec 3.2.5, p.22 + */ +static inline int mca_pml_ucx_set_recv_status_public(ompi_status_public_t* mpi_status, ucs_status_t ucp_status, const ucp_tag_recv_info_t *info) { if (mpi_status != MPI_STATUS_IGNORE) { - return mca_pml_ucx_set_recv_status(mpi_status, ucp_status, info); - } else if (OPAL_LIKELY(ucp_status == UCS_OK) || (ucp_status == UCS_ERR_CANCELED)) { - return UCS_OK; + if (OPAL_LIKELY(ucp_status == UCS_OK)) { + uint64_t tag = info->sender_tag; + mpi_status->MPI_SOURCE = PML_UCX_TAG_GET_SOURCE(tag); + mpi_status->MPI_TAG = PML_UCX_TAG_GET_MPI_TAG(tag); + mpi_status->_cancelled = false; + mpi_status->_ucount = info->length; + return MPI_SUCCESS; + } else if (ucp_status == UCS_ERR_MESSAGE_TRUNCATED) { + uint64_t tag = info->sender_tag; + mpi_status->MPI_SOURCE = PML_UCX_TAG_GET_SOURCE(tag); + mpi_status->MPI_TAG = PML_UCX_TAG_GET_MPI_TAG(tag); + mpi_status->_cancelled = false; + mpi_status->_ucount = info->length; + return MPI_ERR_TRUNCATE; + } else if (ucp_status == UCS_ERR_CANCELED) { + mpi_status->_cancelled = true; + return MPI_SUCCESS; + } else { + return MPI_ERR_INTERN; + } } else if (ucp_status == UCS_ERR_MESSAGE_TRUNCATED) { return MPI_ERR_TRUNCATE; + } else if (OPAL_LIKELY(ucp_status == UCS_OK) || (ucp_status == UCS_ERR_CANCELED)) { + return MPI_SUCCESS; } return MPI_ERR_INTERN; diff --git a/ompi/mca/sharedfp/base/Makefile.am b/ompi/mca/sharedfp/base/Makefile.am index 419d0de87e1..017e4efd332 100644 --- a/ompi/mca/sharedfp/base/Makefile.am +++ b/ompi/mca/sharedfp/base/Makefile.am @@ -21,8 +21,12 @@ headers += \ base/base.h +libmca_sharedfp_la_SOURCES += \ + base/sharedfp_base_frame.c \ + base/sharedfp_base_find_available.c + +if OMPI_OMPIO_SUPPORT libmca_sharedfp_la_SOURCES += \ base/sharedfp_base_file_select.c \ - base/sharedfp_base_file_unselect.c \ - base/sharedfp_base_find_available.c \ - 
base/sharedfp_base_frame.c + base/sharedfp_base_file_unselect.c +endif diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm.c b/ompi/mca/sharedfp/sm/sharedfp_sm.c index 498c02d716a..a4779fd1b63 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm.c @@ -9,9 +9,10 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2013 University of Houston. All rights reserved. + * Copyright (c) 2008-2021 University of Houston. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,10 +28,13 @@ #include "ompi_config.h" #include "mpi.h" +#include "opal/util/printf.h" #include "ompi/mca/sharedfp/sharedfp.h" #include "ompi/mca/sharedfp/base/base.h" #include "ompi/mca/sharedfp/sm/sharedfp_sm.h" +#include "opal/util/basename.h" + /* * ******************************************************************* * ************************ actions structure ************************ @@ -88,12 +92,36 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_sm_component_file_query(o proc = ompi_group_peer_lookup(group,i); if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)){ opal_output(ompi_sharedfp_base_framework.framework_output, - "mca_sharedfp_sm_component_file_query: Disqualifying myself: (%d/%s) " + "mca_sharedfp_sm_component_file_query: Disqualifying myself: (%s/%s) " "not all processes are on the same node.", - comm->c_contextid, comm->c_name); + ompi_comm_print_cid (comm), comm->c_name); return NULL; } } + + + /* Check that we can actually open the required file */ + char *filename_basename = opal_basename((char*)fh->f_filename); + char *sm_filename; + int comm_cid = -1; + int pid = ompi_comm_rank (comm); + + opal_asprintf(&sm_filename, 
"%s/%s_cid-%d-%d.sm", ompi_process_info.job_session_dir, + filename_basename, comm_cid, pid); + free(filename_basename); + + int sm_fd = open(sm_filename, O_RDWR | O_CREAT, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if ( sm_fd == -1){ + /*error opening file*/ + opal_output(0,"mca_sharedfp_sm_component_file_query: Error, unable to open file for mmap: %s\n",sm_filename); + free(sm_filename); + return NULL; + } + close (sm_fd); + unlink(sm_filename); + free (sm_filename); + /* This module can run */ *priority = mca_sharedfp_sm_priority; return &sm; diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index 42cc532b4e7..edc453a7add 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -9,11 +9,11 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2018 University of Houston. All rights reserved. + * Copyright (c) 2013-2021 University of Houston. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2021 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -35,12 +35,16 @@ #include "sharedfp_sm.h" #include "mpi.h" +#include "opal/util/printf.h" +#include "opal/util/output.h" #include "ompi/constants.h" #include "ompi/group/group.h" #include "ompi/proc/proc.h" #include "ompi/mca/sharedfp/sharedfp.h" #include "ompi/mca/sharedfp/base/base.h" +#include "opal/util/basename.h" + #include #include #include @@ -57,11 +61,9 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, struct mca_sharedfp_sm_data * sm_data = NULL; char * filename_basename; char * sm_filename; - int sm_filename_length; struct mca_sharedfp_sm_offset * sm_offset_ptr; struct mca_sharedfp_sm_offset sm_offset; int sm_fd; - uint32_t comm_cid; int int_pid; pid_t my_pid; @@ -101,18 +103,9 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, ** and then mapping it to memory ** For sharedfp we also want to put the file backed shared memory into the tmp directory */ - filename_basename = basename((char*)filename); - /* format is "%s/%s_cid-%d-%d.sm", see below */ - sm_filename_length = strlen(ompi_process_info.job_session_dir) + 1 + strlen(filename_basename) + 5 + (3*sizeof(uint32_t)+1) + 4; - sm_filename = (char*) malloc( sizeof(char) * sm_filename_length); - if (NULL == sm_filename) { - opal_output(0, "mca_sharedfp_sm_file_open: Error, unable to malloc sm_filename\n"); - free(sm_data); - free(sh); - return OMPI_ERR_OUT_OF_RESOURCE; - } + filename_basename = opal_basename((char*)filename); + /* format is "%s/%s_cid-%s-%d.sm", see below */ - comm_cid = ompi_comm_get_cid(comm); if ( 0 == fh->f_rank ) { my_pid = getpid(); int_pid = (int) my_pid; @@ -120,20 +113,21 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, err = comm->c_coll->coll_bcast (&int_pid, 1, MPI_INT, 0, comm, comm->c_coll->coll_bcast_module ); if ( OMPI_SUCCESS != err ) { opal_output(0,"mca_sharedfp_sm_file_open: Error in bcast operation \n"); - free(sm_filename); + free(filename_basename); free(sm_data); free(sh); return err; } - 
snprintf(sm_filename, sm_filename_length, "%s/%s_cid-%d-%d.sm", ompi_process_info.job_session_dir, - filename_basename, comm_cid, int_pid); + opal_asprintf(&sm_filename, "%s/%s_cid-%s-%d.sm", ompi_process_info.job_session_dir, + filename_basename, ompi_comm_print_cid(comm), int_pid); /* open shared memory file, initialize to 0, map into memory */ sm_fd = open(sm_filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if ( sm_fd == -1){ /*error opening file*/ opal_output(0,"mca_sharedfp_sm_file_open: Error, unable to open file for mmap: %s\n",sm_filename); + free(filename_basename); free(sm_filename); free(sm_data); free(sh); @@ -145,11 +139,20 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, /* TODO: is it necessary to write to the file first? */ if( 0 == fh->f_rank ){ memset ( &sm_offset, 0, sizeof (struct mca_sharedfp_sm_offset )); - write ( sm_fd, &sm_offset, sizeof(struct mca_sharedfp_sm_offset)); + err = opal_best_effort_write ( sm_fd, &sm_offset, sizeof(struct mca_sharedfp_sm_offset)); + if (OPAL_SUCCESS != err) { + free(filename_basename); + free(sm_filename); + free(sm_data); + free(sh); + close (sm_fd); + return err; + } } err = comm->c_coll->coll_barrier (comm, comm->c_coll->coll_barrier_module ); if ( OMPI_SUCCESS != err ) { opal_output(0,"mca_sharedfp_sm_file_open: Error in barrier operation \n"); + free(filename_basename); free(sm_filename); free(sm_data); free(sh); @@ -167,6 +170,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, err = OMPI_ERROR; opal_output(0, "mca_sharedfp_sm_file_open: Error, unable to mmap file: %s\n",sm_filename); opal_output(0, "%s\n", strerror(errno)); + free(filename_basename); free(sm_filename); free(sm_data); free(sh); @@ -185,6 +189,10 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, sm_data->sem_name = (char*) malloc( sizeof(char) * 253); snprintf(sm_data->sem_name,252,"OMPIO_%s",filename_basename); #endif + // We're now done with filename_basename. 
Free it here so that we + // don't have to keep freeing it in the error/return cases. + free(filename_basename); + filename_basename = NULL; if( (sm_data->mutex = sem_open(sm_data->sem_name, O_CREAT, 0644, 1)) != SEM_FAILED ) { #elif defined(HAVE_SEM_INIT) diff --git a/ompi/mca/topo/base/base.h b/ompi/mca/topo/base/base.h index 9ab1a4b927a..f04a1cbff9f 100644 --- a/ompi/mca/topo/base/base.h +++ b/ompi/mca/topo/base/base.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -16,6 +17,8 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -56,6 +59,13 @@ mca_topo_base_comm_select(const ompi_communicator_t* comm, mca_topo_base_module_t** selected_module, uint32_t type); +/* Select a topo module for a particular type of topology */ +OMPI_DECLSPEC int +mca_topo_base_group_select (const ompi_group_t *group, + mca_topo_base_module_t *preferred_module, + mca_topo_base_module_t **selected_module, + uint32_t type); + /* Find all components that want to be considered in this job */ OMPI_DECLSPEC int mca_topo_base_find_available(bool enable_progress_threads, diff --git a/ompi/mca/topo/base/topo_base_cart_create.c b/ompi/mca/topo/base/topo_base_cart_create.c index e751a909f3f..9da23056c70 100644 --- a/ompi/mca/topo/base/topo_base_cart_create.c +++ b/ompi/mca/topo/base/topo_base_cart_create.c @@ -16,6 +16,8 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,78 @@ #include "ompi/mca/topo/base/base.h" #include "ompi/mca/topo/topo.h" +static int mca_topo_base_cart_allocate (ompi_group_t *group, int ndims, const int *dims, const int *periods, + int *my_rank, int *num_procs, mca_topo_base_comm_cart_2_2_0_t **cart_out) +{ + mca_topo_base_comm_cart_2_2_0_t *cart = OBJ_NEW(mca_topo_base_comm_cart_2_2_0_t); + int nprocs = 1; + + *num_procs = group->grp_proc_count; + *my_rank = group->grp_my_rank; + + /* Calculate the number of processes in this grid */ + for (int i = 0 ; i < ndims ; ++i) { + if (dims[i] <= 0) { + return OMPI_ERROR; + } + nprocs *= dims[i]; + } + + /* check for the error condition */ + if (OPAL_UNLIKELY(*num_procs < nprocs)) { + return MPI_ERR_DIMS; + } + + /* check if we have to trim the list of processes */ + if (nprocs < *num_procs) { + *num_procs = nprocs; + } + + if (*my_rank > (nprocs - 1)) { + *my_rank = MPI_UNDEFINED; + } + + if (MPI_UNDEFINED == *my_rank) { + /* nothing more to do */ + *cart_out = NULL; + return OMPI_SUCCESS; + } + + if (OPAL_UNLIKELY(NULL == cart)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + cart->ndims = ndims; + + /* MPI-2.1 allows 0-dimension cartesian communicators, so prevent + a 0-byte malloc -- leave dims as NULL */ + if (0 == ndims) { + *cart_out = cart; + return OMPI_SUCCESS; + } + + cart->dims = (int *) malloc (sizeof (int) * ndims); + cart->periods = (int *) malloc (sizeof (int) * ndims); + cart->coords = (int *) malloc (sizeof (int) * ndims); + if (OPAL_UNLIKELY(NULL == cart->dims || NULL == cart->periods || NULL == cart->coords)) { + OBJ_RELEASE(cart); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* Cartesian communicator; copy the right data to the common information */ + memcpy(cart->dims, dims, ndims * sizeof(int)); + memcpy(cart->periods, periods, ndims * sizeof(int)); + + nprocs = *num_procs; + for (int i = 0, rank = *my_rank ; i < ndims ; ++i) { + nprocs /= cart->dims[i]; + cart->coords[i] = rank / nprocs; 
+ rank %= nprocs; + } + + *cart_out = cart; + return OMPI_SUCCESS; +} + /* * function - makes a new communicator to which topology information * has been attached @@ -55,135 +129,50 @@ int mca_topo_base_cart_create(mca_topo_base_module_t *topo, bool reorder, ompi_communicator_t** comm_topo) { - int nprocs = 1, i, new_rank, num_procs, ret; + int new_rank, num_procs, ret; ompi_communicator_t *new_comm; - ompi_proc_t **topo_procs = NULL; mca_topo_base_comm_cart_2_2_0_t* cart; + ompi_group_t *c_local_group; - num_procs = old_comm->c_local_group->grp_proc_count; - new_rank = old_comm->c_local_group->grp_my_rank; assert(topo->type == OMPI_COMM_CART); - /* Calculate the number of processes in this grid */ - for (i = 0; i < ndims; ++i) { - if(dims[i] <= 0) { - return OMPI_ERROR; - } - nprocs *= dims[i]; - } - - /* check for the error condition */ - if (num_procs < nprocs) { - return MPI_ERR_DIMS; - } - - /* check if we have to trim the list of processes */ - if (nprocs < num_procs) { - num_procs = nprocs; - } - - if (new_rank > (nprocs-1)) { - ndims = 0; - new_rank = MPI_UNDEFINED; - num_procs = 0; + ret = mca_topo_base_cart_allocate (old_comm->c_local_group, ndims, dims, periods, + &new_rank, &num_procs, &cart); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + return ret; } - cart = OBJ_NEW(mca_topo_base_comm_cart_2_2_0_t); - if( NULL == cart ) { + /* Copy the proc structure from the previous communicator over to + the new one. The topology module is then able to work on this + copy and rearrange it as it deems fit. NTH: seems odd that this + function has always clipped the group size here. 
It might be + worthwhile to clip the group in the module (if reordering) */ + c_local_group = ompi_group_flatten (old_comm->c_local_group, num_procs); + if (OPAL_UNLIKELY(NULL == c_local_group)) { + OBJ_RELEASE(cart); return OMPI_ERR_OUT_OF_RESOURCE; } - cart->ndims = ndims; - - /* MPI-2.1 allows 0-dimension cartesian communicators, so prevent - a 0-byte malloc -- leave dims as NULL */ - if( ndims > 0 ) { - cart->dims = (int*)malloc(sizeof(int) * ndims); - if (NULL == cart->dims) { - OBJ_RELEASE(cart); - return OMPI_ERROR; - } - memcpy(cart->dims, dims, ndims * sizeof(int)); - /* Cartesian communicator; copy the right data to the common information */ - cart->periods = (int*)malloc(sizeof(int) * ndims); - if (NULL == cart->periods) { - OBJ_RELEASE(cart); - return OMPI_ERR_OUT_OF_RESOURCE; - } - memcpy(cart->periods, periods, ndims * sizeof(int)); - - cart->coords = (int*)malloc(sizeof(int) * ndims); - if (NULL == cart->coords) { - OBJ_RELEASE(cart); - return OMPI_ERR_OUT_OF_RESOURCE; - } - { /* setup the cartesian topology */ - int n_procs = num_procs, rank = new_rank; - - for (i = 0; i < ndims; ++i) { - n_procs /= cart->dims[i]; - cart->coords[i] = rank / n_procs; - rank %= n_procs; - } - } - } + ret = ompi_comm_create (old_comm, c_local_group, &new_comm); - /* JMS: This should really be refactored to use - comm_create_group(), because ompi_comm_allocate() still - complains about 0-byte mallocs in debug builds for 0-member - groups. */ - if (num_procs > 0) { - /* Copy the proc structure from the previous communicator over to - the new one. The topology module is then able to work on this - copy and rearrange it as it deems fit. 
*/ - topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *)); - if (NULL == topo_procs) { - OBJ_RELEASE(cart); - return OMPI_ERR_OUT_OF_RESOURCE; - } - if(OMPI_GROUP_IS_DENSE(old_comm->c_local_group)) { - memcpy(topo_procs, - old_comm->c_local_group->grp_proc_pointers, - num_procs * sizeof(ompi_proc_t *)); - } else { - for(i = 0 ; i < num_procs; i++) { - topo_procs[i] = ompi_group_peer_lookup(old_comm->c_local_group,i); - } - } - } + ompi_group_free (&c_local_group); - /* allocate a new communicator */ - new_comm = ompi_comm_allocate(num_procs, 0); - if (NULL == new_comm) { - free(topo_procs); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OBJ_RELEASE(cart); - return MPI_ERR_INTERN; + return ret; } - ret = ompi_comm_enable(old_comm, new_comm, - new_rank, num_procs, topo_procs); - if (OMPI_SUCCESS != ret) { - /* something wrong happened during setting the communicator */ - free(topo_procs); - OBJ_RELEASE(cart); - if (MPI_COMM_NULL != new_comm) { - new_comm->c_topo = NULL; - new_comm->c_flags &= ~OMPI_COMM_CART; - ompi_comm_free (&new_comm); - } - return ret; + *comm_topo = new_comm; + + if (MPI_COMM_NULL == new_comm) { + /* not part of this new communicator */ + return OMPI_SUCCESS; } new_comm->c_topo = topo; new_comm->c_topo->mtc.cart = cart; new_comm->c_topo->reorder = reorder; new_comm->c_flags |= OMPI_COMM_CART; - *comm_topo = new_comm; - - if( MPI_UNDEFINED == new_rank ) { - ompi_comm_free(&new_comm); - *comm_topo = MPI_COMM_NULL; - } /* end here */ return OMPI_SUCCESS; @@ -197,15 +186,9 @@ static void mca_topo_base_comm_cart_2_2_0_construct(mca_topo_base_comm_cart_2_2_ } static void mca_topo_base_comm_cart_2_2_0_destruct(mca_topo_base_comm_cart_2_2_0_t * cart) { - if (NULL != cart->dims) { - free(cart->dims); - } - if (NULL != cart->periods) { - free(cart->periods); - } - if (NULL != cart->coords) { - free(cart->coords); - } + free(cart->dims); + free(cart->periods); + free(cart->coords); } OBJ_CLASS_INSTANCE(mca_topo_base_comm_cart_2_2_0_t, 
opal_object_t, diff --git a/ompi/mca/topo/base/topo_base_comm_select.c b/ompi/mca/topo/base/topo_base_comm_select.c index 165727fd393..617f8a07dc5 100644 --- a/ompi/mca/topo/base/topo_base_comm_select.c +++ b/ompi/mca/topo/base/topo_base_comm_select.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -68,10 +71,10 @@ static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL); * 4. Select the module with the highest priority. * 5. OBJ_RELEASE all the "losing" modules. */ -int mca_topo_base_comm_select(const ompi_communicator_t* comm, - mca_topo_base_module_t* preferred_module, - mca_topo_base_module_t** selected_module, - uint32_t type) +static int _mca_topo_base_select (const ompi_communicator_t *comm, const ompi_group_t *group, + mca_topo_base_module_t *preferred_module, + mca_topo_base_module_t **selected_module, + uint32_t type) { int priority; int best_priority; @@ -88,9 +91,15 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, if (OMPI_SUCCESS != (err = mca_topo_base_lazy_init())) { return err; } - opal_output_verbose(10, ompi_topo_base_framework.framework_output, - "topo:base:comm_select: new communicator: %s (cid %d)", - comm->c_name, comm->c_contextid); + + if (comm) { + opal_output_verbose(10, ompi_topo_base_framework.framework_output, + "topo:base:comm_select: new communicator: %s (cid %s)", + comm->c_name, ompi_comm_print_cid (comm)); + } else { + opal_output_verbose(10, ompi_topo_base_framework.framework_output, + "topo:base:group_select: new communicator"); + } /* Check and see if a preferred 
component was provided. If it was provided then it should be used (if possible) */ @@ -106,7 +115,7 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, /* query the component for its priority and get its module structure. This is necessary to proceed */ component = (mca_topo_base_component_t *)preferred_module->topo_component; - module = component->topoc_comm_query(comm, &priority, type); + module = component->topoc_query(comm, group, &priority, type); if (NULL != module) { /* this query seems to have returned something legitimate @@ -149,14 +158,14 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, /* * we can call the query function only if there is a function :-) */ - if (NULL == component->topoc_comm_query) { + if (NULL == component->topoc_query) { opal_output_verbose(10, ompi_topo_base_framework.framework_output, "select: no query, ignoring the component"); } else { /* * call the query function and see what it returns */ - module = component->topoc_comm_query(comm, &priority, type); + module = component->topoc_query(comm, group, &priority, type); if (NULL == module) { /* @@ -242,7 +251,7 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, if(ompi_ftmpi_enabled) { /* check if module is tested for FT, warn if not. 
*/ const char* ft_whitelist=""; - opal_show_help("help-ft-mpi.txt", "module:untested:failundef", true, + opal_show_help("help-mpi-ft.txt", "module:untested:failundef", true, best_component->topoc_version.mca_type_name, best_component->topoc_version.mca_component_name, ft_whitelist); @@ -251,6 +260,17 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, return OMPI_SUCCESS; } +int mca_topo_base_comm_select (const ompi_communicator_t *comm, mca_topo_base_module_t *preferred_module, + mca_topo_base_module_t **selected_module, uint32_t type) +{ + return _mca_topo_base_select (comm, NULL, preferred_module, selected_module, type); +} + +int mca_topo_base_group_select(const ompi_group_t *group, mca_topo_base_module_t *preferred_module, + mca_topo_base_module_t **selected_module, uint32_t type) +{ + return _mca_topo_base_select (NULL, group, preferred_module, selected_module, type); +} /* * This function fills in the null function pointers, in other words, diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create.c b/ompi/mca/topo/base/topo_base_dist_graph_create.c index fdc202f879a..66e2976deb5 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2008 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
*/ #include "ompi_config.h" @@ -289,20 +292,10 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, { int err; - if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old, - comm_old->c_local_group, - newcomm)) ) { + if (OMPI_SUCCESS != (err = ompi_comm_dup_with_info (comm_old, info, newcomm))) { OBJ_RELEASE(module); return err; } - // But if there is an info object, the above call didn't make use - // of it, so we'll do a dup-with-info to get the final comm and - // free the above intermediate newcomm: - if (info && info != &(MPI_INFO_NULL->super)) { - ompi_communicator_t *intermediate_comm = *newcomm; - ompi_comm_dup_with_info (intermediate_comm, info, newcomm); - ompi_comm_free(&intermediate_comm); - } assert(NULL == (*newcomm)->c_topo); (*newcomm)->c_topo = module; @@ -332,18 +325,10 @@ static void mca_topo_base_comm_dist_graph_2_2_0_construct(mca_topo_base_comm_dis } static void mca_topo_base_comm_dist_graph_2_2_0_destruct(mca_topo_base_comm_dist_graph_2_2_0_t * dist_graph) { - if (NULL != dist_graph->in) { - free(dist_graph->in); - } - if (NULL != dist_graph->inw) { - free(dist_graph->inw); - } - if (NULL != dist_graph->out) { - free(dist_graph->out); - } - if (NULL != dist_graph->outw) { - free(dist_graph->outw); - } + free(dist_graph->in); + free(dist_graph->inw); + free(dist_graph->out); + free(dist_graph->outw); } OBJ_CLASS_INSTANCE(mca_topo_base_comm_dist_graph_2_2_0_t, opal_object_t, diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c index 5b12042708b..336aa05c733 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2008 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * Copyright (c) 2014-2015 Research Organization for Information 
Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corp. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. */ #include "ompi_config.h" @@ -20,40 +23,23 @@ #include "ompi/mca/topo/base/base.h" -int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, - ompi_communicator_t *comm_old, - int indegree, const int sources[], - const int sourceweights[], - int outdegree, - const int destinations[], - const int destweights[], - opal_info_t *info, int reorder, - ompi_communicator_t **newcomm) +static int _mca_topo_base_dist_graph_create_adjacent (mca_topo_base_module_t* module, int indegree, + const int sources[], const int sourceweights[], + int outdegree, const int destinations[], + const int destweights[], int reorder, + ompi_communicator_t **newcomm) { mca_topo_base_comm_dist_graph_2_2_0_t *topo = NULL; int err; - if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old, - comm_old->c_local_group, - newcomm)) ) { - return err; - } - // But if there is an info object, the above call didn't make use - // of it, so we'll do a dup-with-info to get the final comm and - // free the above intermediate newcomm: - if (info && info != &(MPI_INFO_NULL->super)) { - ompi_communicator_t *intermediate_comm = *newcomm; - ompi_comm_dup_with_info (intermediate_comm, info, newcomm); - ompi_comm_free(&intermediate_comm); - } - err = OMPI_ERR_OUT_OF_RESOURCE; /* suppose by default something bad will happens */ assert( NULL == (*newcomm)->c_topo ); topo = OBJ_NEW(mca_topo_base_comm_dist_graph_2_2_0_t); - if( NULL == topo ) { - goto bail_out; + if (NULL == topo) { + ompi_comm_free (newcomm); + return OMPI_ERR_OUT_OF_RESOURCE; } topo->in = topo->inw = NULL; topo->out = topo->outw = NULL; @@ -103,16 +89,29 @@ int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, bail_out: if (NULL != topo) { - if( NULL != topo->in ) free(topo->in); - if( MPI_UNWEIGHTED != sourceweights ) { - if( 
NULL != topo->inw ) free(topo->inw); - } - if( NULL != topo->out ) free(topo->out); - if( MPI_UNWEIGHTED != destweights ) { - if( NULL != topo->outw ) free(topo->outw); - } OBJ_RELEASE(topo); } + ompi_comm_free(newcomm); return err; } + +int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, + ompi_communicator_t *comm_old, + int indegree, const int sources[], + const int sourceweights[], + int outdegree, + const int destinations[], + const int destweights[], + opal_info_t *info, int reorder, + ompi_communicator_t **newcomm) +{ + int err; + + if (OMPI_SUCCESS != (err = ompi_comm_dup_with_info (comm_old, info, newcomm))) { + return err; + } + + return _mca_topo_base_dist_graph_create_adjacent (module, indegree, sources, sourceweights, outdegree, + destinations, destweights, reorder, newcomm); +} diff --git a/ompi/mca/topo/base/topo_base_find_available.c b/ompi/mca/topo/base/topo_base_find_available.c index 64a831c4cef..579f82e09fa 100644 --- a/ompi/mca/topo/base/topo_base_find_available.c +++ b/ompi/mca/topo/base/topo_base_find_available.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology diff --git a/ompi/mca/topo/base/topo_base_graph_create.c b/ompi/mca/topo/base/topo_base_graph_create.c index f41cd033d9d..dfd2708bd53 100644 --- a/ompi/mca/topo/base/topo_base_graph_create.c +++ b/ompi/mca/topo/base/topo_base_graph_create.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -24,6 +25,46 @@ #include "ompi/mca/topo/base/base.h" #include "ompi/mca/topo/topo.h" +static int mca_topo_base_graph_allocate (ompi_group_t *group, int nnodes, const int *index, const int *edges, + int *num_procs, mca_topo_base_comm_graph_2_2_0_t **graph_out) +{ + 
mca_topo_base_comm_graph_2_2_0_t *graph; + + *num_procs = group->grp_proc_count; + + if (*num_procs < nnodes) { + return MPI_ERR_DIMS; + } + + if (*num_procs > nnodes) { + *num_procs = nnodes; + } + + if (group->grp_my_rank > (nnodes - 1) || MPI_UNDEFINED == group->grp_my_rank) { + *graph_out = NULL; + return OMPI_SUCCESS; + } + + graph = OBJ_NEW(mca_topo_base_comm_graph_2_2_0_t); + if( NULL == graph ) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + graph->nnodes = nnodes; + graph->index = (int *) malloc (sizeof (int) * nnodes); + graph->edges = (int *) malloc (sizeof (int) * index[nnodes-1]); + if (OPAL_UNLIKELY(NULL == graph->index || NULL == graph->edges)) { + OBJ_RELEASE(graph); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + memcpy(graph->index, index, nnodes * sizeof(int)); + memcpy(graph->edges, edges, index[nnodes-1] * sizeof(int)); + + *graph_out = graph; + + return OMPI_SUCCESS; +} + /* * * function - makes a new communicator to which topology information @@ -40,111 +81,41 @@ * @retval MPI_ERR_OUT_OF_RESOURCE */ -int mca_topo_base_graph_create(mca_topo_base_module_t *topo, - ompi_communicator_t* old_comm, - int nnodes, - const int *index, - const int *edges, - bool reorder, - ompi_communicator_t** comm_topo) +int mca_topo_base_graph_create (mca_topo_base_module_t *topo, ompi_communicator_t *old_comm, + int nnodes, const int *index, const int *edges, bool reorder, + ompi_communicator_t **comm_topo) { - ompi_communicator_t *new_comm; - int new_rank, num_procs, ret, i; - ompi_proc_t **topo_procs = NULL; - mca_topo_base_comm_graph_2_2_0_t* graph; + mca_topo_base_comm_graph_2_2_0_t *graph; + ompi_group_t *c_local_group; + int num_procs, ret; - num_procs = old_comm->c_local_group->grp_proc_count; - new_rank = old_comm->c_local_group->grp_my_rank; assert(topo->type == OMPI_COMM_GRAPH); - if( num_procs < nnodes ) { - return MPI_ERR_DIMS; - } - if( num_procs > nnodes ) { - num_procs = nnodes; - } - if( new_rank > (nnodes - 1) ) { - new_rank = MPI_UNDEFINED; - num_procs = 0; 
- nnodes = 0; - } - - graph = OBJ_NEW(mca_topo_base_comm_graph_2_2_0_t); - if( NULL == graph ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - graph->nnodes = nnodes; + *comm_topo = MPI_COMM_NULL; - /* Don't do any of the other initialization if we're not supposed - to be part of the new communicator (because nnodes has been - reset to 0, making things like index[nnodes-1] be junk). - - JMS: This should really be refactored to use - comm_create_group(), because ompi_comm_allocate() still - complains about 0-byte mallocs in debug builds for 0-member - groups. */ - if (MPI_UNDEFINED != new_rank) { - graph->index = (int*)malloc(sizeof(int) * nnodes); - if (NULL == graph->index) { - OBJ_RELEASE(graph); - return OMPI_ERR_OUT_OF_RESOURCE; - } - memcpy(graph->index, index, nnodes * sizeof(int)); - - /* Graph communicator; copy the right data to the common information */ - graph->edges = (int*)malloc(sizeof(int) * index[nnodes-1]); - if (NULL == graph->edges) { - OBJ_RELEASE(graph); - return OMPI_ERR_OUT_OF_RESOURCE; - } - memcpy(graph->edges, edges, index[nnodes-1] * sizeof(int)); - - topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *)); - if (NULL == topo_procs) { - OBJ_RELEASE(graph); - return OMPI_ERR_OUT_OF_RESOURCE; - } - if(OMPI_GROUP_IS_DENSE(old_comm->c_local_group)) { - memcpy(topo_procs, - old_comm->c_local_group->grp_proc_pointers, - num_procs * sizeof(ompi_proc_t *)); - } else { - for(i = 0 ; i < num_procs; i++) { - topo_procs[i] = ompi_group_peer_lookup(old_comm->c_local_group,i); - } - } + ret = mca_topo_base_graph_allocate (old_comm->c_local_group, nnodes, index, edges, &num_procs, + &graph); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; } - /* allocate a new communicator */ - new_comm = ompi_comm_allocate(nnodes, 0); - if (NULL == new_comm) { - free(topo_procs); + c_local_group = ompi_group_flatten (old_comm->c_local_group, nnodes); + if (OPAL_UNLIKELY(NULL == c_local_group)) { OBJ_RELEASE(graph); return OMPI_ERR_OUT_OF_RESOURCE; } 
- ret = ompi_comm_enable(old_comm, new_comm, - new_rank, num_procs, topo_procs); - if (OMPI_SUCCESS != ret) { - free(topo_procs); + ret = ompi_comm_create (old_comm, c_local_group, comm_topo); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OBJ_RELEASE(graph); - if (MPI_COMM_NULL != new_comm) { - new_comm->c_topo = NULL; - new_comm->c_flags &= ~OMPI_COMM_GRAPH; - ompi_comm_free (&new_comm); - } return ret; } - - new_comm->c_topo = topo; - new_comm->c_topo->mtc.graph = graph; - new_comm->c_flags |= OMPI_COMM_GRAPH; - new_comm->c_topo->reorder = reorder; - *comm_topo = new_comm; - - if( MPI_UNDEFINED == new_rank ) { - ompi_comm_free(&new_comm); - *comm_topo = MPI_COMM_NULL; + + if (MPI_COMM_NULL != *comm_topo) { + (*comm_topo)->c_topo = topo; + (*comm_topo)->c_topo->mtc.graph = graph; + (*comm_topo)->c_flags |= OMPI_COMM_GRAPH; + (*comm_topo)->c_topo->reorder = reorder; } return OMPI_SUCCESS; diff --git a/ompi/mca/topo/basic/topo_basic.h b/ompi/mca/topo/basic/topo_basic.h index 006005bffcd..c02d75fdd40 100644 --- a/ompi/mca/topo/basic/topo_basic.h +++ b/ompi/mca/topo/basic/topo_basic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2013 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights diff --git a/ompi/mca/topo/basic/topo_basic_component.c b/ompi/mca/topo/basic/topo_basic_component.c index 83f26519273..0e39fe6a6f5 100644 --- a/ompi/mca/topo/basic/topo_basic_component.c +++ b/ompi/mca/topo/basic/topo_basic_component.c @@ -30,7 +30,7 @@ const char *mca_topo_basic_component_version_string = */ static int init_query(bool enable_progress_threads, bool enable_mpi_threads); static struct mca_topo_base_module_t * -comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type); +mca_topo_basic_query(const ompi_communicator_t *comm, const ompi_group_t *group, int *priority, uint32_t type); /* * Public component structure @@ -40,9 +40,8 @@ mca_topo_basic_component_t mca_topo_basic_component = .topoc_version = { MCA_TOPO_BASE_VERSION_2_2_0, .mca_component_name = "basic", - .mca_component_major_version = OMPI_MAJOR_VERSION, - .mca_component_minor_version = OMPI_MINOR_VERSION, - .mca_component_release_version = OMPI_RELEASE_VERSION, + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), /* NULLs for the rest of the function pointers */ }, @@ -52,7 +51,7 @@ mca_topo_basic_component_t mca_topo_basic_component = }, .topoc_init_query = init_query, - .topoc_comm_query = comm_query, + .topoc_query = mca_topo_basic_query, }; @@ -64,7 +63,7 @@ static int init_query(bool enable_progress_threads, bool enable_mpi_threads) static struct mca_topo_base_module_t * -comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type) +mca_topo_basic_query (const ompi_communicator_t *comm, const ompi_group_t *group, int *priority, uint32_t type) { /* Don't use OBJ_NEW, we need to zero the memory or the functions pointers * will not be correctly copied over from the base. 
@@ -81,5 +80,3 @@ comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type) basic->type = type; return basic; } - - diff --git a/ompi/mca/topo/topo.h b/ompi/mca/topo/topo.h index 7735250f290..5ff6186183f 100644 --- a/ompi/mca/topo/topo.h +++ b/ompi/mca/topo/topo.h @@ -45,12 +45,11 @@ typedef int (*mca_topo_base_component_init_query_2_2_0_fn_t) bool enable_mpi_threads); /* - * Communicator query, called during cart and graph communicator - * creation. + * Communicator/group query, called during cart and graph communicator creation. */ typedef struct mca_topo_base_module_t* -(*mca_topo_base_component_comm_query_2_2_0_fn_t) - (const ompi_communicator_t *comm, int *priority, uint32_t type); +(*mca_topo_base_component_query_2_2_0_fn_t) + (const ompi_communicator_t *comm, const ompi_group_t *group, int *priority, uint32_t type); /* * Structure for topo v2.1.0 components.This is chained to MCA v2.0.0 @@ -60,7 +59,7 @@ typedef struct mca_topo_base_component_2_2_0_t { mca_base_component_data_t topoc_data; mca_topo_base_component_init_query_2_2_0_fn_t topoc_init_query; - mca_topo_base_component_comm_query_2_2_0_fn_t topoc_comm_query; + mca_topo_base_component_query_2_2_0_fn_t topoc_query; } mca_topo_base_component_2_2_0_t; typedef mca_topo_base_component_2_2_0_t mca_topo_base_component_t; @@ -344,7 +343,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_topo_base_module_t); /* * ****************************************************************** - * ********** Use in components that are of type topo v2.2.0 ******** + * ********** Use in components that are of type topo v2.3.0 ******** * ****************************************************************** */ #define MCA_TOPO_BASE_VERSION_2_2_0 \ diff --git a/ompi/mca/topo/treematch/topo_treematch.h b/ompi/mca/topo/treematch/topo_treematch.h index bcc4d748bfd..57703462d4d 100644 --- a/ompi/mca/topo/treematch/topo_treematch.h +++ b/ompi/mca/topo/treematch/topo_treematch.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 
; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights diff --git a/ompi/mca/topo/treematch/topo_treematch_component.c b/ompi/mca/topo/treematch/topo_treematch_component.c index fca7e5b71b0..e557d44b554 100644 --- a/ompi/mca/topo/treematch/topo_treematch_component.c +++ b/ompi/mca/topo/treematch/topo_treematch_component.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights @@ -26,7 +27,7 @@ const char *mca_topo_treematch_component_version_string = */ static int init_query(bool enable_progress_threads, bool enable_mpi_threads); static struct mca_topo_base_module_t * -comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type); +mca_topo_treematch_query(const ompi_communicator_t *comm, const ompi_group_t *group, int *priority, uint32_t type); static int mca_topo_treematch_component_register(void); /* @@ -34,29 +35,24 @@ static int mca_topo_treematch_component_register(void); */ mca_topo_treematch_component_2_2_0_t mca_topo_treematch_component = { - { - { + .super = { + .topoc_version = { MCA_TOPO_BASE_VERSION_2_2_0, - - "treematch", - OMPI_MAJOR_VERSION, - OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION, - NULL, /* component open */ - NULL, /* component close */ - NULL, /* component query */ - mca_topo_treematch_component_register, /* component register */ + .mca_component_name = "treematch", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + .mca_register_component_params = mca_topo_treematch_component_register, }, - { + .topoc_data = { /* The component is checkpoint ready */ MCA_BASE_METADATA_PARAM_CHECKPOINT }, - init_query, - comm_query + .topoc_init_query = init_query, + .topoc_query = mca_topo_treematch_query, }, - 0 /* reorder: by default centralized */ + 
.reorder_mode = 0 /* reorder: by default centralized */ }; @@ -72,7 +68,7 @@ static int init_query(bool enable_progress_threads, bool enable_mpi_threads) static struct mca_topo_base_module_t * -comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type) +mca_topo_treematch_query(const ompi_communicator_t *comm, const ompi_group_t *group, int *priority, uint32_t type) { mca_topo_treematch_module_t *treematch; diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c index b3ba707c746..4ca64d7c830 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c @@ -50,6 +50,6 @@ mca_vprotocol_pessimist_module_t mca_vprotocol_pessimist = int mca_vprotocol_pessimist_dump(struct ompi_communicator_t* comm, int verbose) { - V_OUTPUT_VERBOSE(verbose, "vprotocol_pessimist: dump for comm %d", comm->c_contextid); + V_OUTPUT_VERBOSE(verbose, "vprotocol_pessimist: dump for comm %s", ompi_comm_print_cid (comm)); return mca_pml_v.host_pml.pml_dump(comm, verbose); } diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h index b639b47b394..07a9944a25c 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h @@ -195,7 +195,7 @@ static inline void vprotocol_pessimist_sender_based_copy_start(ompi_request_t *r sbhdr->size = pmlreq->req_bytes_packed; sbhdr->dst = pmlreq->req_base.req_peer; sbhdr->tag = pmlreq->req_base.req_tag; - sbhdr->contextid = pmlreq->req_base.req_comm->c_contextid; + sbhdr->contextid = ompi_comm_get_extended_cid (pmlreq->req_base.req_comm); sbhdr->sequence = pmlreq->req_base.req_sequence; ftreq->sb.cursor += sizeof(vprotocol_pessimist_sender_based_header_t); V_OUTPUT_VERBOSE(70, "pessimist:\tsb\tsend\t%"PRIpclock"\tsize %lu (+%lu header)", 
VPESSIMIST_FTREQ(req)->reqid, (long unsigned)pmlreq->req_bytes_packed, (long unsigned)sizeof(vprotocol_pessimist_sender_based_header_t)); diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h index c00dfff70e3..c19ade18e76 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h @@ -46,7 +46,7 @@ typedef struct vprotocol_pessimist_sender_based_header_t size_t size; int dst; int tag; - uint32_t contextid; + ompi_comm_extended_cid_t contextid; vprotocol_pessimist_clock_t sequence; } vprotocol_pessimist_sender_based_header_t; diff --git a/ompi/message/message.c b/ompi/message/message.c index deb0a4697f9..9bbe5f3d1f5 100644 --- a/ompi/message/message.c +++ b/ompi/message/message.c @@ -6,6 +6,8 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +22,7 @@ #include "opal/class/opal_object.h" #include "ompi/message/message.h" #include "ompi/constants.h" +#include "ompi/instance/instance.h" static void ompi_message_constructor(ompi_message_t *msg); @@ -27,6 +30,8 @@ OBJ_CLASS_INSTANCE(ompi_message_t, opal_free_list_item_t, ompi_message_constructor, NULL); +static int ompi_message_finalize (void); + opal_free_list_t ompi_message_free_list = {{{0}}}; opal_pointer_array_t ompi_message_f_to_c_table = {{0}}; @@ -67,11 +72,12 @@ ompi_message_init(void) return OMPI_ERR_NOT_FOUND; } + ompi_mpi_instance_append_finalize (ompi_message_finalize); + return rc; } -int -ompi_message_finalize(void) +static int ompi_message_finalize (void) { OBJ_DESTRUCT(&ompi_message_no_proc); OBJ_DESTRUCT(&ompi_message_free_list); diff --git a/ompi/message/message.h b/ompi/message/message.h index 0f0f1eacfac..0706a7490fb 100644 --- a/ompi/message/message.h +++ b/ompi/message/message.h @@ -4,6 +4,8 @@ * Copyright (c) 2012-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -49,8 +51,6 @@ typedef struct ompi_predefined_message_t ompi_predefined_message_t; int ompi_message_init(void); -int ompi_message_finalize(void); - OMPI_DECLSPEC extern opal_free_list_t ompi_message_free_list; OMPI_DECLSPEC extern opal_pointer_array_t ompi_message_f_to_c_table; OMPI_DECLSPEC extern ompi_predefined_message_t ompi_message_no_proc; diff --git a/ompi/mpi/c/Makefile.am b/ompi/mpi/c/Makefile.am index d9cb60bf19a..4e6098a8ab9 100644 --- a/ompi/mpi/c/Makefile.am +++ b/ompi/mpi/c/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2009-2014 Cisco Systems, Inc. 
All rights reserved. +# Copyright (c) 2009-2021 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 Sandia National Laboratories. All rights reserved. # Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. # Copyright (c) 2012-2013 Inria. All rights reserved. @@ -17,6 +17,8 @@ # reserved. # Copyright (c) 2015-2020 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -24,67 +26,72 @@ # $HEADER$ # -SUBDIRS = profile - -# OMPI_BUILD_MPI_PROFILING is enabled when we want our generated MPI_* symbols -# to be replaced by PMPI_*. -# In this directory, we need it to be 0 - -AM_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=0 - -# -# The top directory always builds MPI_* bindings. The bottom directory -# always builds PMPI_* bindings. The cases where the top directory -# needs to be built are: -# -# 1. When profiling is disabled. -# 2. When profiling is enabled but weak symbol support is absent. -# +# The purpose of the profiling layer is to allow intercept libraries +# which override the MPI_ namespace symbols. We potentially compile +# every MPI function twice. We always build the profiling layer, +# because the symbols that are always implemented as functions are the +# PMPI_ namespace symbols. We sometimes also build the non-profiling +# layer, if weak symbols can't be used to alias the MPI_ namespace +# into the PMPI_ namespace. -noinst_LTLIBRARIES = libmpi_c.la +noinst_LTLIBRARIES = libmpi_c.la libmpi_c_profile.la if BUILD_MPI_BINDINGS_LAYER -noinst_LTLIBRARIES += libmpi_c_mpi.la +noinst_LTLIBRARIES += libmpi_c_noprofile.la endif headers = bindings.h -# -# libmpi_c.la is always build because it contains some non-profilied -# functions. -# - +# attr_fn.c contains attribute manipulation functions which do not +# have profiling implications, and so are always built.
libmpi_c_la_SOURCES = \ attr_fn.c +libmpi_c_la_LIBADD = libmpi_c_profile.la +if BUILD_MPI_BINDINGS_LAYER +libmpi_c_la_LIBADD += libmpi_c_noprofile.la +endif + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +ompidir = $(ompiincludedir)/$(subdir) +ompi_HEADERS = $(headers) +endif # -# libmpi_c_mpi.la is only built in some cases (see above) +# List of all C files that have profile versions # - -libmpi_c_mpi_la_SOURCES = \ +interface_profile_sources = \ abort.c \ add_error_class.c \ add_error_code.c \ add_error_string.c \ allgather.c \ iallgather.c \ + allgather_init.c \ allgatherv.c \ iallgatherv.c \ + allgatherv_init.c \ alloc_mem.c \ allreduce.c \ iallreduce.c \ + allreduce_init.c \ alltoall.c \ ialltoall.c \ + alltoall_init.c \ alltoallv.c \ ialltoallv.c \ + alltoallv_init.c \ alltoallw.c \ ialltoallw.c \ + alltoallw_init.c \ attr_delete.c \ attr_get.c \ attr_put.c \ barrier.c \ ibarrier.c \ + barrier_init.c \ bcast.c \ ibcast.c \ + bcast_init.c \ bsend.c \ bsend_init.c \ buffer_attach.c \ @@ -106,6 +113,7 @@ libmpi_c_mpi_la_SOURCES = \ comm_connect.c \ comm_create.c \ comm_create_errhandler.c \ + comm_create_from_group.c \ comm_create_group.c \ comm_create_keyval.c \ comm_delete_attr.c \ @@ -113,6 +121,7 @@ libmpi_c_mpi_la_SOURCES = \ comm_dup.c \ comm_dup_with_info.c \ comm_idup.c \ + comm_idup_with_info.c \ comm_f2c.c \ comm_free.c \ comm_free_keyval.c \ @@ -140,7 +149,7 @@ libmpi_c_mpi_la_SOURCES = \ comm_split.c \ comm_split_type.c \ comm_test_inter.c \ - compare_and_swap.c \ + compare_and_swap.c \ dims_create.c \ errhandler_c2f.c \ errhandler_f2c.c \ @@ -148,8 +157,9 @@ libmpi_c_mpi_la_SOURCES = \ error_class.c \ error_string.c \ exscan.c \ - fetch_and_op.c \ iexscan.c \ + exscan_init.c \ + fetch_and_op.c \ file_c2f.c \ file_call_errhandler.c \ file_close.c \ @@ -216,13 +226,15 @@ libmpi_c_mpi_la_SOURCES = \ free_mem.c \ gather.c \ igather.c \ + gather_init.c \ gatherv.c \ igatherv.c \ + gatherv_init.c \ get_address.c \ get_count.c \ 
get_elements.c \ get_elements_x.c \ - get_accumulate.c \ + get_accumulate.c \ get_library_version.c \ get_processor_name.c \ get_version.c \ @@ -240,6 +252,7 @@ libmpi_c_mpi_la_SOURCES = \ group_excl.c \ group_f2c.c \ group_free.c \ + group_from_session_pset.c \ group_incl.c \ group_intersection.c \ group_range_excl.c \ @@ -260,18 +273,22 @@ libmpi_c_mpi_la_SOURCES = \ info_get.c \ info_get_nkeys.c \ info_get_nthkey.c \ + info_get_string.c \ info_get_valuelen.c \ info_set.c \ init.c \ init_thread.c \ initialized.c \ intercomm_create.c \ + intercomm_create_from_groups.c \ intercomm_merge.c \ iprobe.c \ irecv.c \ irsend.c \ is_thread_main.c \ isend.c \ + isendrecv.c \ + isendrecv_replace.c \ issend.c \ lookup_name.c \ message_f2c.c \ @@ -280,14 +297,19 @@ libmpi_c_mpi_la_SOURCES = \ mrecv.c \ neighbor_allgather.c \ ineighbor_allgather.c \ + neighbor_allgather_init.c \ neighbor_allgatherv.c \ ineighbor_allgatherv.c \ + neighbor_allgatherv_init.c \ neighbor_alltoall.c \ ineighbor_alltoall.c \ + neighbor_alltoall_init.c \ neighbor_alltoallv.c \ ineighbor_alltoallv.c \ + neighbor_alltoallv_init.c \ neighbor_alltoallw.c \ ineighbor_alltoallw.c \ + neighbor_alltoallw_init.c \ keyval_create.c \ keyval_free.c \ op_c2f.c \ @@ -310,36 +332,52 @@ libmpi_c_mpi_la_SOURCES = \ psend_init.c \ publish_name.c \ query_thread.c \ - raccumulate.c \ + raccumulate.c \ recv_init.c \ recv.c \ reduce.c \ - register_datarep.c \ ireduce.c \ + reduce_init.c \ + register_datarep.c \ reduce_local.c \ reduce_scatter.c \ ireduce_scatter.c \ + reduce_scatter_init.c \ reduce_scatter_block.c \ ireduce_scatter_block.c \ + reduce_scatter_block_init.c \ request_c2f.c \ request_f2c.c \ request_free.c \ request_get_status.c \ - rget.c \ - rget_accumulate.c \ - rput.c \ + rget.c \ + rget_accumulate.c \ + rput.c \ rsend_init.c \ rsend.c \ scan.c \ iscan.c \ + scan_init.c \ scatter.c \ iscatter.c \ + scatter_init.c \ scatterv.c \ iscatterv.c \ + scatterv_init.c \ send.c \ send_init.c \ sendrecv.c \ 
sendrecv_replace.c \ + session_c2f.c \ + session_create_errhandler.c \ + session_get_info.c \ + session_get_num_psets.c \ + session_get_nth_pset.c \ + session_get_pset_info.c \ + session_init.c \ + session_f2c.c \ + session_finalize.c \ + session_set_info.c \ ssend_init.c \ ssend.c \ start.c \ @@ -406,24 +444,24 @@ libmpi_c_mpi_la_SOURCES = \ accumulate.c \ get.c \ put.c \ - win_allocate.c \ - win_allocate_shared.c \ - win_attach.c \ + win_allocate.c \ + win_allocate_shared.c \ + win_attach.c \ win_c2f.c \ win_call_errhandler.c \ win_complete.c \ win_create_errhandler.c \ win_create_keyval.c \ win_create.c \ - win_create_dynamic.c \ + win_create_dynamic.c \ win_delete_attr.c \ - win_detach.c \ + win_detach.c \ win_f2c.c \ win_fence.c \ - win_flush.c \ - win_flush_all.c \ - win_flush_local.c \ - win_flush_local_all.c \ + win_flush.c \ + win_flush_all.c \ + win_flush_local.c \ + win_flush_local_all.c \ win_free_keyval.c \ win_free.c \ win_get_attr.c \ @@ -432,23 +470,22 @@ libmpi_c_mpi_la_SOURCES = \ win_get_info.c \ win_get_name.c \ win_lock.c \ - win_lock_all.c \ + win_lock_all.c \ win_post.c \ win_set_attr.c \ win_set_errhandler.c \ win_set_info.c \ win_set_name.c \ - win_shared_query.c \ - win_sync.c \ + win_shared_query.c \ + win_sync.c \ win_start.c \ win_test.c \ win_unlock.c \ - win_unlock_all.c \ + win_unlock_all.c \ win_wait.c - if OMPI_ENABLE_MPI1_COMPAT -libmpi_c_mpi_la_SOURCES += \ +interface_profile_sources += \ address.c \ errhandler_create.c \ errhandler_get.c \ @@ -461,9 +498,8 @@ libmpi_c_mpi_la_SOURCES += \ type_ub.c endif -# Conditionally install the header files +libmpi_c_profile_la_SOURCES = $(interface_profile_sources) +libmpi_c_profile_la_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=1 -if WANT_INSTALL_HEADERS -ompidir = $(ompiincludedir)/$(subdir) -ompi_HEADERS = $(headers) -endif +libmpi_c_noprofile_la_SOURCES = $(interface_profile_sources) +libmpi_c_noprofile_la_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=0 diff --git 
a/ompi/mpiext/pcollreq/c/allgather_init.c b/ompi/mpi/c/allgather_init.c similarity index 89% rename from ompi/mpiext/pcollreq/c/allgather_init.c rename to ompi/mpi/c/allgather_init.c index 1207fbac685..d46c2e77d8d 100644 --- a/ompi/mpiext/pcollreq/c/allgather_init.c +++ b/ompi/mpi/c/allgather_init.c @@ -33,22 +33,21 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Allgather_init = PMPIX_Allgather_init +#pragma weak MPI_Allgather_init = PMPI_Allgather_init #endif -#define MPIX_Allgather_init PMPIX_Allgather_init +#define MPI_Allgather_init PMPI_Allgather_init #endif -static const char FUNC_NAME[] = "MPIX_Allgather_init"; +static const char FUNC_NAME[] = "MPI_Allgather_init"; -int MPIX_Allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - MPI_Comm comm, MPI_Info info, MPI_Request *request) +int MPI_Allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request) { int err; diff --git a/ompi/mpiext/pcollreq/c/allgatherv_init.c b/ompi/mpi/c/allgatherv_init.c similarity index 90% rename from ompi/mpiext/pcollreq/c/allgatherv_init.c rename to ompi/mpi/c/allgatherv_init.c index 356a7ecdfc7..5c1ae969d4e 100644 --- a/ompi/mpiext/pcollreq/c/allgatherv_init.c +++ b/ompi/mpi/c/allgatherv_init.c @@ -33,23 +33,22 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Allgatherv_init = PMPIX_Allgatherv_init +#pragma weak 
MPI_Allgatherv_init = PMPI_Allgatherv_init #endif -#define MPIX_Allgatherv_init PMPIX_Allgatherv_init +#define MPI_Allgatherv_init PMPI_Allgatherv_init #endif -static const char FUNC_NAME[] = "MPIX_Allgatherv_init"; +static const char FUNC_NAME[] = "MPI_Allgatherv_init"; -int MPIX_Allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int recvcounts[], const int displs[], - MPI_Datatype recvtype, MPI_Comm comm, - MPI_Info info, MPI_Request *request) +int MPI_Allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], + MPI_Datatype recvtype, MPI_Comm comm, + MPI_Info info, MPI_Request *request) { int i, size, err; diff --git a/ompi/mpiext/pcollreq/c/allreduce_init.c b/ompi/mpi/c/allreduce_init.c similarity index 91% rename from ompi/mpiext/pcollreq/c/allreduce_init.c rename to ompi/mpi/c/allreduce_init.c index 94971d43c2e..55bc5570093 100644 --- a/ompi/mpiext/pcollreq/c/allreduce_init.c +++ b/ompi/mpi/c/allreduce_init.c @@ -34,22 +34,21 @@ #include "ompi/op/op.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Allreduce_init = PMPIX_Allreduce_init +#pragma weak MPI_Allreduce_init = PMPI_Allreduce_init #endif -#define MPIX_Allreduce_init PMPIX_Allreduce_init +#define MPI_Allreduce_init PMPI_Allreduce_init #endif -static const char FUNC_NAME[] = "MPIX_Allreduce_init"; +static const char FUNC_NAME[] = "MPI_Allreduce_init"; -int MPIX_Allreduce_init(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, - MPI_Info info, MPI_Request *request) +int MPI_Allreduce_init(const void *sendbuf, void *recvbuf, int count, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, + MPI_Info info, MPI_Request *request) { int err; diff --git 
a/ompi/mpiext/pcollreq/c/alltoall_init.c b/ompi/mpi/c/alltoall_init.c similarity index 89% rename from ompi/mpiext/pcollreq/c/alltoall_init.c rename to ompi/mpi/c/alltoall_init.c index 777ba7c9efa..1a47e7b7cbf 100644 --- a/ompi/mpiext/pcollreq/c/alltoall_init.c +++ b/ompi/mpi/c/alltoall_init.c @@ -33,22 +33,21 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Alltoall_init = PMPIX_Alltoall_init +#pragma weak MPI_Alltoall_init = PMPI_Alltoall_init #endif -#define MPIX_Alltoall_init PMPIX_Alltoall_init +#define MPI_Alltoall_init PMPI_Alltoall_init #endif -static const char FUNC_NAME[] = "MPIX_Alltoall_init"; +static const char FUNC_NAME[] = "MPI_Alltoall_init"; -int MPIX_Alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - MPI_Comm comm, MPI_Info info, MPI_Request *request) +int MPI_Alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request) { size_t sendtype_size, recvtype_size; int err; diff --git a/ompi/mpiext/pcollreq/c/alltoallv_init.c b/ompi/mpi/c/alltoallv_init.c similarity index 90% rename from ompi/mpiext/pcollreq/c/alltoallv_init.c rename to ompi/mpi/c/alltoallv_init.c index 5cb593c0cf5..a0e5cb94aad 100644 --- a/ompi/mpiext/pcollreq/c/alltoallv_init.c +++ b/ompi/mpi/c/alltoallv_init.c @@ -32,23 +32,22 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Alltoallv_init = PMPIX_Alltoallv_init +#pragma weak 
MPI_Alltoallv_init = PMPI_Alltoallv_init #endif -#define MPIX_Alltoallv_init PMPIX_Alltoallv_init +#define MPI_Alltoallv_init PMPI_Alltoallv_init #endif -static const char FUNC_NAME[] = "MPIX_Alltoallv_init"; +static const char FUNC_NAME[] = "MPI_Alltoallv_init"; -int MPIX_Alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], - MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], - const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, - MPI_Info info, MPI_Request *request) +int MPI_Alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], + MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], + const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, + MPI_Info info, MPI_Request *request) { int i, size, err; diff --git a/ompi/mpiext/pcollreq/c/alltoallw_init.c b/ompi/mpi/c/alltoallw_init.c similarity index 89% rename from ompi/mpiext/pcollreq/c/alltoallw_init.c rename to ompi/mpi/c/alltoallw_init.c index c85579ddd29..dc80e4abd19 100644 --- a/ompi/mpiext/pcollreq/c/alltoallw_init.c +++ b/ompi/mpi/c/alltoallw_init.c @@ -32,23 +32,22 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Alltoallw_init = PMPIX_Alltoallw_init +#pragma weak MPI_Alltoallw_init = PMPI_Alltoallw_init #endif -#define MPIX_Alltoallw_init PMPIX_Alltoallw_init +#define MPI_Alltoallw_init PMPI_Alltoallw_init #endif -static const char FUNC_NAME[] = "MPIX_Alltoallw_init"; +static const char FUNC_NAME[] = "MPI_Alltoallw_init"; -int MPIX_Alltoallw_init(const void *sendbuf, const int sendcounts[], const int sdispls[], - const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], - const int rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, - MPI_Info info, MPI_Request *request) 
+int MPI_Alltoallw_init(const void *sendbuf, const int sendcounts[], const int sdispls[], + const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], + const int rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, + MPI_Info info, MPI_Request *request) { int i, size, err; diff --git a/ompi/mpiext/pcollreq/c/barrier_init.c b/ompi/mpi/c/barrier_init.c similarity index 87% rename from ompi/mpiext/pcollreq/c/barrier_init.c rename to ompi/mpi/c/barrier_init.c index 76084da886a..fe0a50a383e 100644 --- a/ompi/mpiext/pcollreq/c/barrier_init.c +++ b/ompi/mpi/c/barrier_init.c @@ -27,20 +27,19 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Barrier_init = PMPIX_Barrier_init +#pragma weak MPI_Barrier_init = PMPI_Barrier_init #endif -#define MPIX_Barrier_init PMPIX_Barrier_init +#define MPI_Barrier_init PMPI_Barrier_init #endif -static const char FUNC_NAME[] = "MPIX_Barrier_init"; +static const char FUNC_NAME[] = "MPI_Barrier_init"; -int MPIX_Barrier_init(MPI_Comm comm, MPI_Info info, MPI_Request *request) +int MPI_Barrier_init(MPI_Comm comm, MPI_Info info, MPI_Request *request) { int err = MPI_SUCCESS; diff --git a/ompi/mpiext/pcollreq/c/bcast_init.c b/ompi/mpi/c/bcast_init.c similarity index 88% rename from ompi/mpiext/pcollreq/c/bcast_init.c rename to ompi/mpi/c/bcast_init.c index 9a61aff8a95..8a9791be39e 100644 --- a/ompi/mpiext/pcollreq/c/bcast_init.c +++ b/ompi/mpi/c/bcast_init.c @@ -21,21 +21,20 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Bcast_init = PMPIX_Bcast_init +#pragma weak 
MPI_Bcast_init = PMPI_Bcast_init #endif -#define MPIX_Bcast_init PMPIX_Bcast_init +#define MPI_Bcast_init PMPI_Bcast_init #endif -static const char FUNC_NAME[] = "MPIX_Bcast_init"; +static const char FUNC_NAME[] = "MPI_Bcast_init"; -int MPIX_Bcast_init(void *buffer, int count, MPI_Datatype datatype, - int root, MPI_Comm comm, MPI_Info info, MPI_Request *request) +int MPI_Bcast_init(void *buffer, int count, MPI_Datatype datatype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request) { int err; diff --git a/ompi/mpi/c/comm_create_errhandler.c b/ompi/mpi/c/comm_create_errhandler.c index e342037cc92..9caf0510300 100644 --- a/ompi/mpi/c/comm_create_errhandler.c +++ b/ompi/mpi/c/comm_create_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,29 +43,29 @@ static const char FUNC_NAME[] = "MPI_Comm_create_errhandler"; int MPI_Comm_create_errhandler(MPI_Comm_errhandler_function *function, MPI_Errhandler *errhandler) { - int err = MPI_SUCCESS; + int err = MPI_SUCCESS; - /* Error checking */ + /* Error checking */ - if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == function || - NULL == errhandler) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, - FUNC_NAME); + if (NULL == function || + NULL == errhandler) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, + FUNC_NAME); + } } - } - /* Create and cache the errhandler. Sets a refcount of 1. */ + /* Create and cache the errhandler. 
Sets a refcount of 1. */ - *errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, - (ompi_errhandler_generic_handler_fn_t*) function, - OMPI_ERRHANDLER_LANG_C); - if (NULL == *errhandler) { - err = MPI_ERR_INTERN; - } + *errhandler = + ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, + (ompi_errhandler_generic_handler_fn_t*) function, + OMPI_ERRHANDLER_LANG_C); + if (NULL == *errhandler) { + err = MPI_ERR_INTERN; + } - OMPI_ERRHANDLER_NOHANDLE_RETURN(err, MPI_ERR_INTERN, FUNC_NAME); + OMPI_ERRHANDLER_NOHANDLE_RETURN(err, MPI_ERR_INTERN, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_create_from_group.c b/ompi/mpi/c/comm_create_from_group.c new file mode 100644 index 00000000000..e3347b6f72a --- /dev/null +++ b/ompi/mpi/c/comm_create_from_group.c @@ -0,0 +1,92 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2008 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Comm_create_from_group = PMPI_Comm_create_from_group +#endif +#define MPI_Comm_create_from_group PMPI_Comm_create_from_group +#endif + +static const char FUNC_NAME[] = "MPI_Comm_create_from_group"; + + +int MPI_Comm_create_from_group (MPI_Group group, const char *tag, MPI_Info info, MPI_Errhandler errhandler, + MPI_Comm *newcomm) { + int rc; + + MEMCHECKER( + memchecker_comm(comm); + ); + + if ( MPI_PARAM_CHECK ) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (NULL == tag) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_TAG, FUNC_NAME); + } + + if (NULL == group) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_GROUP, FUNC_NAME); + } + + if (NULL == info || ompi_info_is_freed(info)) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_INFO, FUNC_NAME); + } + + if (NULL == newcomm) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_ARG, FUNC_NAME); + } + } + + if (MPI_GROUP_NULL == group || MPI_UNDEFINED == ompi_group_rank (group)) { + *newcomm = MPI_COMM_NULL; + return MPI_SUCCESS; + } + + + rc = ompi_comm_create_from_group ((ompi_group_t *) group, tag, &info->super, errhandler, + (ompi_communicator_t **) newcomm); + if (MPI_SUCCESS != rc) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + rc, FUNC_NAME); + } + + return rc; +} diff --git a/ompi/mpi/c/comm_create_keyval.c b/ompi/mpi/c/comm_create_keyval.c 
index 5feb61a3256..3be53c6a05d 100644 --- a/ompi/mpi/c/comm_create_keyval.c +++ b/ompi/mpi/c/comm_create_keyval.c @@ -54,7 +54,7 @@ int MPI_Comm_create_keyval(MPI_Comm_copy_attr_function *comm_copy_attr_fn, } } - copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*)comm_copy_attr_fn; + copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function) comm_copy_attr_fn; del_fn.attr_communicator_delete_fn = comm_delete_attr_fn; ret = ompi_attr_create_keyval(COMM_ATTR, copy_fn, diff --git a/ompi/mpi/c/comm_get_errhandler.c b/ompi/mpi/c/comm_get_errhandler.c index 301cdd91862..288476e1e9b 100644 --- a/ompi/mpi/c/comm_get_errhandler.c +++ b/ompi/mpi/c/comm_get_errhandler.c @@ -15,6 +15,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" +#include "ompi/instance/instance.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -43,6 +46,8 @@ static const char FUNC_NAME[] = "MPI_Comm_get_errhandler"; int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *errhandler) { + int ret = MPI_SUCCESS; + /* Error checking */ MEMCHECKER( memchecker_comm(comm); @@ -68,7 +73,10 @@ int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *errhandler) *errhandler = comm->error_handler; OPAL_THREAD_UNLOCK(&(comm->c_lock)); + /* make sure the infrastructure is initialized */ + ret = ompi_mpi_instance_retain (); + /* All done */ - return MPI_SUCCESS; + return ret; } diff --git a/ompi/mpi/c/comm_get_info.c b/ompi/mpi/c/comm_get_info.c index cb9ff2146cf..138f1656dcf 100644 --- a/ompi/mpi/c/comm_get_info.c +++ b/ompi/mpi/c/comm_get_info.c @@ -46,21 +46,22 @@ int MPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used) 
} if (NULL == comm->super.s_info) { -/* - * Setup any defaults if MPI_Win_set_info was never called - */ + /* + * Setup any defaults if MPI_Win_set_info was never called + */ opal_infosubscribe_change_info(&comm->super, &MPI_INFO_NULL->super); } - (*info_used) = OBJ_NEW(ompi_info_t); + *info_used = ompi_info_allocate (); if (NULL == (*info_used)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); } + opal_info_t *opal_info_used = &(*info_used)->super; - opal_info_dup_mpistandard(comm->super.s_info, &opal_info_used); + opal_info_dup(comm->super.s_info, &opal_info_used); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/comm_idup_with_info.c b/ompi/mpi/c/comm_idup_with_info.c new file mode 100644 index 00000000000..1abbd35b5a9 --- /dev/null +++ b/ompi/mpi/c/comm_idup_with_info.c @@ -0,0 +1,86 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2008 University of Houston. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+#include "ompi_config.h"
+#include
+
+#include "ompi/mpi/c/bindings.h"
+#include "ompi/runtime/params.h"
+#include "ompi/communicator/communicator.h"
+#include "ompi/errhandler/errhandler.h"
+#include "ompi/memchecker.h"
+
+#if OMPI_BUILD_MPI_PROFILING
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak MPI_Comm_idup_with_info = PMPI_Comm_idup_with_info
+#endif
+#define MPI_Comm_idup_with_info PMPI_Comm_idup_with_info
+#endif
+
+static const char FUNC_NAME[] = "MPI_Comm_idup_with_info";
+
+/* Validates the arguments, then hands comm, info, newcomm and request to ompi_comm_idup_with_info(); errors are raised on comm. */
+int MPI_Comm_idup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm, MPI_Request *request)
+{
+    int rc;
+
+    MEMCHECKER(
+        memchecker_comm(comm);
+    );
+
+    /* argument checking */
+    if ( MPI_PARAM_CHECK ) {
+        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
+
+        if (ompi_comm_invalid (comm)) {
+            return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM, FUNC_NAME);
+        }
+        if (NULL == info || ompi_info_is_freed(info)) {
+            return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_INFO, FUNC_NAME);
+        }
+        /* newcomm and request are both output pointers: reject NULL for either */
+        if (NULL == newcomm || NULL == request) {
+            return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
+        }
+    }
+
+#if OPAL_ENABLE_FT_MPI
+    /*
+     * An early check, so as to return early if we are using a broken
+     * communicator. This is not absolutely necessary since we will
+     * check for this, and other, error conditions during the operation.
+     */
+    if( OPAL_UNLIKELY(!ompi_comm_iface_create_check(comm, &rc)) ) {
+        OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
+    }
+#endif
+
+    rc = ompi_comm_idup_with_info (comm, &info->super, newcomm, request);
+    OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
+}
+
diff --git a/ompi/mpi/c/errhandler_f2c.c b/ompi/mpi/c/errhandler_f2c.c
index bf4dce1994f..8ec5dd2527e 100644
--- a/ompi/mpi/c/errhandler_f2c.c
+++ b/ompi/mpi/c/errhandler_f2c.c
@@ -13,6 +13,8 @@
  * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2015 Research Organization for Information Science
  * and Technology (RIST).
All rights reserved. + * Copyright (c) 2020-2021 Triad National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,23 +42,41 @@ static const char FUNC_NAME[] = "MPI_Errhandler_f2c"; MPI_Errhandler MPI_Errhandler_f2c(MPI_Fint errhandler_f) { int eh_index = OMPI_FINT_2_INT(errhandler_f); - - /* Error checking */ - - if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - } + MPI_Errhandler c_err_handler; /* Per MPI-2:4.12.4, do not invoke an error handler if we get an invalid fortran handle. If we get an invalid fortran handle, return an invalid C handle. */ - if (eh_index < 0 || - eh_index >= - opal_pointer_array_get_size(&ompi_errhandler_f_to_c_table)) { - return NULL; + /* + * special cases for MPI_ERRORS_ARE_FATAL and MPI_ERRORS_RETURN - + * needed for MPI 4.0 + */ + + switch(eh_index) { + case OMPI_ERRHANDLER_NULL_FORTRAN: + c_err_handler = MPI_ERRHANDLER_NULL; + break; + case OMPI_ERRORS_ARE_FATAL_FORTRAN: + c_err_handler = MPI_ERRORS_ARE_FATAL; + break; + case OMPI_ERRORS_RETURN_FORTRAN: + c_err_handler = MPI_ERRORS_RETURN; + break; + default: + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + } + if (eh_index < 0 || + eh_index >= + opal_pointer_array_get_size(&ompi_errhandler_f_to_c_table)) { + c_err_handler = NULL; + } else { + c_err_handler = (MPI_Errhandler)opal_pointer_array_get_item(&ompi_errhandler_f_to_c_table, + eh_index); + } + break; } - return (MPI_Errhandler)opal_pointer_array_get_item(&ompi_errhandler_f_to_c_table, - eh_index); + return c_err_handler; } diff --git a/ompi/mpi/c/errhandler_free.c b/ompi/mpi/c/errhandler_free.c index a87038f4707..77e7da2e919 100644 --- a/ompi/mpi/c/errhandler_free.c +++ b/ompi/mpi/c/errhandler_free.c @@ -41,7 +41,7 @@ int MPI_Errhandler_free(MPI_Errhandler *errhandler) if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - /* Raise an MPI error if we got NULL or if we got an intrinsic + /* Raise an MPI exception if we got NULL or if we got an 
intrinsic *and* the reference count is 1 (meaning that this FREE would actually free the underlying intrinsic object). This is ugly but necessary -- see below. */ @@ -69,7 +69,7 @@ int MPI_Errhandler_free(MPI_Errhandler *errhandler) So decrease the refcount here. */ - OBJ_RELEASE(*errhandler); + ompi_errhandler_free (*errhandler); *errhandler = MPI_ERRHANDLER_NULL; /* All done */ diff --git a/ompi/mpiext/pcollreq/c/exscan_init.c b/ompi/mpi/c/exscan_init.c similarity index 88% rename from ompi/mpiext/pcollreq/c/exscan_init.c rename to ompi/mpi/c/exscan_init.c index 0bac072785c..9c18eade276 100644 --- a/ompi/mpiext/pcollreq/c/exscan_init.c +++ b/ompi/mpi/c/exscan_init.c @@ -30,23 +30,22 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" #include "ompi/mca/coll/base/coll_base_util.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Exscan_init = PMPIX_Exscan_init +#pragma weak MPI_Exscan_init = PMPI_Exscan_init #endif -#define MPIX_Exscan_init PMPIX_Exscan_init +#define MPI_Exscan_init PMPI_Exscan_init #endif -static const char FUNC_NAME[] = "MPIX_Exscan_init"; +static const char FUNC_NAME[] = "MPI_Exscan_init"; -int MPIX_Exscan_init(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, - MPI_Info info, MPI_Request *request) +int MPI_Exscan_init(const void *sendbuf, void *recvbuf, int count, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, + MPI_Info info, MPI_Request *request) { int err; diff --git a/ompi/mpi/c/file_create_errhandler.c b/ompi/mpi/c/file_create_errhandler.c index a839ec3a9fa..4041d00b658 100644 --- a/ompi/mpi/c/file_create_errhandler.c +++ b/ompi/mpi/c/file_create_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University 
Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,31 +40,31 @@ static const char FUNC_NAME[] = "MPI_File_create_errhandler"; -int MPI_File_create_errhandler(MPI_File_errhandler_function *function, - MPI_Errhandler *errhandler) { - int err = MPI_SUCCESS; +int MPI_File_create_errhandler (MPI_File_errhandler_function *function, + MPI_Errhandler *errhandler) { + int err = MPI_SUCCESS; - /* Error checking */ + /* Error checking */ - if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == function || - NULL == errhandler) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == function || + NULL == errhandler) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, "MPI_File_create_errhandler"); + } } - } - /* Create and cache the errhandler. Sets a refcount of 1. */ + /* Create and cache the errhandler. Sets a refcount of 1. 
*/ - *errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, - (ompi_errhandler_generic_handler_fn_t*) function, - OMPI_ERRHANDLER_LANG_C); - if (NULL == *errhandler) { - err = MPI_ERR_INTERN; - } + *errhandler = + ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, + (ompi_errhandler_generic_handler_fn_t*) function, + OMPI_ERRHANDLER_LANG_C); + if (NULL == *errhandler) { + err = MPI_ERR_INTERN; + } - OMPI_ERRHANDLER_NOHANDLE_RETURN(err, MPI_ERR_INTERN, - "MPI_File_create_errhandler"); + OMPI_ERRHANDLER_NOHANDLE_RETURN(err, MPI_ERR_INTERN, + "MPI_File_create_errhandler"); } diff --git a/ompi/mpi/c/file_get_errhandler.c b/ompi/mpi/c/file_get_errhandler.c index b78de9ce292..8836ec16b62 100644 --- a/ompi/mpi/c/file_get_errhandler.c +++ b/ompi/mpi/c/file_get_errhandler.c @@ -15,6 +15,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -42,6 +44,8 @@ static const char FUNC_NAME[] = "MPI_File_get_errhandler"; int MPI_File_get_errhandler( MPI_File file, MPI_Errhandler *errhandler) { + int ret = MPI_SUCCESS; + /* Error checking */ if (MPI_PARAM_CHECK) { @@ -67,7 +71,10 @@ int MPI_File_get_errhandler( MPI_File file, MPI_Errhandler *errhandler) OBJ_RETAIN(file->error_handler); OPAL_THREAD_UNLOCK(&file->f_lock); + /* make sure the infrastructure is initialized */ + ret = ompi_mpi_instance_retain (); + /* All done */ - return MPI_SUCCESS; + return ret; } diff --git a/ompi/mpi/c/file_get_info.c b/ompi/mpi/c/file_get_info.c index 8d1411dedc8..8eabe8a0217 100644 --- a/ompi/mpi/c/file_get_info.c +++ b/ompi/mpi/c/file_get_info.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -13,6 +14,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2019 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -70,20 +73,20 @@ int MPI_File_get_info(MPI_File fh, MPI_Info *info_used) } if (NULL == fh->super.s_info) { -/* - * Setup any defaults if MPI_Win_set_info was never called - */ + /* + * Setup any defaults if MPI_Win_set_info was never called + */ opal_infosubscribe_change_info(&fh->super, &MPI_INFO_NULL->super); } - (*info_used) = OBJ_NEW(ompi_info_t); - if (NULL == (*info_used)) { + *info_used = ompi_info_allocate (); + if (NULL == *info_used) { return OMPI_ERRHANDLER_INVOKE(fh, MPI_ERR_NO_MEM, FUNC_NAME); } opal_info_t *opal_info_used = &(*info_used)->super; - opal_info_dup_mpistandard(fh->super.s_info, &opal_info_used); + opal_info_dup(fh->super.s_info, &opal_info_used); return OMPI_SUCCESS; } diff --git a/ompi/mpiext/pcollreq/c/gather_init.c b/ompi/mpi/c/gather_init.c similarity index 94% rename from ompi/mpiext/pcollreq/c/gather_init.c rename to ompi/mpi/c/gather_init.c index 5b9b75b82b9..9932483e512 100644 --- a/ompi/mpiext/pcollreq/c/gather_init.c +++ b/ompi/mpi/c/gather_init.c @@ -33,22 +33,21 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Gather_init = PMPIX_Gather_init +#pragma weak MPI_Gather_init = PMPI_Gather_init #endif -#define MPIX_Gather_init PMPIX_Gather_init +#define MPI_Gather_init PMPI_Gather_init #endif -static const char FUNC_NAME[] = "MPIX_Gather_init"; +static const char FUNC_NAME[] = "MPI_Gather_init"; -int MPIX_Gather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - int root, MPI_Comm comm, MPI_Info info, MPI_Request *request) +int MPI_Gather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm 
comm, MPI_Info info, MPI_Request *request) { int err; diff --git a/ompi/mpiext/pcollreq/c/gatherv_init.c b/ompi/mpi/c/gatherv_init.c similarity index 94% rename from ompi/mpiext/pcollreq/c/gatherv_init.c rename to ompi/mpi/c/gatherv_init.c index 30291ee8efe..7a17f7ae726 100644 --- a/ompi/mpiext/pcollreq/c/gatherv_init.c +++ b/ompi/mpi/c/gatherv_init.c @@ -30,24 +30,23 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/mca/coll/base/coll_base_util.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Gatherv_init = PMPIX_Gatherv_init +#pragma weak MPI_Gatherv_init = PMPI_Gatherv_init #endif -#define MPIX_Gatherv_init PMPIX_Gatherv_init +#define MPI_Gatherv_init PMPI_Gatherv_init #endif -static const char FUNC_NAME[] = "MPIX_Gatherv_init"; +static const char FUNC_NAME[] = "MPI_Gatherv_init"; -int MPIX_Gatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int recvcounts[], const int displs[], - MPI_Datatype recvtype, int root, MPI_Comm comm, - MPI_Info info, MPI_Request *request) +int MPI_Gatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], + MPI_Datatype recvtype, int root, MPI_Comm comm, + MPI_Info info, MPI_Request *request) { int i, size, err; diff --git a/ompi/mpi/c/group_from_session_pset.c b/ompi/mpi/c/group_from_session_pset.c new file mode 100644 index 00000000000..4ee2dfe0451 --- /dev/null +++ b/ompi/mpi/c/group_from_session_pset.c @@ -0,0 +1,42 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+#include "ompi_config.h"
+#include
+
+#include "ompi/mpi/c/bindings.h"
+#include "ompi/runtime/params.h"
+#include "ompi/errhandler/errhandler.h"
+#include "ompi/instance/instance.h"
+
+#if OMPI_BUILD_MPI_PROFILING
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak MPI_Group_from_session_pset = PMPI_Group_from_session_pset
+#endif
+#define MPI_Group_from_session_pset PMPI_Group_from_session_pset
+#endif
+
+static const char FUNC_NAME[] = "MPI_Group_from_session_pset";
+
+/* Passes session and pset_name to ompi_group_from_pset(), which fills *newgroup; errors are raised on session. */
+int MPI_Group_from_session_pset (MPI_Session session, const char *pset_name, MPI_Group *newgroup)
+{
+    int rc;
+    /* A NULL session cannot supply an error handler, so report it without one. */
+    if ( MPI_PARAM_CHECK && NULL == session ) {
+        return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, FUNC_NAME);
+    }
+    if ( MPI_PARAM_CHECK && (NULL == pset_name || NULL == newgroup) ) {
+        return OMPI_ERRHANDLER_INVOKE(session, MPI_ERR_ARG, FUNC_NAME);
+    }
+    rc = ompi_group_from_pset (session, pset_name, newgroup);
+    /* if an error occurred, raise it on the session */
+    OMPI_ERRHANDLER_RETURN (rc, session, rc, FUNC_NAME);
+}
diff --git a/ompi/mpi/c/improbe.c b/ompi/mpi/c/improbe.c
index 22831c4bad3..d0ea3f708f6 100644
--- a/ompi/mpi/c/improbe.c
+++ b/ompi/mpi/c/improbe.c
@@ -1,10 +1,10 @@
 /*
  * Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
- * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2015 Research Organization for Information Science
  * and Technology (RIST). All rights reserved.
- * Copyright (c) 2020 The University of Tennessee and The University
+ * Copyright (c) 2020-2021 The University of Tennessee and The University
  * of Tennessee Research Foundation. All rights
  * reserved.
* $COPYRIGHT$ @@ -61,7 +61,7 @@ int MPI_Improbe(int source, int tag, MPI_Comm comm, int *flag, if (MPI_PROC_NULL == source) { if (MPI_STATUS_IGNORE != status) { - *status = ompi_request_empty.req_status; + OMPI_COPY_STATUS(status, ompi_request_empty.req_status, false); /* Per MPI-1, the MPI_ERROR field is not defined for single-completion calls */ MEMCHECKER( diff --git a/ompi/mpi/c/info_c2f.c b/ompi/mpi/c/info_c2f.c index 55b795016b8..372c8abe766 100644 --- a/ompi/mpi/c/info_c2f.c +++ b/ompi/mpi/c/info_c2f.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,14 +37,12 @@ #define MPI_Info_c2f PMPI_Info_c2f #endif -static const char FUNC_NAME[] = "MPI_Info_c2f"; +/* static const char FUNC_NAME[] = "MPI_Info_c2f"; */ MPI_Fint MPI_Info_c2f(MPI_Info info) { if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == info || ompi_info_is_freed(info)) { return OMPI_INT_2_FINT(-1); } diff --git a/ompi/mpi/c/info_create.c b/ompi/mpi/c/info_create.c index a41433e116d..c6f7ee18f26 100644 --- a/ompi/mpi/c/info_create.c +++ b/ompi/mpi/c/info_create.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -50,20 +53,13 @@ static const char FUNC_NAME[] = "MPI_Info_create"; int MPI_Info_create(MPI_Info *info) { if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, FUNC_NAME); } } - /* - * Call the object create function. This function not only - * allocates the space for MPI_Info, but also calls all the - * relevant init functions. Should I check if the fortran - * handle is valid - */ - (*info) = OBJ_NEW(ompi_info_t); + *info = ompi_info_allocate (); if (NULL == (*info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); diff --git a/ompi/mpi/c/info_delete.c b/ompi/mpi/c/info_delete.c index e1f53539e0f..3fcf5256782 100644 --- a/ompi/mpi/c/info_delete.c +++ b/ompi/mpi/c/info_delete.c @@ -15,6 +15,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -61,7 +63,6 @@ int MPI_Info_delete(MPI_Info info, const char *key) { * This function merely deletes the (key,val) pair in info */ if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, diff --git a/ompi/mpi/c/info_dup.c b/ompi/mpi/c/info_dup.c index 7c738d7b01c..bbe320d3955 100644 --- a/ompi/mpi/c/info_dup.c +++ b/ompi/mpi/c/info_dup.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -65,7 +68,6 @@ int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo) { */ if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || NULL == newinfo || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, diff --git a/ompi/mpi/c/info_f2c.c b/ompi/mpi/c/info_f2c.c index 8c6383a20e6..4e7095ce26d 100644 --- a/ompi/mpi/c/info_f2c.c +++ b/ompi/mpi/c/info_f2c.c @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -13,6 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,16 +50,25 @@ MPI_Info MPI_Info_f2c(MPI_Fint info) { int info_index = OMPI_FINT_2_INT(info); - /* check the arguments */ + /* Per MPI-2:4.12.4, do not invoke an error handler if we get an + invalid fortran handle. If we get an invalid fortran handle, + return an invalid C handle. */ + /* + * Deal with special pre-defined cases for MPI 4.0 + */ + + if (info_index == 0) { + return MPI_INFO_NULL; + } + + if (info_index == 1) { + return MPI_INFO_ENV; + } if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } - /* Per MPI-2:4.12.4, do not invoke an error handler if we get an - invalid fortran handle. If we get an invalid fortran handle, - return an invalid C handle. 
*/ - if (info_index < 0 || info_index >= opal_pointer_array_get_size(&ompi_info_f_to_c_table)) { diff --git a/ompi/mpi/c/info_free.c b/ompi/mpi/c/info_free.c index 32220c84c04..5e82d86172e 100644 --- a/ompi/mpi/c/info_free.c +++ b/ompi/mpi/c/info_free.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology diff --git a/ompi/mpi/c/info_get.c b/ompi/mpi/c/info_get.c index 4b4da55f60a..563c6cd1f83 100644 --- a/ompi/mpi/c/info_get.c +++ b/ompi/mpi/c/info_get.c @@ -15,6 +15,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -74,7 +76,6 @@ int MPI_Info_get(MPI_Info info, const char *key, int valuelen, * necessary structures. */ if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, diff --git a/ompi/mpi/c/info_get_nkeys.c b/ompi/mpi/c/info_get_nkeys.c index fe79178af95..850dadeb5ca 100644 --- a/ompi/mpi/c/info_get_nkeys.c +++ b/ompi/mpi/c/info_get_nkeys.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -55,7 +58,6 @@ int MPI_Info_get_nkeys(MPI_Info info, int *nkeys) int err; if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, diff --git a/ompi/mpi/c/info_get_string.c b/ompi/mpi/c/info_get_string.c new file mode 100644 index 00000000000..e13baaefcc8 --- /dev/null +++ b/ompi/mpi/c/info_get_string.c @@ -0,0 +1,119 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "ompi/mpi/c/bindings.h"
+#include "ompi/runtime/params.h"
+#include "ompi/communicator/communicator.h"
+#include "ompi/errhandler/errhandler.h"
+#include "ompi/info/info.h"
+#include "opal/util/string_copy.h"
+#include
+#include
+
+#if OMPI_BUILD_MPI_PROFILING
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak MPI_Info_get_string = PMPI_Info_get_string
+#endif
+#define MPI_Info_get_string PMPI_Info_get_string
+#endif
+
+static const char FUNC_NAME[] = "MPI_Info_get_string";
+
+/**
+ * MPI_Info_get_string - Look up the value stored under 'key' in an 'MPI_Info' object
+ *
+ * @param info info object (handle)
+ * @param key null-terminated character string of the index key
+ * @param buflen in: size of 'value' (0 = length query only); out, if key found: value length + 1 for the \0
+ * @param value buffer receiving the null-terminated value; not written when *buflen is 0 on entry
+ * @param flag true (1) if 'key' defined on 'info', false (0) if not
+ *             (logical)
+ *
+ * @retval MPI_SUCCESS
+ * @retval MPI_ERR_ARG
+ * @retval MPI_ERR_INFO
+ * @retval MPI_ERR_INFO_KEY
+ * @retval MPI_ERR_INFO_VALUE
+ *
+ */
+int MPI_Info_get_string(MPI_Info info, const char *key, int *buflen,
+                        char *value, int *flag)
+{
+    int err;
+    int key_length;
+    opal_cstring_t *info_str = NULL;
+
+    /*
+     * Simple function. All we need to do is search for the value
+     * having the "key" associated with it and then populate the
+     * necessary structures.
+     */
+    if (MPI_PARAM_CHECK) {
+        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
+        if (NULL == info || MPI_INFO_NULL == info ||
+            ompi_info_is_freed(info)) {
+            return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO,
+                                                   FUNC_NAME);
+        }
+
+        key_length = (key) ? (int)strlen (key) : 0;
+        if ((NULL == key) || (0 == key_length) ||
+            (MPI_MAX_INFO_KEY <= key_length)) {
+            return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO_KEY,
+                                                   FUNC_NAME);
+        }
+        if (NULL == buflen) {
+            return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG,
+                                                   FUNC_NAME);
+        }
+        if ((NULL == value) && *buflen) {
+            return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO_VALUE,
+                                                   FUNC_NAME);
+        }
+        if (NULL == flag) {
+            return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG,
+                                                   FUNC_NAME);
+        }
+    }
+
+    if (0 == *buflen) {
+        err = ompi_info_get_valuelen(info, key, buflen, flag);
+        if (MPI_SUCCESS == err && 1 == *flag) {   /* only trust *flag after a successful lookup */
+            *buflen += 1; /* add on for the \0, see MPI 4.0 Standard */
+        }
+    } else {
+        err = ompi_info_get(info, key, &info_str, flag);
+        if (MPI_SUCCESS == err && *flag) {   /* info_str is only used after a successful lookup */
+            opal_string_copy(value, info_str->string, *buflen);
+            *buflen = info_str->length + 1; /* add on for the \0, see MPI 4.0 Standard */
+            OBJ_RELEASE(info_str);
+        }
+    }
+
+    OMPI_ERRHANDLER_NOHANDLE_RETURN(err, err, FUNC_NAME);
+}
diff --git a/ompi/mpi/c/info_get_valuelen.c b/ompi/mpi/c/info_get_valuelen.c
index 575de6d407d..e40d3c110f8 100644
--- a/ompi/mpi/c/info_get_valuelen.c
+++ b/ompi/mpi/c/info_get_valuelen.c
@@ -14,6 +14,8 @@
  * reserved.
  * Copyright (c) 2015 Research Organization for Information Science
  * and Technology (RIST). All rights reserved.
+ * Copyright (c) 2018 Triad National Security, LLC. All rights
+ * reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -70,7 +72,6 @@ int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, * having the "key" associated with it and return the length */ if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, diff --git a/ompi/mpi/c/info_set.c b/ompi/mpi/c/info_set.c index bb3c5c6198c..13843ae009c 100644 --- a/ompi/mpi/c/info_set.c +++ b/ompi/mpi/c/info_set.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -13,6 +14,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -77,7 +80,6 @@ int MPI_Info_set(MPI_Info info, const char *key, const char *value) */ if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_INFO, @@ -99,17 +101,6 @@ int MPI_Info_set(MPI_Info info, const char *key, const char *value) } } -// An extra warning condition is a key that uses our reserved prefix "__IN_". -// That one is used internally to deal with the dynamic nature the key/val -// pairs where we have callbacks that modify the val, and the MPI standard -// wants the get_info call to give back the original setting rather than -// the callback-modified setting. So if a user directly used a key __IN_foo -// it would confuse our accounting slightly. 
- if (0 == strncmp(key, OPAL_INFO_SAVE_PREFIX, strlen(OPAL_INFO_SAVE_PREFIX))) { - opal_show_help("help-mpi-api.txt", "info-set-with-reserved-prefix", true, - key, OPAL_INFO_SAVE_PREFIX); - } - /* * If all is right with the arguments, then call the back-end * allocator. diff --git a/ompi/mpi/c/init_thread.c b/ompi/mpi/c/init_thread.c index 2cdad8ff3af..95ca9df25e2 100644 --- a/ompi/mpi/c/init_thread.c +++ b/ompi/mpi/c/init_thread.c @@ -48,6 +48,7 @@ int MPI_Init_thread(int *argc, char ***argv, int required, int *provided) { int err, safe_required = MPI_THREAD_SERIALIZED; + char *env; ompi_hook_base_mpi_init_thread_top(argc, argv, required, provided); @@ -56,7 +57,13 @@ int MPI_Init_thread(int *argc, char ***argv, int required, */ if( (MPI_THREAD_SINGLE == required) || (MPI_THREAD_SERIALIZED == required) || (MPI_THREAD_FUNNELED == required) || (MPI_THREAD_MULTIPLE == required) ) { - safe_required = required; + + if (NULL != (env = getenv("OMPI_MPI_THREAD_LEVEL"))) { + safe_required = atoi(env); + } + else { + safe_required = required; + } } *provided = safe_required; diff --git a/ompi/mpi/c/intercomm_create.c b/ompi/mpi/c/intercomm_create.c index 5c582c7c0e5..0e8a903032a 100644 --- a/ompi/mpi/c/intercomm_create.c +++ b/ompi/mpi/c/intercomm_create.c @@ -17,6 +17,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,14 +50,7 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm bridge_comm, int remote_leader, int tag, MPI_Comm *newintercomm) { - int local_size=0, local_rank=0; - int lleader=0, rleader=0; - ompi_communicator_t *newcomp=NULL; - struct ompi_proc_t **rprocs=NULL; - int rc=0, rsize=0; - ompi_proc_t **proc_list=NULL; - int j; - ompi_group_t *new_group_pointer; + int rc; MEMCHECKER( memchecker_comm(local_comm); @@ -89,169 +84,9 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, */ #endif - local_size = ompi_comm_size ( local_comm ); - local_rank = ompi_comm_rank ( local_comm ); - lleader = local_leader; - rleader = remote_leader; - - if ( MPI_PARAM_CHECK ) { - if ( (0 > local_leader) || (local_leader >= local_size) ) - return OMPI_ERRHANDLER_INVOKE ( local_comm, MPI_ERR_ARG, - FUNC_NAME); - - /* remember that the remote_leader and bridge_comm arguments - just have to be valid at the local_leader */ - if ( local_rank == local_leader ) { - if ( ompi_comm_invalid ( bridge_comm ) || - (bridge_comm->c_flags & OMPI_COMM_INTER) ) { - return OMPI_ERRHANDLER_INVOKE ( local_comm, MPI_ERR_COMM, - FUNC_NAME); - } - if ( (remote_leader < 0) || (remote_leader >= ompi_comm_size(bridge_comm))) { - return OMPI_ERRHANDLER_INVOKE ( local_comm, MPI_ERR_ARG, - FUNC_NAME); - } - } /* if ( local_rank == local_leader ) */ - } - - if ( local_rank == local_leader ) { - MPI_Request req; - - /* local leader exchange group sizes lists */ - rc = MCA_PML_CALL(irecv(&rsize, 1, MPI_INT, rleader, tag, bridge_comm, - &req)); - if ( rc != MPI_SUCCESS ) { -#if OPAL_ENABLE_FT_MPI - if( MPI_ERR_PROC_FAILED == rc ) { - rsize = 0; - goto skip_handshake; - } -#endif /* OPAL_ENABLE_FT_MPI */ - goto err_exit; - } - rc = MCA_PML_CALL(send (&local_size, 1, MPI_INT, rleader, tag, - MCA_PML_BASE_SEND_STANDARD, bridge_comm)); - if ( rc != MPI_SUCCESS ) { -#if OPAL_ENABLE_FT_MPI - if( MPI_ERR_PROC_FAILED == rc ) { - rsize 
= 0; - goto skip_handshake; - } -#endif /* OPAL_ENABLE_FT_MPI */ - goto err_exit; - } -#if OPAL_ENABLE_FT_MPI - skip_handshake: /* nothing special */; -#endif /* OPAL_ENABLE_FT_MPI */ - rc = ompi_request_wait( &req, MPI_STATUS_IGNORE); - if ( rc != MPI_SUCCESS ) { - rsize = 0; /* participate in the collective and then done */ - } - } - - /* bcast size and list of remote processes to all processes in local_comm */ - rc = local_comm->c_coll->coll_bcast ( &rsize, 1, MPI_INT, lleader, - local_comm, - local_comm->c_coll->coll_bcast_module); - if ( rc != MPI_SUCCESS ) { -#if OPAL_ENABLE_FT_MPI - if ( local_rank != local_leader ) { - goto err_exit; - } - /* the leaders must go in the ger_rprocs in order to avoid deadlocks */ -#else - goto err_exit; -#endif /* OPAL_ENABLE_FT_MPI */ - } - - rc = ompi_comm_get_rprocs( local_comm, bridge_comm, lleader, - remote_leader, tag, rsize, &rprocs ); - if ( OMPI_SUCCESS != rc ) { - goto err_exit; - } - - if ( MPI_PARAM_CHECK ) { - if(OMPI_GROUP_IS_DENSE(local_comm->c_local_group)) { - rc = ompi_comm_overlapping_groups(local_comm->c_local_group->grp_proc_count, - local_comm->c_local_group->grp_proc_pointers, - rsize, - rprocs); - } - else { - proc_list = (ompi_proc_t **) calloc (local_comm->c_local_group->grp_proc_count, - sizeof (ompi_proc_t *)); - for(j=0 ; jc_local_group->grp_proc_count ; j++) { - proc_list[j] = ompi_group_peer_lookup(local_comm->c_local_group,j); - } - rc = ompi_comm_overlapping_groups(local_comm->c_local_group->grp_proc_count, - proc_list, - rsize, - rprocs); - } - if ( OMPI_SUCCESS != rc ) { - goto err_exit; - } - } - new_group_pointer = ompi_group_allocate(rsize); - if( NULL == new_group_pointer ) { - rc = MPI_ERR_GROUP; - goto err_exit; - } - - /* put group elements in the list */ - for (j = 0; j < rsize; j++) { - new_group_pointer->grp_proc_pointers[j] = rprocs[j]; - OBJ_RETAIN(rprocs[j]); - } - - rc = ompi_comm_set ( &newcomp, /* new comm */ - local_comm, /* old comm */ - 
local_comm->c_local_group->grp_proc_count, /* local_size */ - NULL, /* local_procs*/ - rsize, /* remote_size */ - NULL, /* remote_procs */ - NULL, /* attrs */ - local_comm->error_handler, /* error handler*/ - false, /* dont copy the topo */ - local_comm->c_local_group, /* local group */ - new_group_pointer /* remote group */ - ); - - if ( MPI_SUCCESS != rc ) { - goto err_exit; - } - - OBJ_RELEASE(new_group_pointer); - new_group_pointer = MPI_GROUP_NULL; - - /* Determine context id. It is identical to f_2_c_handle */ - rc = ompi_comm_nextcid (newcomp, local_comm, bridge_comm, &lleader, - &rleader, false, OMPI_COMM_CID_INTRA_BRIDGE); - if ( MPI_SUCCESS != rc ) { - goto err_exit; - } - - /* activate comm and init coll-module */ - rc = ompi_comm_activate (&newcomp, local_comm, bridge_comm, &lleader, &rleader, - false, OMPI_COMM_CID_INTRA_BRIDGE); - if ( MPI_SUCCESS != rc ) { - goto err_exit; - } - - err_exit: - if ( NULL != rprocs ) { - free ( rprocs ); - } - if ( NULL != proc_list ) { - free ( proc_list ); - } - if ( OMPI_SUCCESS != rc ) { - *newintercomm = MPI_COMM_NULL; - return OMPI_ERRHANDLER_INVOKE(local_comm, rc, - FUNC_NAME); - } + rc = ompi_intercomm_create (local_comm, local_leader, bridge_comm, remote_leader, tag, + newintercomm); - *newintercomm = newcomp; - return MPI_SUCCESS; + OMPI_ERRHANDLER_RETURN (rc, local_comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/intercomm_create_from_groups.c b/ompi/mpi/c/intercomm_create_from_groups.c new file mode 100644 index 00000000000..a11a936b7d9 --- /dev/null +++ b/ompi/mpi/c/intercomm_create_from_groups.c @@ -0,0 +1,95 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2009 University of Houston. All rights reserved. + * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/communicator/communicator.h" +#include "ompi/request/request.h" +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Intercomm_create_from_groups = PMPI_Intercomm_create_from_groups +#endif +#define MPI_Intercomm_create_from_groups PMPI_Intercomm_create_from_groups +#endif + +static const char FUNC_NAME[] = "MPI_Intercomm_create_from_groups"; + + +int MPI_Intercomm_create_from_groups (MPI_Group local_group, int local_leader, MPI_Group remote_group, + int remote_leader, const char *tag, MPI_Info info, MPI_Errhandler errhandler, + MPI_Comm *newintercomm) +{ + int rc; + + MEMCHECKER( + memchecker_comm(local_comm); + memchecker_comm(bridge_comm); + ); + + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (NULL == errhandler) { + return MPI_ERR_ARG; + } + + if (NULL == local_group || NULL == remote_group) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_GROUP, FUNC_NAME); + } + if (NULL == 
info || ompi_info_is_freed(info)) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_INFO, FUNC_NAME); + } + if (NULL == tag) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_TAG, FUNC_NAME); + } + if (NULL == newintercomm) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_ARG, FUNC_NAME); + } + } + + rc = ompi_intercomm_create_from_groups (local_group, local_leader, remote_group, remote_leader, tag, + &info->super, errhandler, newintercomm); + + if (MPI_SUCCESS != rc) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + rc, FUNC_NAME); + } + + return rc; +} + diff --git a/ompi/mpi/c/intercomm_merge.c b/ompi/mpi/c/intercomm_merge.c index 18c458e5ca9..9cb3f31193d 100644 --- a/ompi/mpi/c/intercomm_merge.c +++ b/ompi/mpi/c/intercomm_merge.c @@ -18,6 +18,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,13 +50,12 @@ static const char FUNC_NAME[] = "MPI_Intercomm_merge"; int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newcomm) { - ompi_communicator_t *newcomp=MPI_COMM_NULL; + ompi_communicator_t *newcomp = MPI_COMM_NULL; ompi_proc_t **procs=NULL; + int first, thigh = high; int local_size, remote_size; - int first; int total_size; int rc=MPI_SUCCESS; - int thigh = high; ompi_group_t *new_group_pointer; MEMCHECKER( @@ -96,8 +97,7 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, first = ompi_comm_determine_first ( intercomm, thigh ); if ( MPI_UNDEFINED == first ) { - rc = MPI_ERR_INTERN; - goto exit; + return OMPI_ERRHANDLER_INVOKE(intercomm, MPI_ERR_INTERN, FUNC_NAME); } if ( first ) { @@ -115,10 +115,9 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, NULL, /* remote_procs */ NULL, /* attrs */ intercomm->error_handler, /* error handler*/ - false, /* don't copy the topo */ new_group_pointer, /* local group */ - NULL /* remote group */ - ); + NULL, /* remote group */ + 0); if ( MPI_SUCCESS != rc ) { goto exit; } @@ -141,6 +140,7 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, } exit: + if ( NULL != procs ) { free ( procs ); } @@ -155,4 +155,3 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, *newcomm = newcomp; return MPI_SUCCESS; } - diff --git a/ompi/mpi/c/iprobe.c b/ompi/mpi/c/iprobe.c index 7cea64d8bde..c156e704f86 100644 --- a/ompi/mpi/c/iprobe.c +++ b/ompi/mpi/c/iprobe.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -67,7 +67,7 @@ int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status if (MPI_PROC_NULL == source) { *flag = 1; if (MPI_STATUS_IGNORE != status) { - *status = ompi_request_empty.req_status; + OMPI_COPY_STATUS(status, ompi_request_empty.req_status, false); /* * Per MPI-1, the MPI_ERROR field is not defined for single-completion calls */ diff --git a/ompi/mpi/c/ireduce.c b/ompi/mpi/c/ireduce.c index 34e773f43a9..f295d426d68 100644 --- a/ompi/mpi/c/ireduce.c +++ b/ompi/mpi/c/ireduce.c @@ -100,7 +100,8 @@ int MPI_Ireduce(const void *sendbuf, void *recvbuf, int count, free(msg); return ret; } else if ((ompi_comm_rank(comm) != root && MPI_IN_PLACE == sendbuf) || - (ompi_comm_rank(comm) == root && ((MPI_IN_PLACE == recvbuf) || (sendbuf == recvbuf)))) { + (ompi_comm_rank(comm) == root && ((MPI_IN_PLACE == recvbuf) || + ((sendbuf == recvbuf) && (0 != count))))) { err = MPI_ERR_ARG; } else { OMPI_CHECK_DATATYPE_FOR_SEND(err, datatype, count); diff --git a/ompi/mpi/c/isendrecv.c b/ompi/mpi/c/isendrecv.c new file mode 100644 index 00000000000..3d68f388faf --- /dev/null +++ b/ompi/mpi/c/isendrecv.c @@ -0,0 +1,193 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2021 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/communicator/comm_request.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/request/request.h" +#include "ompi/memchecker.h" +#include "ompi/runtime/ompi_spc.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Isendrecv = PMPI_Isendrecv +#endif +#define MPI_Isendrecv PMPI_Isendrecv +#endif + +static const char FUNC_NAME[] = "MPI_Isendrecv"; + +struct ompi_isendrecv_context_t { + opal_object_t super; + int nreqs; + int source; + ompi_request_t *subreq[2]; +}; + +typedef struct ompi_isendrecv_context_t ompi_isendrecv_context_t; +OBJ_CLASS_INSTANCE(ompi_isendrecv_context_t, opal_object_t, NULL, NULL); + +static int ompi_isendrecv_complete_func (ompi_comm_request_t *request) +{ + ompi_isendrecv_context_t *context = + (ompi_isendrecv_context_t *) request->context; + + /* + * Copy the status from the receive side of the sendrecv request? + * But what if the send failed? + * + * Probably need to bring up in the MPI forum. 
+ */ + + if (MPI_PROC_NULL != context->source) { + OMPI_COPY_STATUS(&request->super.req_status, + context->subreq[0]->req_status, false); + } else { + OMPI_COPY_STATUS(&request->super.req_status, + ompi_request_empty.req_status, false); + } + + if(NULL != context->subreq[0]) { + ompi_request_free(&context->subreq[0]); + } + if(NULL != context->subreq[1]) { + ompi_request_free(&context->subreq[1]); + } + + return OMPI_SUCCESS; +} + + +int MPI_Isendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + int dest, int sendtag, void *recvbuf, int recvcount, + MPI_Datatype recvtype, int source, int recvtag, + MPI_Comm comm, MPI_Request *request) +{ + ompi_isendrecv_context_t *context = NULL; + ompi_comm_request_t *crequest; + int rc = MPI_SUCCESS; + int nreqs = 0; + uint32_t flags; + + SPC_RECORD(OMPI_SPC_ISENDRECV, 1); + + MEMCHECKER( + memchecker_datatype(sendtype); + memchecker_datatype(recvtype); + memchecker_call(&opal_memchecker_base_isdefined, sendbuf, sendcount, sendtype); + memchecker_comm(comm); + ); + + if ( MPI_PARAM_CHECK ) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_CHECK_DATATYPE_FOR_SEND(rc, sendtype, sendcount); + OMPI_CHECK_DATATYPE_FOR_RECV(rc, recvtype, recvcount); + OMPI_CHECK_USER_BUFFER(rc, sendbuf, sendtype, sendcount); + OMPI_CHECK_USER_BUFFER(rc, recvbuf, recvtype, recvcount); + + if (ompi_comm_invalid(comm)) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM, FUNC_NAME); + } else if (dest != MPI_PROC_NULL && ompi_comm_peer_invalid(comm, dest)) { + rc = MPI_ERR_RANK; + } else if (sendtag < 0 || sendtag > mca_pml.pml_max_tag) { + rc = MPI_ERR_TAG; + } else if (source != MPI_PROC_NULL && source != MPI_ANY_SOURCE && ompi_comm_peer_invalid(comm, source)) { + rc = MPI_ERR_RANK; + } else if (((recvtag < 0) && (recvtag != MPI_ANY_TAG)) || (recvtag > mca_pml.pml_max_tag)) { + rc = MPI_ERR_TAG; + } else if (request == NULL) { + rc = MPI_ERR_REQUEST; + } + + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + } + + crequest = 
ompi_comm_request_get (); + if (NULL == crequest) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + context = OBJ_NEW(ompi_isendrecv_context_t); + if (NULL == context) { + ompi_comm_request_return (crequest); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + crequest->context = &context->super; + context->subreq[0] = NULL; + context->subreq[1] = NULL; + context->source = source; + + if (source != MPI_PROC_NULL) { /* post recv */ + rc = MCA_PML_CALL(irecv(recvbuf, recvcount, recvtype, + source, recvtag, comm, &context->subreq[nreqs++])); + if (MPI_SUCCESS != rc) { + OBJ_RELEASE(context); + ompi_comm_request_return (crequest); + } + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + } + + if (dest != MPI_PROC_NULL) { /* send */ + rc = MCA_PML_CALL(isend(sendbuf, sendcount, sendtype, dest, + sendtag, MCA_PML_BASE_SEND_STANDARD, comm, &context->subreq[nreqs++])); + if (MPI_SUCCESS != rc) { + OBJ_RELEASE(context); + ompi_comm_request_return (crequest); + } + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + } + + /* + * schedule the operation + */ + + context->nreqs = nreqs; + assert(nreqs <= 2); + + flags = OMPI_COMM_REQ_FLAG_RETAIN_SUBREQ; + + rc = ompi_comm_request_schedule_append_w_flags(crequest, ompi_isendrecv_complete_func, + context->subreq, nreqs, flags); + if (MPI_SUCCESS != rc) { + OBJ_RELEASE(context); + ompi_comm_request_return (crequest); + } + + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + + /* kick off the request */ + + ompi_comm_request_start (crequest); + *request = &crequest->super; + + return rc; +} diff --git a/ompi/mpi/c/isendrecv_replace.c b/ompi/mpi/c/isendrecv_replace.c new file mode 100644 index 00000000000..8a736b2a239 --- /dev/null +++ b/ompi/mpi/c/isendrecv_replace.c @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2020 The University of Tennessee and The University + * of Tennessee Research Foundation. 
All rights + * reserved. + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/communicator/comm_request.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/datatype/ompi_datatype.h" +#include "opal/datatype/opal_convertor.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/proc/proc.h" +#include "ompi/memchecker.h" +#include "ompi/runtime/ompi_spc.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Isendrecv_replace = PMPI_Isendrecv_replace +#endif +#define MPI_Isendrecv_replace PMPI_Isendrecv_replace +#endif + +static const char FUNC_NAME[] = "MPI_Isendrecv_replace"; + +struct ompi_isendrecv_replace_context_t { + opal_object_t super; + opal_convertor_t convertor; + size_t packed_size; + unsigned char packed_data[2048]; + struct iovec iov; + int nreqs; + int source; + ompi_request_t *subreq[2]; +}; + +typedef struct ompi_isendrecv_replace_context_t ompi_isendrecv_replace_context_t; + +static void ompi_isendrecv_context_constructor(ompi_isendrecv_replace_context_t *context) +{ + context->packed_size = 0; + OBJ_CONSTRUCT(&context->convertor, opal_convertor_t); +} + +static void ompi_isendrecv_context_destructor(ompi_isendrecv_replace_context_t *context) +{ + if (context->packed_size > 
sizeof(context->packed_data)) { + PMPI_Free_mem(context->iov.iov_base); + } + OBJ_DESTRUCT(&context->convertor); +} + +OBJ_CLASS_INSTANCE(ompi_isendrecv_replace_context_t, + opal_object_t, + ompi_isendrecv_context_constructor, + ompi_isendrecv_context_destructor); + +static int ompi_isendrecv_replace_complete_func (ompi_comm_request_t *request) +{ + ompi_isendrecv_replace_context_t *context = + (ompi_isendrecv_replace_context_t *) request->context; + + /* + * Copy the status from the receive side of the sendrecv request? + * But what if the send failed? + * + * Probably need to bring up in the MPI forum. + */ + + if (MPI_PROC_NULL != context->source) { + OMPI_COPY_STATUS(&request->super.req_status, + context->subreq[0]->req_status, false); + } else { + OMPI_COPY_STATUS(&request->super.req_status, + ompi_request_empty.req_status, false); + } + + if(NULL != context->subreq[0]) { + ompi_request_free(&context->subreq[0]); + } + if(NULL != context->subreq[1]) { + ompi_request_free(&context->subreq[1]); + } + + return OMPI_SUCCESS; +} + + +int MPI_Isendrecv_replace(void * buf, int count, MPI_Datatype datatype, + int dest, int sendtag, int source, int recvtag, + MPI_Comm comm, MPI_Request *request) + +{ + int rc = MPI_SUCCESS; + size_t max_data; + uint32_t iov_count; + ompi_comm_request_t *crequest = NULL; + ompi_isendrecv_replace_context_t *context = NULL; + int nreqs = 0; + uint32_t flags; + + SPC_RECORD(OMPI_SPC_ISENDRECV_REPLACE, 1); + + MEMCHECKER( + memchecker_datatype(datatype); + memchecker_call(&opal_memchecker_base_isdefined, buf, count, datatype); + memchecker_comm(comm); + ); + + if ( MPI_PARAM_CHECK ) { + rc = MPI_SUCCESS; + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_CHECK_DATATYPE_FOR_RECV(rc, datatype, count); + + if (ompi_comm_invalid(comm)) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM, FUNC_NAME); + } else if (dest != MPI_PROC_NULL && ompi_comm_peer_invalid(comm, dest)) { + rc = MPI_ERR_RANK; + } else if (sendtag < 0 || sendtag > 
mca_pml.pml_max_tag) { + rc = MPI_ERR_TAG; + } else if (source != MPI_PROC_NULL && source != MPI_ANY_SOURCE && ompi_comm_peer_invalid(comm, source)) { + rc = MPI_ERR_RANK; + } else if (((recvtag < 0) && (recvtag != MPI_ANY_TAG)) || (recvtag > mca_pml.pml_max_tag)) { + rc = MPI_ERR_TAG; + } else if (request == NULL) { + rc = MPI_ERR_REQUEST; + } + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + } + + /* simple case */ + if ( source == MPI_PROC_NULL || dest == MPI_PROC_NULL || count == 0 ) { + rc = PMPI_Isendrecv(buf, count, datatype, dest, sendtag, buf, count, datatype, source, recvtag, comm, request); + return rc; + } + + ompi_proc_t* proc = ompi_comm_peer_lookup(comm, dest); + if(proc == NULL) { + rc = MPI_ERR_RANK; + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); + } + + crequest = ompi_comm_request_get (); + if (NULL == crequest) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + context = OBJ_NEW(ompi_isendrecv_replace_context_t); + if (NULL == context) { + ompi_comm_request_return (crequest); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + context->iov.iov_base = context->packed_data; + context->iov.iov_len = sizeof(context->packed_data); + + crequest->context = &context->super; + context->subreq[0] = NULL; + context->subreq[1] = NULL; + context->source = source; + + /* initialize convertor to unpack recv buffer */ + OBJ_CONSTRUCT(&context->convertor, opal_convertor_t); + opal_convertor_copy_and_prepare_for_send( proc->super.proc_convertor, &(datatype->super), + count, buf, 0, &context->convertor ); + + /* setup a buffer for recv */ + opal_convertor_get_packed_size( &context->convertor, &context->packed_size ); + if( context->packed_size > sizeof(context->packed_data) ) { + rc = PMPI_Alloc_mem(context->packed_size, MPI_INFO_NULL, &context->iov.iov_base); + if(OMPI_SUCCESS != rc) { + OBJ_RELEASE(context); + ompi_comm_request_return (crequest); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); + } + context->iov.iov_len = context->packed_size; + } + max_data = 
context->packed_size; + iov_count = 1; + rc = opal_convertor_pack(&context->convertor, &context->iov, &iov_count, &max_data); + if ( 0 > rc ) { + OBJ_RELEASE(context); + ompi_comm_request_return (crequest); + rc = MPI_ERR_UNKNOWN; + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); + } + + if (source != MPI_PROC_NULL) { /* post recv */ + rc = MCA_PML_CALL(irecv(buf, count, datatype, + source, recvtag, comm, &context->subreq[nreqs++])); + if (MPI_SUCCESS != rc) { + OBJ_RELEASE(context); + ompi_comm_request_return (crequest); + } + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + } + + if (dest != MPI_PROC_NULL) { /* send */ + rc = MCA_PML_CALL(isend(context->iov.iov_base, context->packed_size, MPI_PACKED, dest, + sendtag, MCA_PML_BASE_SEND_STANDARD, comm, + &context->subreq[nreqs++])); + if (MPI_SUCCESS != rc) { + OBJ_RELEASE(context); + ompi_comm_request_return (crequest); + } + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + } + + /* + * schedule the operation + */ + + context->nreqs = nreqs; + assert(nreqs <= 2); + + flags = OMPI_COMM_REQ_FLAG_RETAIN_SUBREQ; + + rc = ompi_comm_request_schedule_append_w_flags(crequest, + ompi_isendrecv_replace_complete_func, + context->subreq, + nreqs, + flags); + if (MPI_SUCCESS != rc) { + OBJ_RELEASE(context); + ompi_comm_request_return (crequest); + } + + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + + /* kick off the request */ + + ompi_comm_request_start (crequest); + *request = &crequest->super; + + return rc; +} diff --git a/ompi/mpi/c/keyval_create.c b/ompi/mpi/c/keyval_create.c index 1a8a268fed8..a0b7f563b1d 100644 --- a/ompi/mpi/c/keyval_create.c +++ b/ompi/mpi/c/keyval_create.c @@ -56,7 +56,7 @@ int MPI_Keyval_create(MPI_Copy_function *copy_attr_fn, } } - copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*)copy_attr_fn; + copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function) copy_attr_fn; del_fn.attr_communicator_delete_fn = delete_attr_fn; ret = 
ompi_attr_create_keyval(COMM_ATTR, copy_fn, diff --git a/ompi/mpi/c/mprobe.c b/ompi/mpi/c/mprobe.c index a608174ecca..b63e446b91c 100644 --- a/ompi/mpi/c/mprobe.c +++ b/ompi/mpi/c/mprobe.c @@ -1,11 +1,11 @@ /* * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2020 The University of Tennessee and The University + * Copyright (c) 2020-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ @@ -62,7 +62,7 @@ int MPI_Mprobe(int source, int tag, MPI_Comm comm, if (MPI_PROC_NULL == source) { if (MPI_STATUS_IGNORE != status) { - *status = ompi_request_empty.req_status; + OMPI_COPY_STATUS(status, ompi_request_empty.req_status, false); /* Per MPI-1, the MPI_ERROR field is not defined for single-completion calls */ MEMCHECKER( diff --git a/ompi/mpi/c/mrecv.c b/ompi/mpi/c/mrecv.c index 692bf87da8b..2e209332e40 100644 --- a/ompi/mpi/c/mrecv.c +++ b/ompi/mpi/c/mrecv.c @@ -4,7 +4,7 @@ * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2018 The University of Tennessee and The University + * Copyright (c) 2018-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* $COPYRIGHT$ @@ -68,7 +68,7 @@ int MPI_Mrecv(void *buf, int count, MPI_Datatype type, if (&ompi_message_no_proc.message == *message) { if (MPI_STATUS_IGNORE != status) { - *status = ompi_request_empty.req_status; + OMPI_COPY_STATUS(status, ompi_request_empty.req_status, false); } *message = MPI_MESSAGE_NULL; return MPI_SUCCESS; diff --git a/ompi/mpiext/pcollreq/c/neighbor_allgather_init.c b/ompi/mpi/c/neighbor_allgather_init.c similarity index 90% rename from ompi/mpiext/pcollreq/c/neighbor_allgather_init.c rename to ompi/mpi/c/neighbor_allgather_init.c index b0d5a646d5c..5b6042e7a8d 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_allgather_init.c +++ b/ompi/mpi/c/neighbor_allgather_init.c @@ -37,21 +37,20 @@ #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" #include "ompi/runtime/ompi_spc.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Neighbor_allgather_init = PMPIX_Neighbor_allgather_init +#pragma weak MPI_Neighbor_allgather_init = PMPI_Neighbor_allgather_init #endif -#define MPIX_Neighbor_allgather_init PMPIX_Neighbor_allgather_init +#define MPI_Neighbor_allgather_init PMPI_Neighbor_allgather_init #endif -static const char FUNC_NAME[] = "MPIX_Neighbor_allgather_init"; +static const char FUNC_NAME[] = "MPI_Neighbor_allgather_init"; -int MPIX_Neighbor_allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - MPI_Comm comm, MPI_Info info, MPI_Request *request) +int MPI_Neighbor_allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request) { int err; diff --git a/ompi/mpiext/pcollreq/c/neighbor_allgatherv_init.c b/ompi/mpi/c/neighbor_allgatherv_init.c similarity index 90% rename from ompi/mpiext/pcollreq/c/neighbor_allgatherv_init.c rename to 
ompi/mpi/c/neighbor_allgatherv_init.c index 23da85e67a3..8c3448bcfc2 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_allgatherv_init.c +++ b/ompi/mpi/c/neighbor_allgatherv_init.c @@ -37,22 +37,21 @@ #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" #include "ompi/runtime/ompi_spc.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Neighbor_allgatherv_init = PMPIX_Neighbor_allgatherv_init +#pragma weak MPI_Neighbor_allgatherv_init = PMPI_Neighbor_allgatherv_init #endif -#define MPIX_Neighbor_allgatherv_init PMPIX_Neighbor_allgatherv_init +#define MPI_Neighbor_allgatherv_init PMPI_Neighbor_allgatherv_init #endif -static const char FUNC_NAME[] = "MPIX_Neighbor_allgatherv_init"; +static const char FUNC_NAME[] = "MPI_Neighbor_allgatherv_init"; -int MPIX_Neighbor_allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int recvcounts[], const int displs[], - MPI_Datatype recvtype, MPI_Comm comm, - MPI_Info info, MPI_Request *request) +int MPI_Neighbor_allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], + MPI_Datatype recvtype, MPI_Comm comm, + MPI_Info info, MPI_Request *request) { int i, size, err; diff --git a/ompi/mpiext/pcollreq/c/neighbor_alltoall_init.c b/ompi/mpi/c/neighbor_alltoall_init.c similarity index 90% rename from ompi/mpiext/pcollreq/c/neighbor_alltoall_init.c rename to ompi/mpi/c/neighbor_alltoall_init.c index 68b72f4b4fe..ee6bb510f63 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_alltoall_init.c +++ b/ompi/mpi/c/neighbor_alltoall_init.c @@ -37,21 +37,20 @@ #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" #include "ompi/runtime/ompi_spc.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Neighbor_alltoall_init = 
PMPIX_Neighbor_alltoall_init +#pragma weak MPI_Neighbor_alltoall_init = PMPI_Neighbor_alltoall_init #endif -#define MPIX_Neighbor_alltoall_init PMPIX_Neighbor_alltoall_init +#define MPI_Neighbor_alltoall_init PMPI_Neighbor_alltoall_init #endif -static const char FUNC_NAME[] = "MPIX_Neighbor_alltoall_init"; +static const char FUNC_NAME[] = "MPI_Neighbor_alltoall_init"; -int MPIX_Neighbor_alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - MPI_Comm comm, MPI_Info info, MPI_Request *request) +int MPI_Neighbor_alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request) { size_t sendtype_size, recvtype_size; int err; diff --git a/ompi/mpiext/pcollreq/c/neighbor_alltoallv_init.c b/ompi/mpi/c/neighbor_alltoallv_init.c similarity index 90% rename from ompi/mpiext/pcollreq/c/neighbor_alltoallv_init.c rename to ompi/mpi/c/neighbor_alltoallv_init.c index 5287dac46b6..57bba5afcb9 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_alltoallv_init.c +++ b/ompi/mpi/c/neighbor_alltoallv_init.c @@ -36,22 +36,21 @@ #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" #include "ompi/runtime/ompi_spc.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Neighbor_alltoallv_init = PMPIX_Neighbor_alltoallv_init +#pragma weak MPI_Neighbor_alltoallv_init = PMPI_Neighbor_alltoallv_init #endif -#define MPIX_Neighbor_alltoallv_init PMPIX_Neighbor_alltoallv_init +#define MPI_Neighbor_alltoallv_init PMPI_Neighbor_alltoallv_init #endif -static const char FUNC_NAME[] = "MPIX_Neighbor_alltoallv_init"; +static const char FUNC_NAME[] = "MPI_Neighbor_alltoallv_init"; -int MPIX_Neighbor_alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], - MPI_Datatype sendtype, void *recvbuf, const int 
recvcounts[], - const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, - MPI_Info info, MPI_Request *request) +int MPI_Neighbor_alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], + MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], + const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, + MPI_Info info, MPI_Request *request) { int i, err; int indegree, outdegree; diff --git a/ompi/mpiext/pcollreq/c/neighbor_alltoallw_init.c b/ompi/mpi/c/neighbor_alltoallw_init.c similarity index 89% rename from ompi/mpiext/pcollreq/c/neighbor_alltoallw_init.c rename to ompi/mpi/c/neighbor_alltoallw_init.c index bece4e4a97b..c64836ec308 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_alltoallw_init.c +++ b/ompi/mpi/c/neighbor_alltoallw_init.c @@ -36,22 +36,21 @@ #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" #include "ompi/runtime/ompi_spc.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Neighbor_alltoallw_init = PMPIX_Neighbor_alltoallw_init +#pragma weak MPI_Neighbor_alltoallw_init = PMPI_Neighbor_alltoallw_init #endif -#define MPIX_Neighbor_alltoallw_init PMPIX_Neighbor_alltoallw_init +#define MPI_Neighbor_alltoallw_init PMPI_Neighbor_alltoallw_init #endif -static const char FUNC_NAME[] = "MPIX_Neighbor_alltoallw_init"; +static const char FUNC_NAME[] = "MPI_Neighbor_alltoallw_init"; -int MPIX_Neighbor_alltoallw_init(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], - const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, - MPI_Info info, MPI_Request *request) +int MPI_Neighbor_alltoallw_init(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], + const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], + const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, + MPI_Info 
info, MPI_Request *request) { int i, err; int indegree, outdegree; diff --git a/ompi/mpi/c/precv_init.c b/ompi/mpi/c/precv_init.c index 2034e54da09..3279cca0aeb 100644 --- a/ompi/mpi/c/precv_init.c +++ b/ompi/mpi/c/precv_init.c @@ -16,6 +16,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2021 Bull S.A.S. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,7 +44,7 @@ static const char FUNC_NAME[] = "MPI_Precv_init"; -int MPI_Precv_init(void* buf, int partitions, MPI_Count count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request) +int MPI_Precv_init(void* buf, int partitions, MPI_Count count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Info info, MPI_Request *request) { int rc; @@ -59,6 +60,6 @@ int MPI_Precv_init(void* buf, int partitions, MPI_Count count, MPI_Datatype data OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } - rc = mca_part.part_precv_init(buf, partitions, count, datatype, source, tag, comm, request); + rc = mca_part.part_precv_init(buf, partitions, count, datatype, source, tag, comm, info, request); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/probe.c b/ompi/mpi/c/probe.c index 6c235315876..3944f04a159 100644 --- a/ompi/mpi/c/probe.c +++ b/ompi/mpi/c/probe.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -67,7 +67,7 @@ int MPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status) if (MPI_PROC_NULL == source) { if (MPI_STATUS_IGNORE != status) { - *status = ompi_request_empty.req_status; + OMPI_COPY_STATUS(status, ompi_request_empty.req_status, false); /* * Per MPI-1, the MPI_ERROR field is not defined for single-completion calls */ diff --git a/ompi/mpi/c/profile/Makefile.am b/ompi/mpi/c/profile/Makefile.am deleted file mode 100644 index 1d93634a190..00000000000 --- a/ompi/mpi/c/profile/Makefile.am +++ /dev/null @@ -1,468 +0,0 @@ -# -*- makefile.am -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2013 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011 Sandia National Laboratories. All rights reserved. -# Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2012-2013 Inria. All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2015-2020 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -# If OMPI_BUILD_MPI_PROFILING is enabled when we want our generated MPI_* symbols -# to be replaced by PMPI_*. -# In this directory, we definately need it to be 1. 
- -AM_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=1 - -# -# This build needs to go through only if profiling is required. -# Further, this build HAS to go through if profiling is required. -# - -noinst_LTLIBRARIES = libmpi_c_pmpi.la - -nodist_libmpi_c_pmpi_la_SOURCES = \ - pabort.c \ - padd_error_class.c \ - padd_error_code.c \ - padd_error_string.c \ - pallgather.c \ - piallgather.c \ - pallgatherv.c \ - piallgatherv.c \ - palloc_mem.c \ - pallreduce.c \ - piallreduce.c \ - palltoall.c \ - pialltoall.c \ - palltoallv.c \ - pialltoallv.c \ - palltoallw.c \ - pialltoallw.c \ - pattr_delete.c \ - pattr_get.c \ - pattr_put.c \ - pbarrier.c \ - pibarrier.c \ - pbcast.c \ - pibcast.c \ - pbsend.c \ - pbsend_init.c \ - pbuffer_attach.c \ - pbuffer_detach.c \ - pcancel.c \ - pcart_coords.c \ - pcart_create.c \ - pcartdim_get.c \ - pcart_get.c \ - pcart_map.c \ - pcart_rank.c \ - pcart_shift.c \ - pcart_sub.c \ - pclose_port.c \ - pcomm_accept.c \ - pcomm_c2f.c \ - pcomm_call_errhandler.c \ - pcomm_compare.c \ - pcomm_connect.c \ - pcomm_create.c \ - pcomm_create_errhandler.c \ - pcomm_create_group.c \ - pcomm_create_keyval.c \ - pcomm_delete_attr.c \ - pcomm_disconnect.c \ - pcomm_dup.c \ - pcomm_dup_with_info.c \ - pcomm_idup.c \ - pcomm_f2c.c \ - pcomm_free.c \ - pcomm_free_keyval.c \ - pcomm_get_attr.c \ - pcomm_get_errhandler.c \ - pcomm_get_info.c \ - pcomm_get_name.c \ - pcomm_get_parent.c \ - pcomm_group.c \ - pcomm_join.c \ - pcomm_rank.c \ - pcomm_remote_group.c \ - pcomm_remote_size.c \ - pcomm_set_attr.c \ - pcomm_set_info.c \ - pdist_graph_create.c \ - pdist_graph_create_adjacent.c \ - pdist_graph_neighbors.c \ - pdist_graph_neighbors_count.c \ - pcomm_set_errhandler.c \ - pcomm_set_name.c \ - pcomm_size.c \ - pcomm_spawn.c \ - pcomm_spawn_multiple.c \ - pcomm_split.c \ - pcomm_split_type.c \ - pcomm_test_inter.c \ - pcompare_and_swap.c \ - pdims_create.c \ - perrhandler_c2f.c \ - perrhandler_f2c.c \ - perrhandler_free.c \ - perror_class.c \ - perror_string.c \ - 
pexscan.c \ - pfetch_and_op.c \ - piexscan.c \ - pfile_c2f.c \ - pfile_call_errhandler.c \ - pfile_close.c \ - pfile_create_errhandler.c \ - pfile_delete.c \ - pfile_f2c.c \ - pfile_get_amode.c \ - pfile_get_atomicity.c \ - pfile_get_byte_offset.c \ - pfile_get_errhandler.c \ - pfile_get_group.c \ - pfile_get_info.c \ - pfile_get_position.c \ - pfile_get_position_shared.c \ - pfile_get_size.c \ - pfile_get_type_extent.c \ - pfile_get_view.c \ - pfile_iread_at.c \ - pfile_iread.c \ - pfile_iread_at_all.c \ - pfile_iread_all.c \ - pfile_iread_shared.c \ - pfile_iwrite_at.c \ - pfile_iwrite.c \ - pfile_iwrite_at_all.c \ - pfile_iwrite_all.c \ - pfile_iwrite_shared.c \ - pfile_open.c \ - pfile_preallocate.c \ - pfile_read_all_begin.c \ - pfile_read_all.c \ - pfile_read_all_end.c \ - pfile_read_at_all_begin.c \ - pfile_read_at_all.c \ - pfile_read_at_all_end.c \ - pfile_read_at.c \ - pfile_read.c \ - pfile_read_ordered_begin.c \ - pfile_read_ordered.c \ - pfile_read_ordered_end.c \ - pfile_read_shared.c \ - pfile_seek.c \ - pfile_seek_shared.c \ - pfile_set_atomicity.c \ - pfile_set_errhandler.c \ - pfile_set_info.c \ - pfile_set_size.c \ - pfile_set_view.c \ - pfile_sync.c \ - pfile_write_all_begin.c \ - pfile_write_all.c \ - pfile_write_all_end.c \ - pfile_write_at_all_begin.c \ - pfile_write_at_all.c \ - pfile_write_at_all_end.c \ - pfile_write_at.c \ - pfile_write.c \ - pfile_write_ordered_begin.c \ - pfile_write_ordered.c \ - pfile_write_ordered_end.c \ - pfile_write_shared.c \ - pfinalize.c \ - pfinalized.c \ - pfree_mem.c \ - pgather.c \ - pigather.c \ - pgatherv.c \ - pigatherv.c \ - pget_address.c \ - pget_count.c \ - pget_elements.c \ - pget_elements_x.c \ - pget_accumulate.c \ - pget_library_version.c \ - pget_processor_name.c \ - pget_version.c \ - pgraph_create.c \ - pgraph_get.c \ - pgraph_map.c \ - pgraph_neighbors_count.c \ - pgraph_neighbors.c \ - pgraphdims_get.c \ - pgrequest_complete.c \ - pgrequest_start.c \ - pgroup_c2f.c \ - pgroup_compare.c \ - 
pgroup_difference.c \ - pgroup_excl.c \ - pgroup_f2c.c \ - pgroup_free.c \ - pgroup_incl.c \ - pgroup_intersection.c \ - pgroup_range_excl.c \ - pgroup_range_incl.c \ - pgroup_rank.c \ - pgroup_size.c \ - pgroup_translate_ranks.c \ - pgroup_union.c \ - pibsend.c \ - pimprobe.c \ - pimrecv.c \ - pinfo_c2f.c \ - pinfo_create.c \ - pinfo_delete.c \ - pinfo_dup.c \ - pinfo_f2c.c \ - pinfo_free.c \ - pinfo_get.c \ - pinfo_get_nkeys.c \ - pinfo_get_nthkey.c \ - pinfo_get_valuelen.c \ - pinfo_set.c \ - pinit.c \ - pinit_thread.c \ - pinitialized.c \ - pintercomm_create.c \ - pintercomm_merge.c \ - piprobe.c \ - pirecv.c \ - pirsend.c \ - pis_thread_main.c \ - pisend.c \ - pissend.c \ - plookup_name.c \ - pmessage_f2c.c \ - pmessage_c2f.c \ - pmprobe.c \ - pmrecv.c \ - pneighbor_allgather.c \ - pineighbor_allgather.c \ - pneighbor_allgatherv.c \ - pineighbor_allgatherv.c \ - pneighbor_alltoall.c \ - pineighbor_alltoall.c \ - pneighbor_alltoallv.c \ - pineighbor_alltoallv.c \ - pneighbor_alltoallw.c \ - pineighbor_alltoallw.c \ - pkeyval_create.c \ - pkeyval_free.c \ - pop_c2f.c \ - pop_create.c \ - pop_commutative.c \ - pop_f2c.c \ - pop_free.c \ - popen_port.c \ - ppack_external.c \ - ppack_external_size.c \ - ppack.c \ - ppack_size.c \ - pparrived.c \ - ppcontrol.c \ - ppready.c \ - ppready_list.c \ - ppready_range.c \ - pprecv_init.c \ - pprobe.c \ - ppsend_init.c \ - ppublish_name.c \ - pquery_thread.c \ - praccumulate.c \ - precv_init.c \ - precv.c \ - preduce.c \ - pregister_datarep.c \ - pireduce.c \ - preduce_local.c \ - preduce_scatter.c \ - pireduce_scatter.c \ - preduce_scatter_block.c \ - pireduce_scatter_block.c \ - prequest_c2f.c \ - prequest_f2c.c \ - prequest_free.c \ - prequest_get_status.c \ - prget.c \ - prget_accumulate.c \ - prput.c \ - prsend_init.c \ - prsend.c \ - pscan.c \ - piscan.c \ - pscatter.c \ - piscatter.c \ - pscatterv.c \ - piscatterv.c \ - psend.c \ - psend_init.c \ - psendrecv.c \ - psendrecv_replace.c \ - pssend_init.c \ - pssend.c \ - 
pstart.c \ - pstartall.c \ - pstatus_c2f.c \ - pstatus_c2f08.c \ - pstatus_f082c.c \ - pstatus_f082f.c \ - pstatus_f2f08.c \ - pstatus_f2c.c \ - pstatus_set_cancelled.c \ - pstatus_set_elements.c \ - pstatus_set_elements_x.c \ - ptestall.c \ - ptestany.c \ - ptest.c \ - ptest_cancelled.c \ - ptestsome.c \ - ptopo_test.c \ - ptype_c2f.c \ - ptype_commit.c \ - ptype_contiguous.c \ - ptype_create_darray.c \ - ptype_create_f90_complex.c \ - ptype_create_f90_integer.c \ - ptype_create_f90_real.c \ - ptype_create_hindexed.c \ - ptype_create_hvector.c \ - ptype_create_indexed_block.c \ - ptype_create_hindexed_block.c \ - ptype_create_keyval.c \ - ptype_create_resized.c \ - ptype_create_struct.c \ - ptype_create_subarray.c \ - ptype_delete_attr.c \ - ptype_dup.c \ - ptype_f2c.c \ - ptype_free.c \ - ptype_free_keyval.c \ - ptype_get_attr.c \ - ptype_get_contents.c \ - ptype_get_envelope.c \ - ptype_get_extent.c \ - ptype_get_extent_x.c \ - ptype_get_name.c \ - ptype_get_true_extent.c \ - ptype_get_true_extent_x.c \ - ptype_indexed.c \ - ptype_match_size.c \ - ptype_set_attr.c \ - ptype_set_name.c \ - ptype_size.c \ - ptype_size_x.c \ - ptype_vector.c \ - punpack_external.c \ - punpack.c \ - punpublish_name.c \ - pwait.c \ - pwaitall.c \ - pwaitany.c \ - pwaitsome.c \ - pwtime.c \ - pwtick.c \ - paccumulate.c \ - pget.c \ - pput.c \ - pwin_allocate.c \ - pwin_allocate_shared.c \ - pwin_attach.c \ - pwin_c2f.c \ - pwin_call_errhandler.c \ - pwin_complete.c \ - pwin_create_errhandler.c \ - pwin_create_keyval.c \ - pwin_create.c \ - pwin_create_dynamic.c \ - pwin_delete_attr.c \ - pwin_detach.c \ - pwin_f2c.c \ - pwin_fence.c \ - pwin_flush.c \ - pwin_flush_all.c \ - pwin_flush_local.c \ - pwin_flush_local_all.c \ - pwin_free_keyval.c \ - pwin_free.c \ - pwin_get_attr.c \ - pwin_get_errhandler.c \ - pwin_get_group.c \ - pwin_get_info.c \ - pwin_get_name.c \ - pwin_lock.c \ - pwin_lock_all.c \ - pwin_post.c \ - pwin_set_attr.c \ - pwin_set_errhandler.c \ - pwin_set_info.c \ - 
pwin_set_name.c \ - pwin_shared_query.c \ - pwin_start.c \ - pwin_sync.c \ - pwin_test.c \ - pwin_unlock.c \ - pwin_unlock_all.c \ - pwin_wait.c - -if OMPI_ENABLE_MPI1_COMPAT -nodist_libmpi_c_pmpi_la_SOURCES += \ - paddress.c \ - perrhandler_create.c \ - perrhandler_get.c \ - perrhandler_set.c \ - ptype_extent.c \ - ptype_hindexed.c \ - ptype_hvector.c \ - ptype_lb.c \ - ptype_struct.c \ - ptype_ub.c -endif - -# -# Sym link in the sources from the real MPI directory -# -$(nodist_libmpi_c_pmpi_la_SOURCES): - $(OMPI_V_LN_S) if test ! -r $@ ; then \ - pname=`echo $@ | cut -b '2-'` ; \ - $(LN_S) $(top_srcdir)/ompi/mpi/c/$$pname $@ ; \ - fi - -# Conditionally install the header files - -if WANT_INSTALL_HEADERS -ompidir = $(ompiincludedir)/$(subdir) -endif - -# These files were created by targets above - -MAINTAINERCLEANFILES = $(nodist_libmpi_c_pmpi_la_SOURCES) - -# Don't want these targets in here - -tags-recursive: -tags: -TAGS: -GTAGS: -ID: diff --git a/ompi/mpi/c/psend_init.c b/ompi/mpi/c/psend_init.c index 8178b8c98ea..820e02d3fbe 100644 --- a/ompi/mpi/c/psend_init.c +++ b/ompi/mpi/c/psend_init.c @@ -16,6 +16,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2021 Bull S.A.S. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -43,7 +44,7 @@ static const char FUNC_NAME[] = "MPI_Psend_init"; -int MPI_Psend_init(const void* buf, int partitions, MPI_Count count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request) +int MPI_Psend_init(const void* buf, int partitions, MPI_Count count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Info info, MPI_Request *request) { int rc; @@ -59,6 +60,6 @@ int MPI_Psend_init(const void* buf, int partitions, MPI_Count count, MPI_Datatyp OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } - rc = mca_part.part_psend_init(buf, partitions, count, datatype, dest, tag, comm, request); + rc = mca_part.part_psend_init(buf, partitions, count, datatype, dest, tag, comm, info, request); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/recv.c b/ompi/mpi/c/recv.c index d51fb4d7acb..56e501034e8 100644 --- a/ompi/mpi/c/recv.c +++ b/ompi/mpi/c/recv.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -89,7 +89,7 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, if (MPI_PROC_NULL == source) { if (MPI_STATUS_IGNORE != status) { - *status = ompi_request_empty.req_status; + OMPI_COPY_STATUS(status, ompi_request_empty.req_status, false); } return MPI_SUCCESS; } diff --git a/ompi/mpi/c/reduce.c b/ompi/mpi/c/reduce.c index b1673576d5b..6eafdcd52c3 100644 --- a/ompi/mpi/c/reduce.c +++ b/ompi/mpi/c/reduce.c @@ -99,7 +99,8 @@ int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, free(msg); return ret; } else if ((ompi_comm_rank(comm) != root && MPI_IN_PLACE == sendbuf) || - (ompi_comm_rank(comm) == root && ((MPI_IN_PLACE == recvbuf) || (sendbuf == recvbuf)))) { + (ompi_comm_rank(comm) == root && ((MPI_IN_PLACE == recvbuf) || + ((sendbuf == recvbuf) && (0 != count))))) { err = MPI_ERR_ARG; } else { OMPI_CHECK_DATATYPE_FOR_SEND(err, datatype, count); diff --git a/ompi/mpiext/pcollreq/c/reduce_init.c b/ompi/mpi/c/reduce_init.c similarity index 93% rename from ompi/mpiext/pcollreq/c/reduce_init.c rename to ompi/mpi/c/reduce_init.c index 87378e86367..09716152365 100644 --- a/ompi/mpiext/pcollreq/c/reduce_init.c +++ b/ompi/mpi/c/reduce_init.c @@ -34,22 +34,21 @@ #include "ompi/op/op.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Reduce_init = PMPIX_Reduce_init +#pragma weak MPI_Reduce_init = PMPI_Reduce_init #endif -#define MPIX_Reduce_init PMPIX_Reduce_init +#define MPI_Reduce_init PMPI_Reduce_init #endif -static const char FUNC_NAME[] = "MPIX_Reduce_init"; +static const char FUNC_NAME[] = "MPI_Reduce_init"; -int MPIX_Reduce_init(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, - MPI_Info info, MPI_Request *request) +int 
MPI_Reduce_init(const void *sendbuf, void *recvbuf, int count, + MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, + MPI_Info info, MPI_Request *request) { int err; diff --git a/ompi/mpiext/pcollreq/c/reduce_scatter_block_init.c b/ompi/mpi/c/reduce_scatter_block_init.c similarity index 88% rename from ompi/mpiext/pcollreq/c/reduce_scatter_block_init.c rename to ompi/mpi/c/reduce_scatter_block_init.c index 8c450f449e9..3a1ef34c2db 100644 --- a/ompi/mpiext/pcollreq/c/reduce_scatter_block_init.c +++ b/ompi/mpi/c/reduce_scatter_block_init.c @@ -33,22 +33,21 @@ #include "ompi/op/op.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Reduce_scatter_block_init = PMPIX_Reduce_scatter_block_init +#pragma weak MPI_Reduce_scatter_block_init = PMPI_Reduce_scatter_block_init #endif -#define MPIX_Reduce_scatter_block_init PMPIX_Reduce_scatter_block_init +#define MPI_Reduce_scatter_block_init PMPI_Reduce_scatter_block_init #endif -static const char FUNC_NAME[] = "MPIX_Reduce_scatter_block_init"; +static const char FUNC_NAME[] = "MPI_Reduce_scatter_block_init"; -int MPIX_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, int recvcount, - MPI_Datatype datatype, MPI_Op op, - MPI_Comm comm, MPI_Info info, MPI_Request *request) +int MPI_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, int recvcount, + MPI_Datatype datatype, MPI_Op op, + MPI_Comm comm, MPI_Info info, MPI_Request *request) { int err; diff --git a/ompi/mpiext/pcollreq/c/reduce_scatter_init.c b/ompi/mpi/c/reduce_scatter_init.c similarity index 91% rename from ompi/mpiext/pcollreq/c/reduce_scatter_init.c rename to ompi/mpi/c/reduce_scatter_init.c index 521833071c3..f480fcafae2 100644 --- a/ompi/mpiext/pcollreq/c/reduce_scatter_init.c +++ b/ompi/mpi/c/reduce_scatter_init.c @@ -34,21 +34,20 @@ #include 
"ompi/op/op.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Reduce_scatter_init = PMPIX_Reduce_scatter_init +#pragma weak MPI_Reduce_scatter_init = PMPI_Reduce_scatter_init #endif -#define MPIX_Reduce_scatter_init PMPIX_Reduce_scatter_init +#define MPI_Reduce_scatter_init PMPI_Reduce_scatter_init #endif -static const char FUNC_NAME[] = "MPIX_Reduce_scatter_init"; +static const char FUNC_NAME[] = "MPI_Reduce_scatter_init"; -int MPIX_Reduce_scatter_init(const void *sendbuf, void *recvbuf, const int recvcounts[], - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request) +int MPI_Reduce_scatter_init(const void *sendbuf, void *recvbuf, const int recvcounts[], + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request) { int i, err, size, count; diff --git a/ompi/mpi/c/request_get_status.c b/ompi/mpi/c/request_get_status.c index 8c7a2bfaddd..f97e3af4b0b 100644 --- a/ompi/mpi/c/request_get_status.c +++ b/ompi/mpi/c/request_get_status.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -70,7 +70,7 @@ int MPI_Request_get_status(MPI_Request request, int *flag, if( (request == MPI_REQUEST_NULL) || (request->req_state == OMPI_REQUEST_INACTIVE) ) { *flag = true; if( MPI_STATUS_IGNORE != status ) { - *status = ompi_status_empty; + OMPI_COPY_STATUS(status, ompi_status_empty, false); } return MPI_SUCCESS; } @@ -83,7 +83,7 @@ int MPI_Request_get_status(MPI_Request request, int *flag, ompi_grequest_invoke_query(request, &request->req_status); } if (MPI_STATUS_IGNORE != status) { - *status = request->req_status; + OMPI_COPY_STATUS(status, request->req_status, false); } return MPI_SUCCESS; } diff --git a/ompi/mpiext/pcollreq/c/scan_init.c b/ompi/mpi/c/scan_init.c similarity index 90% rename from ompi/mpiext/pcollreq/c/scan_init.c rename to ompi/mpi/c/scan_init.c index 6b4bc0ccad8..6008eb56190 100644 --- a/ompi/mpiext/pcollreq/c/scan_init.c +++ b/ompi/mpi/c/scan_init.c @@ -32,22 +32,21 @@ #include "ompi/op/op.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Scan_init = PMPIX_Scan_init +#pragma weak MPI_Scan_init = PMPI_Scan_init #endif -#define MPIX_Scan_init PMPIX_Scan_init +#define MPI_Scan_init PMPI_Scan_init #endif -static const char FUNC_NAME[] = "MPIX_Scan_init"; +static const char FUNC_NAME[] = "MPI_Scan_init"; -int MPIX_Scan_init(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, - MPI_Info info, MPI_Request *request) +int MPI_Scan_init(const void *sendbuf, void *recvbuf, int count, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, + MPI_Info info, MPI_Request *request) { int err; diff --git a/ompi/mpiext/pcollreq/c/scatter_init.c b/ompi/mpi/c/scatter_init.c similarity index 93% rename from ompi/mpiext/pcollreq/c/scatter_init.c rename to 
ompi/mpi/c/scatter_init.c index aa48a0a94a1..dd790b60654 100644 --- a/ompi/mpiext/pcollreq/c/scatter_init.c +++ b/ompi/mpi/c/scatter_init.c @@ -33,22 +33,21 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Scatter_init = PMPIX_Scatter_init +#pragma weak MPI_Scatter_init = PMPI_Scatter_init #endif -#define MPIX_Scatter_init PMPIX_Scatter_init +#define MPI_Scatter_init PMPI_Scatter_init #endif -static const char FUNC_NAME[] = "MPIX_Scatter_init"; +static const char FUNC_NAME[] = "MPI_Scatter_init"; -int MPIX_Scatter_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - int root, MPI_Comm comm, MPI_Info info, MPI_Request *request) +int MPI_Scatter_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request) { int err; diff --git a/ompi/mpiext/pcollreq/c/scatterv_init.c b/ompi/mpi/c/scatterv_init.c similarity index 94% rename from ompi/mpiext/pcollreq/c/scatterv_init.c rename to ompi/mpi/c/scatterv_init.c index 1170b79b834..4a7f29d69cf 100644 --- a/ompi/mpiext/pcollreq/c/scatterv_init.c +++ b/ompi/mpi/c/scatterv_init.c @@ -31,22 +31,21 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_Scatterv_init = PMPIX_Scatterv_init +#pragma weak MPI_Scatterv_init = PMPI_Scatterv_init #endif -#define MPIX_Scatterv_init PMPIX_Scatterv_init +#define MPI_Scatterv_init PMPI_Scatterv_init #endif -static const char FUNC_NAME[] = 
"MPIX_Scatterv_init"; +static const char FUNC_NAME[] = "MPI_Scatterv_init"; -int MPIX_Scatterv_init(const void *sendbuf, const int sendcounts[], const int displs[], - MPI_Datatype sendtype, void *recvbuf, int recvcount, - MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request) +int MPI_Scatterv_init(const void *sendbuf, const int sendcounts[], const int displs[], + MPI_Datatype sendtype, void *recvbuf, int recvcount, + MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request) { int i, size, err; diff --git a/ompi/mpi/c/sendrecv.c b/ompi/mpi/c/sendrecv.c index bd9dee53c9f..41a205848d1 100644 --- a/ompi/mpi/c/sendrecv.c +++ b/ompi/mpi/c/sendrecv.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -49,9 +50,7 @@ int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, { ompi_request_t* req; int rc = MPI_SUCCESS; -#if OPAL_ENABLE_FT_MPI int rcs = MPI_SUCCESS; -#endif SPC_RECORD(OMPI_SPC_SENDRECV, 1); @@ -92,21 +91,27 @@ int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, if (dest != MPI_PROC_NULL) { /* send */ rc = MCA_PML_CALL(send(sendbuf, sendcount, sendtype, dest, sendtag, MCA_PML_BASE_SEND_STANDARD, comm)); + if (OPAL_UNLIKELY(MPI_SUCCESS != rc)) { + rcs = rc; #if OPAL_ENABLE_FT_MPI - /* If ULFM is enabled we need to wait for the posted receive to - * complete, hence we cannot return here */ - rcs = rc; -#else - OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); -#endif /* OPAL_ENABLE_FT_MPI */ + /* If this is a PROC_FAILED error, we still need to proceed with + * the receive, so that we do not propagate errors to the sender in + * the case src != dst, and only dst is dead. In this case the + * recv is garanteed to complete (either in error if the source is + * dead, or successfully if the source is live). */ + if (OPAL_UNLIKELY(MPI_ERR_PROC_FAILED != rc)) + /* if intentionally spills outside ifdef */ +#endif + ompi_request_cancel(req); + } } if (source != MPI_PROC_NULL) { /* wait for recv */ rc = ompi_request_wait(&req, status); #if OPAL_ENABLE_FT_MPI /* Sendrecv never returns ERR_PROC_FAILED_PENDING because it is - * blocking. Lets complete now that irecv and promote the error - * to ERR_PROC_FAILED */ + * blocking. 
Lets cancel that irecv to complete it NOW and promote + * the error to ERR_PROC_FAILED */ if( OPAL_UNLIKELY(MPI_ERR_PROC_FAILED_PENDING == rc) ) { ompi_request_cancel(req); ompi_request_wait(&req, MPI_STATUS_IGNORE); @@ -115,15 +120,13 @@ int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, #endif } else { if (MPI_STATUS_IGNORE != status) { - *status = ompi_request_empty.req_status; + OMPI_COPY_STATUS(status, ompi_request_empty.req_status, false); } rc = MPI_SUCCESS; } -#if OPAL_ENABLE_FT_MPI if( OPAL_UNLIKELY(MPI_SUCCESS != rcs && MPI_SUCCESS == rc) ) { rc = rcs; } -#endif OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/session_c2f.c b/ompi/mpi/c/session_c2f.c new file mode 100644 index 00000000000..93b5d7da7f5 --- /dev/null +++ b/ompi/mpi/c/session_c2f.c @@ -0,0 +1,56 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" + +#include + +#include "ompi/instance/instance.h" +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mpi/fortran/base/fint_2_int.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_c2f = PMPI_Session_c2f +#endif +#define MPI_Session_c2f PMPI_Session_c2f +#endif + +static const char FUNC_NAME[] = "MPI_Session_c2f"; + +/* Translate a C session handle into its Fortran integer handle, i.e. the session's i_f_to_c_index converted with OMPI_INT_2_FINT. */ +MPI_Fint MPI_Session_c2f (MPI_Session session) +{ + + if ( MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + /* a NULL session has no index to report: hand back -1 rather than dereferencing it */ + if (NULL == session) { + return OMPI_INT_2_FINT(-1); + } + } + + return OMPI_INT_2_FINT(session->i_f_to_c_index); +} diff --git a/ompi/mpi/c/session_create_errhandler.c b/ompi/mpi/c/session_create_errhandler.c new file mode 100644 index 00000000000..b1634131c88 --- /dev/null +++ b/ompi/mpi/c/session_create_errhandler.c @@ -0,0 +1,51 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/instance/instance.h" + +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_create_errhandler = PMPI_Session_create_errhandler +#endif +#define MPI_Session_create_errhandler PMPI_Session_create_errhandler +#endif + +static const char FUNC_NAME[] = "MPI_Session_create_errhandler"; + +/* Wrap the C callback session_errhandler_fn in a new error handler of type OMPI_ERRHANDLER_TYPE_INSTANCE and store it in *errhandler. Returns MPI_ERR_ARG for a NULL argument (only when MPI_PARAM_CHECK is set), MPI_ERR_INTERN when creation yields NULL, MPI_SUCCESS otherwise. */ +int MPI_Session_create_errhandler (MPI_Session_errhandler_function *session_errhandler_fn, MPI_Errhandler *errhandler) +{ + int err = MPI_SUCCESS; + + if ( MPI_PARAM_CHECK ) { + if (NULL == errhandler || NULL == session_errhandler_fn) { + return MPI_ERR_ARG; + } + } + + /* Create and cache the errhandler. Sets a refcount of 1. */ + *errhandler = + ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_INSTANCE, + (ompi_errhandler_generic_handler_fn_t *) session_errhandler_fn, + OMPI_ERRHANDLER_LANG_C); + if (NULL == *errhandler) { + err = MPI_ERR_INTERN; + } + + return err; +} diff --git a/ompi/mpi/c/session_f2c.c b/ompi/mpi/c/session_f2c.c new file mode 100644 index 00000000000..cb7c0dbd914 --- /dev/null +++ b/ompi/mpi/c/session_f2c.c @@ -0,0 +1,59 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" + +#include "ompi/instance/instance.h" +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mpi/fortran/base/fint_2_int.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_f2c = PMPI_Session_f2c +#endif +#define MPI_Session_f2c PMPI_Session_f2c +#endif + +static const char FUNC_NAME[] = "MPI_Session_f2c"; + +/* Translate a Fortran integer session handle into the C handle stored at that index of ompi_instance_f_to_c_table; an index outside the table yields NULL. */ +MPI_Session MPI_Session_f2c(MPI_Fint session) +{ + int o_index= OMPI_FINT_2_INT(session); + + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + } + + /* Per MPI-2:4.12.4, do not invoke an error handler if we get an + invalid Fortran handle. If we get an invalid Fortran handle, + return an invalid C handle. */ + + if (0 > o_index || o_index >= opal_pointer_array_get_size(&ompi_instance_f_to_c_table)) { + return NULL; + } + + return (MPI_Session) opal_pointer_array_get_item (&ompi_instance_f_to_c_table, o_index); +} diff --git a/ompi/mpi/c/session_finalize.c b/ompi/mpi/c/session_finalize.c new file mode 100644 index 00000000000..4ecf052d974 --- /dev/null +++ b/ompi/mpi/c/session_finalize.c @@ -0,0 +1,45 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" + +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_finalize = PMPI_Session_finalize +#endif +#define MPI_Session_finalize PMPI_Session_finalize +#endif + +static const char FUNC_NAME[] = "MPI_Session_finalize"; + +/* Finalize the session that session points to by delegating to ompi_mpi_instance_finalize(). With MPI_PARAM_CHECK set, a NULL pointer, a NULL handle or MPI_SESSION_NULL is rejected with MPI_ERR_ARG. */ +int MPI_Session_finalize (MPI_Session *session) +{ + int rc; + + if ( MPI_PARAM_CHECK ) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (NULL == session || NULL == *session || MPI_SESSION_NULL == *session) { + return MPI_ERR_ARG; + } + } + + rc = ompi_mpi_instance_finalize (session); + /* if an error occurred, raise it on the null session */ + OMPI_ERRHANDLER_RETURN (rc, MPI_SESSION_NULL, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/session_get_info.c b/ompi/mpi/c/session_get_info.c new file mode 100644 index 00000000000..b51188b2584 --- /dev/null +++ b/ompi/mpi/c/session_get_info.c @@ -0,0 +1,66 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/instance/instance.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/info/info.h" +#include +#include + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_get_info = PMPI_Session_get_info +#endif +#define MPI_Session_get_info PMPI_Session_get_info +#endif + +static const char FUNC_NAME[] = "MPI_Session_get_info"; + +/* Return in *info_used a newly allocated info object holding a duplicate of the session's info. With MPI_PARAM_CHECK set, a NULL/MPI_SESSION_NULL session returns MPI_ERR_ARG and a NULL info_used raises MPI_ERR_INFO on the session; allocation failure raises MPI_ERR_NO_MEM and a failed duplication is raised on the session as well. */ +int MPI_Session_get_info (MPI_Session session, MPI_Info *info_used) +{ + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == session || MPI_SESSION_NULL == session) { + return MPI_ERR_ARG; + } + if (NULL == info_used) { + return OMPI_ERRHANDLER_INVOKE (session, MPI_ERR_INFO, FUNC_NAME); + } + } + + if (NULL == session->super.s_info) { + /* + * Setup any defaults if MPI_Session_set_info was never called + */ + opal_infosubscribe_change_info (&session->super, &MPI_INFO_NULL->super); + } + + + *info_used = ompi_info_allocate (); + if (OPAL_UNLIKELY(NULL == *info_used)) { + return OMPI_ERRHANDLER_INVOKE (session, MPI_ERR_NO_MEM, FUNC_NAME); + } + + opal_info_t *opal_info_used = &(*info_used)->super; + + int ret = opal_info_dup (session->super.s_info, &opal_info_used); + /* do not report success when the duplication failed: raise its result on the session */ + OMPI_ERRHANDLER_RETURN (ret, session, ret, FUNC_NAME); +} diff --git a/ompi/mpi/c/session_get_nth_pset.c b/ompi/mpi/c/session_get_nth_pset.c new file mode 100644 index 00000000000..4318979254f --- /dev/null +++ b/ompi/mpi/c/session_get_nth_pset.c @@ -0,0 +1,43 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018-2020 Triad National Security, LLC. All rights + * reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/instance/instance.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_get_nth_pset = PMPI_Session_get_nth_pset +#endif +#define MPI_Session_get_nth_pset PMPI_Session_get_nth_pset +#endif + +static const char FUNC_NAME[] = "MPI_Session_get_nth_pset"; + + +int MPI_Session_get_nth_pset (MPI_Session session, MPI_Info info, int n, int *len, char *pset_name) +{ + int rc = MPI_SUCCESS; + + if ( MPI_PARAM_CHECK ) { + if (NULL == session || (NULL == pset_name && *len > 0) || n < 0) { + return OMPI_ERRHANDLER_INVOKE(session, MPI_ERR_ARG, FUNC_NAME); + } + } + + rc = ompi_instance_get_nth_pset (session, n, len, pset_name); + + /* if an error occured raise it on the null session */ + OMPI_ERRHANDLER_RETURN (rc, MPI_SESSION_NULL, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/session_get_num_psets.c b/ompi/mpi/c/session_get_num_psets.c new file mode 100644 index 00000000000..231a01db47a --- /dev/null +++ b/ompi/mpi/c/session_get_num_psets.c @@ -0,0 +1,42 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/instance/instance.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_get_num_psets = PMPI_Session_get_num_psets +#endif +#define MPI_Session_get_num_psets PMPI_Session_get_num_psets +#endif + +static const char FUNC_NAME[] = "MPI_Session_get_num_psets"; + + +int MPI_Session_get_num_psets (MPI_Session session, MPI_Info info, int *npset_names) +{ + int rc; + + if ( MPI_PARAM_CHECK ) { + if (NULL == session || NULL == npset_names) { + return OMPI_ERRHANDLER_INVOKE(session, MPI_ERR_ARG, FUNC_NAME); + } + } + + rc = ompi_instance_get_num_psets (session, npset_names); + /* if an error occured raise it on the null session */ + OMPI_ERRHANDLER_RETURN (rc, MPI_SESSION_NULL, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/session_get_pset_info.c b/ompi/mpi/c/session_get_pset_info.c new file mode 100644 index 00000000000..441ffa058d5 --- /dev/null +++ b/ompi/mpi/c/session_get_pset_info.c @@ -0,0 +1,52 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/instance/instance.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/info/info.h" +#include +#include + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_get_pset_info = PMPI_Session_get_pset_info +#endif +#define MPI_Session_get_pset_info PMPI_Session_get_pset_info +#endif + +static const char FUNC_NAME[] = "MPI_Session_get_pset_info"; + +/* Fetch the info object of the process set pset_name into *info_used by delegating to ompi_instance_get_pset_info(). With MPI_PARAM_CHECK set, a NULL/MPI_SESSION_NULL session returns MPI_ERR_ARG and a NULL info_used raises MPI_ERR_INFO on the session. */ +int MPI_Session_get_pset_info (MPI_Session session, const char *pset_name, MPI_Info *info_used) +{ + int ret; + + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == session || MPI_SESSION_NULL == session) { + return MPI_ERR_ARG; + } + if (NULL == info_used) { + return OMPI_ERRHANDLER_INVOKE (session, MPI_ERR_INFO, FUNC_NAME); + } + } + /* NOTE(review): the result goes through OMPI_ERRHANDLER_INVOKE unconditionally, whereas MPI_Session_finalize and MPI_Session_init use OMPI_ERRHANDLER_RETURN; confirm the handler is not invoked when ret is a success code */ + ret = ompi_instance_get_pset_info (session, pset_name, (opal_info_t **) info_used); + return OMPI_ERRHANDLER_INVOKE(session, ret, FUNC_NAME); +} diff --git a/ompi/mpi/c/session_init.c b/ompi/mpi/c/session_init.c new file mode 100644 index 00000000000..fc755b27ff0 --- /dev/null +++ b/ompi/mpi/c/session_init.c @@ -0,0 +1,60 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/info/info.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/instance/instance.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_init = PMPI_Session_init +#endif +#define MPI_Session_init PMPI_Session_init +#endif + +static const char FUNC_NAME[] = "MPI_Session_init"; + +/* Create a session through ompi_mpi_instance_init(). The requested thread level starts at MPI_THREAD_SINGLE and becomes MPI_THREAD_MULTIPLE only when the thread_level info value begins with MPI_THREAD_MULTIPLE. With MPI_PARAM_CHECK set, a NULL errhandler or session returns MPI_ERR_ARG and a NULL or freed info returns MPI_ERR_INFO. */ +int MPI_Session_init (MPI_Info info, MPI_Errhandler errhandler, MPI_Session *session) +{ + int rc, flag; + int ts_level = MPI_THREAD_SINGLE; /* for now we default to thread single for OMPI sessions */ + opal_cstring_t *info_value; + const char ts_level_multi[] = "MPI_THREAD_MULTIPLE"; + + if ( MPI_PARAM_CHECK ) { + if (NULL == errhandler || NULL == session) { + return MPI_ERR_ARG; + } + + if (NULL == info || ompi_info_is_freed (info)) { + return MPI_ERR_INFO; + } + } + + if (MPI_INFO_NULL != info) { + (void) ompi_info_get (info, "thread_level", &info_value, &flag); /* NOTE(review): the return value is discarded; this assumes flag is always written by the call - confirm */ + if (flag) { + if(strncmp(info_value->string, ts_level_multi, strlen(ts_level_multi)) == 0) { + ts_level = MPI_THREAD_MULTIPLE; + } + OBJ_RELEASE(info_value); + } + } + + rc = ompi_mpi_instance_init (ts_level, &info->super, errhandler, session); + /* if an error occurred, raise it on the null session */ + OMPI_ERRHANDLER_RETURN (rc, MPI_SESSION_NULL, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/session_set_info.c b/ompi/mpi/c/session_set_info.c new file mode 100644 index 00000000000..776bc8f0e63 --- /dev/null +++ b/ompi/mpi/c/session_set_info.c @@ -0,0 +1,52 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
+ * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/instance/instance.h" +#include "ompi/errhandler/errhandler.h" +#include "opal/util/info_subscriber.h" +#include +#include + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_set_info = PMPI_Session_set_info +#endif +#define MPI_Session_set_info PMPI_Session_set_info +#endif + +static const char FUNC_NAME[] = "MPI_Session_set_info"; + +/* Hand info to opal_infosubscribe_change_info() for this session and return MPI_SUCCESS. With MPI_PARAM_CHECK set, a NULL/MPI_SESSION_NULL session returns MPI_ERR_ARG and a NULL, MPI_INFO_NULL or freed info raises MPI_ERR_INFO on the session. */ +int MPI_Session_set_info (MPI_Session session, MPI_Info info) +{ + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == session || MPI_SESSION_NULL == session) { + return MPI_ERR_ARG; + } + + if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { + return OMPI_ERRHANDLER_INVOKE (session, MPI_ERR_INFO, FUNC_NAME); + } + } + + opal_infosubscribe_change_info (&session->super, &info->super); + + return MPI_SUCCESS; +} diff --git a/ompi/mpi/c/testall.c b/ompi/mpi/c/testall.c index 1f59fbe3377..28d9ffc502c 100644 --- a/ompi/mpi/c/testall.c +++ b/ompi/mpi/c/testall.c @@ -15,6 +15,8 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -56,6 +58,7 @@ int MPI_Testall(int count, MPI_Request requests[], int *flag, if ( MPI_PARAM_CHECK ) { int i, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if( (NULL == requests) && (0 != count) ) { rc = MPI_ERR_REQUEST; @@ -65,6 +68,20 @@ int MPI_Testall(int count, MPI_Request requests[], int *flag, rc = MPI_ERR_REQUEST; break; } + if (&ompi_request_empty == requests[i]) { + continue; + } else if (NULL == requests[i]->req_mpi_object.comm) { + continue; + } else if (NULL == check_req) { + check_req = requests[i]; + } + else { + if (!ompi_comm_instances_same(requests[i]->req_mpi_object.comm, + check_req->req_mpi_object.comm)) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if ((NULL == flag) || (count < 0)) { diff --git a/ompi/mpi/c/testany.c b/ompi/mpi/c/testany.c index 9740ce7b97e..4993ce729f0 100644 --- a/ompi/mpi/c/testany.c +++ b/ompi/mpi/c/testany.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -15,6 +15,8 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -55,6 +57,7 @@ int MPI_Testany(int count, MPI_Request requests[], int *indx, int *completed, MP if ( MPI_PARAM_CHECK ) { int i, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ((NULL == requests) && (0 != count)) { rc = MPI_ERR_REQUEST; @@ -64,6 +67,20 @@ int MPI_Testany(int count, MPI_Request requests[], int *indx, int *completed, MP rc = MPI_ERR_REQUEST; break; } + if (&ompi_request_empty == requests[i]) { + continue; + } else if (NULL == requests[i]->req_mpi_object.comm) { + continue; + } else if (NULL == check_req) { + check_req = requests[i]; + } + else { + if (!ompi_comm_instances_same(requests[i]->req_mpi_object.comm, + check_req->req_mpi_object.comm)) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if (((NULL == indx || NULL == completed) && count > 0) || @@ -77,7 +94,7 @@ int MPI_Testany(int count, MPI_Request requests[], int *indx, int *completed, MP *completed = true; *indx = MPI_UNDEFINED; if (MPI_STATUS_IGNORE != status) { - *status = ompi_status_empty; + OMPI_COPY_STATUS(status, ompi_status_empty, false); } return MPI_SUCCESS; } diff --git a/ompi/mpi/c/testsome.c b/ompi/mpi/c/testsome.c index 9f9782d505b..84ea14fabd7 100644 --- a/ompi/mpi/c/testsome.c +++ b/ompi/mpi/c/testsome.c @@ -15,6 +15,8 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -57,6 +59,7 @@ int MPI_Testsome(int incount, MPI_Request requests[], if ( MPI_PARAM_CHECK ) { int indx, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ((NULL == requests) && (0 != incount)) { rc = MPI_ERR_REQUEST; @@ -66,6 +69,20 @@ int MPI_Testsome(int incount, MPI_Request requests[], rc = MPI_ERR_REQUEST; break; } + if (&ompi_request_empty == requests[indx]) { + continue; + } else if (NULL == requests[indx]->req_mpi_object.comm) { + continue; + } else if (NULL == check_req) { + check_req = requests[indx]; + } + else { + if (!ompi_comm_instances_same(requests[indx]->req_mpi_object.comm, + check_req->req_mpi_object.comm)) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if (((NULL == outcount || NULL == indices) && incount > 0) || diff --git a/ompi/mpi/c/type_create_f90_complex.c b/ompi/mpi/c/type_create_f90_complex.c index 638afc2f262..84b254ceae6 100644 --- a/ompi/mpi/c/type_create_f90_complex.c +++ b/ompi/mpi/c/type_create_f90_complex.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2008-2021 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science @@ -112,12 +112,8 @@ int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype) */ datatype->super.flags |= OMPI_DATATYPE_FLAG_PREDEFINED; /* Mark the datatype as a special F90 convenience type */ - // Specifically using opal_snprintf() here (instead of - // snprintf()) so that over-eager compilers do not warn us - // that we may be truncating the output. We *know* that the - // output may be truncated, and that's ok. 
- opal_snprintf(datatype->name, sizeof(datatype->name), - "COMBINER %s", (*newtype)->name); + snprintf(datatype->name, sizeof(datatype->name), + "COMBINER %s", (*newtype)->name); a_i[0] = &p; a_i[1] = &r; diff --git a/ompi/mpi/c/type_create_f90_integer.c b/ompi/mpi/c/type_create_f90_integer.c index cea7d11b394..628c2f3850a 100644 --- a/ompi/mpi/c/type_create_f90_integer.c +++ b/ompi/mpi/c/type_create_f90_integer.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2008-2021 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science @@ -104,12 +104,8 @@ int MPI_Type_create_f90_integer(int r, MPI_Datatype *newtype) */ datatype->super.flags |= OMPI_DATATYPE_FLAG_PREDEFINED; /* Mark the datatype as a special F90 convenience type */ - // Specifically using opal_snprintf() here (instead of - // snprintf()) so that over-eager compilers do not warn us - // that we may be truncating the output. We *know* that the - // output may be truncated, and that's ok. - opal_snprintf(datatype->name, sizeof(datatype->name), - "COMBINER %s", (*newtype)->name); + snprintf(datatype->name, sizeof(datatype->name), + "COMBINER %s", (*newtype)->name); a_i[0] = &r; ompi_datatype_set_args( datatype, 1, a_i, 0, NULL, 0, NULL, MPI_COMBINER_F90_INTEGER ); diff --git a/ompi/mpi/c/type_create_f90_real.c b/ompi/mpi/c/type_create_f90_real.c index 5518782cc22..199186ff5aa 100644 --- a/ompi/mpi/c/type_create_f90_real.c +++ b/ompi/mpi/c/type_create_f90_real.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2009 Sun Microsystems, Inc. All rights reserved. 
- * Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2008-2021 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science @@ -112,12 +112,8 @@ int MPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype) */ datatype->super.flags |= OMPI_DATATYPE_FLAG_PREDEFINED; /* Mark the datatype as a special F90 convenience type */ - // Specifically using opal_snprintf() here (instead of - // snprintf()) so that over-eager compilers do not warn us - // that we may be truncating the output. We *know* that the - // output may be truncated, and it's ok. - opal_snprintf(datatype->name, sizeof(datatype->name), - "COMBINER %s", (*newtype)->name); + snprintf(datatype->name, sizeof(datatype->name), + "COMBINER %s", (*newtype)->name); ompi_datatype_set_args( datatype, 2, a_i, 0, NULL, 0, NULL, MPI_COMBINER_F90_REAL ); diff --git a/ompi/mpi/c/type_create_keyval.c b/ompi/mpi/c/type_create_keyval.c index dc0e764b15d..6e0e033fdb8 100644 --- a/ompi/mpi/c/type_create_keyval.c +++ b/ompi/mpi/c/type_create_keyval.c @@ -56,7 +56,7 @@ int MPI_Type_create_keyval(MPI_Type_copy_attr_function *type_copy_attr_fn, } } - copy_fn.attr_datatype_copy_fn = (MPI_Type_internal_copy_attr_function*)type_copy_attr_fn; + copy_fn.attr_datatype_copy_fn = (MPI_Type_internal_copy_attr_function) type_copy_attr_fn; del_fn.attr_datatype_delete_fn = type_delete_attr_fn; ret = ompi_attr_create_keyval(TYPE_ATTR, copy_fn, del_fn, diff --git a/ompi/mpi/c/wait.c b/ompi/mpi/c/wait.c index aad340cc31a..0145eb7187d 100644 --- a/ompi/mpi/c/wait.c +++ b/ompi/mpi/c/wait.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -58,7 +58,7 @@ int MPI_Wait(MPI_Request *request, MPI_Status *status) if (MPI_REQUEST_NULL == *request) { if (MPI_STATUS_IGNORE != status) { - *status = ompi_status_empty; + OMPI_COPY_STATUS(status, ompi_status_empty, false); /* * Per MPI-1, the MPI_ERROR field is not defined for single-completion calls */ diff --git a/ompi/mpi/c/waitall.c b/ompi/mpi/c/waitall.c index 7b12e20cb89..14485de4e70 100644 --- a/ompi/mpi/c/waitall.c +++ b/ompi/mpi/c/waitall.c @@ -14,6 +14,8 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -54,6 +56,7 @@ int MPI_Waitall(int count, MPI_Request requests[], MPI_Status statuses[]) if ( MPI_PARAM_CHECK ) { int i, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if( (NULL == requests) && (0 != count) ) { rc = MPI_ERR_REQUEST; @@ -63,6 +66,20 @@ int MPI_Waitall(int count, MPI_Request requests[], MPI_Status statuses[]) rc = MPI_ERR_REQUEST; break; } + if (&ompi_request_empty == requests[i]) { + continue; + } else if (NULL == requests[i]->req_mpi_object.comm) { + continue; + } else if (NULL == check_req) { + check_req = requests[i]; + } + else { + if (!ompi_comm_instances_same(requests[i]->req_mpi_object.comm, + check_req->req_mpi_object.comm)) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if (count < 0) { diff --git a/ompi/mpi/c/waitany.c b/ompi/mpi/c/waitany.c index 5878d7d5c35..4b1dd771e3d 100644 --- a/ompi/mpi/c/waitany.c +++ b/ompi/mpi/c/waitany.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University 
and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -15,6 +15,8 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -55,6 +57,7 @@ int MPI_Waitany(int count, MPI_Request requests[], int *indx, MPI_Status *status if ( MPI_PARAM_CHECK ) { int i, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ((NULL == requests) && (0 != count)) { rc = MPI_ERR_REQUEST; @@ -64,6 +67,20 @@ int MPI_Waitany(int count, MPI_Request requests[], int *indx, MPI_Status *status rc = MPI_ERR_REQUEST; break; } + if (requests[i] == &ompi_request_empty) { + continue; + } else if (NULL == requests[i]->req_mpi_object.comm) { + continue; + } else if (NULL == check_req) { + check_req = requests[i]; + } + else { + if (!ompi_comm_instances_same(requests[i]->req_mpi_object.comm, + check_req->req_mpi_object.comm)) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if ((NULL == indx && count > 0) || @@ -76,7 +93,7 @@ int MPI_Waitany(int count, MPI_Request requests[], int *indx, MPI_Status *status if (OPAL_UNLIKELY(0 == count)) { *indx = MPI_UNDEFINED; if (MPI_STATUS_IGNORE != status) { - *status = ompi_status_empty; + OMPI_COPY_STATUS(status, ompi_status_empty, false); } return MPI_SUCCESS; } diff --git a/ompi/mpi/c/waitsome.c b/ompi/mpi/c/waitsome.c index b6beb5da752..169c7e10ec4 100644 --- a/ompi/mpi/c/waitsome.c +++ b/ompi/mpi/c/waitsome.c @@ -15,6 +15,8 @@ * reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -57,6 +59,7 @@ int MPI_Waitsome(int incount, MPI_Request requests[], if ( MPI_PARAM_CHECK ) { int indx, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ((NULL == requests) && (0 != incount)) { rc = MPI_ERR_REQUEST; @@ -66,6 +69,20 @@ int MPI_Waitsome(int incount, MPI_Request requests[], rc = MPI_ERR_REQUEST; break; } + if (&ompi_request_empty == requests[indx]) { + continue; + } else if (NULL == requests[indx]->req_mpi_object.comm) { + continue; + } else if (NULL == check_req) { + check_req = requests[indx]; + } + else { + if (!ompi_comm_instances_same(requests[indx]->req_mpi_object.comm, + check_req->req_mpi_object.comm)) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if (((NULL == outcount || NULL == indices) && incount > 0) || diff --git a/ompi/mpi/c/win_create_errhandler.c b/ompi/mpi/c/win_create_errhandler.c index c18a736857d..16c9262ff4e 100644 --- a/ompi/mpi/c/win_create_errhandler.c +++ b/ompi/mpi/c/win_create_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mpi/c/win_create_keyval.c b/ompi/mpi/c/win_create_keyval.c index 68c45b776ad..87131ab62fb 100644 --- a/ompi/mpi/c/win_create_keyval.c +++ b/ompi/mpi/c/win_create_keyval.c @@ -54,7 +54,7 @@ int MPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_attr_fn, } } - copy_fn.attr_win_copy_fn = (MPI_Win_internal_copy_attr_function*)win_copy_attr_fn; + copy_fn.attr_win_copy_fn = (MPI_Win_internal_copy_attr_function) win_copy_attr_fn; del_fn.attr_win_delete_fn = win_delete_attr_fn; ret = ompi_attr_create_keyval(WIN_ATTR, copy_fn, del_fn, diff --git a/ompi/mpi/c/win_get_errhandler.c b/ompi/mpi/c/win_get_errhandler.c index 9196e607b83..292f3c706af 100644 --- a/ompi/mpi/c/win_get_errhandler.c +++ b/ompi/mpi/c/win_get_errhandler.c @@ -15,6 +15,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" +#include "ompi/instance/instance.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -42,6 +45,8 @@ static const char FUNC_NAME[] = "MPI_Win_get_errhandler"; int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler) { + int ret = MPI_SUCCESS; + if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_win_invalid(win)) { @@ -60,6 +65,10 @@ int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler) *errhandler = win->error_handler; OPAL_THREAD_UNLOCK(&win->w_lock); + /* make sure the infrastructure is initialized */ + ret = ompi_mpi_instance_retain (); + + /* All done */ - return MPI_SUCCESS; + return ret; } diff --git a/ompi/mpi/c/win_get_info.c b/ompi/mpi/c/win_get_info.c index 83e4fbaf0ee..7b842391735 100644 --- a/ompi/mpi/c/win_get_info.c +++ b/ompi/mpi/c/win_get_info.c @@ -48,19 +48,19 @@ int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used) } if (NULL == win->super.s_info) { -/* - * Setup any defaults if MPI_Win_set_info was never called - */ - opal_infosubscribe_change_info(&win->super, &MPI_INFO_NULL->super); + /* + * Setup any defaults if MPI_Win_set_info was never called + */ + opal_infosubscribe_change_info(&win->super, &MPI_INFO_NULL->super); } - (*info_used) = OBJ_NEW(ompi_info_t); + *info_used = ompi_info_allocate (); if (NULL == (*info_used)) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_NO_MEM, FUNC_NAME); } opal_info_t *opal_info_used = &(*info_used)->super; - ret = opal_info_dup_mpistandard(win->super.s_info, &opal_info_used); + ret = opal_info_dup(win->super.s_info, &opal_info_used); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/wtick.c b/ompi/mpi/c/wtick.c index f6504dccee2..3136a7e1b08 100644 --- a/ompi/mpi/c/wtick.c +++ b/ompi/mpi/c/wtick.c @@ -9,7 +9,7 @@ * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2022 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -31,10 +31,11 @@ #include #endif -#include MCA_timer_IMPLEMENTATION_HEADER #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/mpiruntime.h" +#include "opal/util/clock_gettime.h" + #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Wtick = PMPI_Wtick @@ -44,38 +45,15 @@ double MPI_Wtick(void) { - /* - * See https://github.com/open-mpi/ompi/issues/3003 - * to get an idea what's going on here. - */ -#if 0 -#if OPAL_TIMER_CYCLE_NATIVE - { - opal_timer_t freq = opal_timer_base_get_freq(); - if (0 == freq) { - /* That should never happen, but if it does, return a bogus value - * rather than crashing with a division by zero */ - return (double)0.0; - } - return (double)1.0 / (double)freq; - } -#elif OPAL_TIMER_USEC_NATIVE - return 0.000001; -#endif -#else -#if defined(__linux__) && OPAL_HAVE_CLOCK_GETTIME + // We intentionally don't use the OPAL timer framework here. See + // https://github.com/open-mpi/ompi/issues/3003 for more details. struct timespec spec; double wtick = 0.0; - if (0 == clock_getres(CLOCK_MONOTONIC, &spec)){ + if (0 == opal_clock_getres(&spec)){ wtick = spec.tv_sec + spec.tv_nsec * 1.0e-09; } else { /* guess */ wtick = 1.0e-09; } return wtick; -#else - /* Otherwise, we already return usec precision. */ - return 0.000001; -#endif -#endif } diff --git a/ompi/mpi/c/wtime.c b/ompi/mpi/c/wtime.c index e22d26d943c..b7918ad5d0d 100644 --- a/ompi/mpi/c/wtime.c +++ b/ompi/mpi/c/wtime.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2022 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -31,11 +31,12 @@ #include #endif /* HAVE_TIME_H */ -#include MCA_timer_IMPLEMENTATION_HEADER #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/mpiruntime.h" #include "ompi/runtime/ompi_spc.h" +#include "opal/util/clock_gettime.h" + #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Wtime = PMPI_Wtime @@ -46,17 +47,9 @@ * and accuracy of the user visible timer. * More info: https://github.com/mpi-forum/mpi-issues/issues/77#issuecomment-369663119 */ -#if defined(__linux__) && OPAL_HAVE_CLOCK_GETTIME struct timespec ompi_wtime_time_origin = {.tv_sec = 0}; -#else -struct timeval ompi_wtime_time_origin = {.tv_sec = 0}; -#endif #else /* OMPI_BUILD_MPI_PROFILING */ -#if defined(__linux__) && OPAL_HAVE_CLOCK_GETTIME extern struct timespec ompi_wtime_time_origin; -#else -extern struct timeval ompi_wtime_time_origin; -#endif #endif double MPI_Wtime(void) @@ -65,36 +58,15 @@ double MPI_Wtime(void) SPC_RECORD(OMPI_SPC_WTIME, 1); - /* - * See https://github.com/open-mpi/ompi/issues/3003 to find out - * what's happening here. - */ -#if 0 -#if OPAL_TIMER_CYCLE_NATIVE - wtime = ((double) opal_timer_base_get_cycles()) / opal_timer_base_get_freq(); -#elif OPAL_TIMER_USEC_NATIVE - wtime = ((double) opal_timer_base_get_usec()) / 1000000.0; -#endif -#else -#if defined(__linux__) && OPAL_HAVE_CLOCK_GETTIME + // We intentionally don't use the OPAL timer framework here. See + // https://github.com/open-mpi/ompi/issues/3003 for more details. 
struct timespec tp; - (void) clock_gettime(CLOCK_MONOTONIC, &tp); - if( OPAL_UNLIKELY(0 == ompi_wtime_time_origin.tv_sec) ) { + (void) opal_clock_gettime(&tp); + if (OPAL_UNLIKELY(0 == ompi_wtime_time_origin.tv_sec)) { ompi_wtime_time_origin = tp; } wtime = (double)(tp.tv_nsec - ompi_wtime_time_origin.tv_nsec)/1.0e+9; wtime += (tp.tv_sec - ompi_wtime_time_origin.tv_sec); -#else - /* Fall back to gettimeofday() if we have nothing else */ - struct timeval tv; - gettimeofday(&tv, NULL); - if( OPAL_UNLIKELY(0 == ompi_wtime_time_origin.tv_sec) ) { - ompi_wtime_time_origin = tv; - } - wtime = (double)(tv.tv_usec - ompi_wtime_time_origin.tv_usec) / 1.0e+6; - wtime += (tv.tv_sec - ompi_wtime_time_origin.tv_sec); -#endif -#endif return wtime; } diff --git a/ompi/mpi/fortran/mpif-h/Makefile.am b/ompi/mpi/fortran/mpif-h/Makefile.am index f42a0c97c97..f45af28d55e 100644 --- a/ompi/mpi/fortran/mpif-h/Makefile.am +++ b/ompi/mpi/fortran/mpif-h/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2013 Inria. All rights reserved. # Copyright (c) 2011-2013 Universite Bordeaux 1 # Copyright (c) 2013-2018 Los Alamos National Security, LLC. All rights @@ -18,6 +18,9 @@ # and Technology (RIST). All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. # Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. +# Copyright (c) 2021 Triad National Security, LLC. All rights +# reserved. 
+# # $COPYRIGHT$ # # Additional copyrights may follow @@ -136,17 +139,25 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ aint_add_f.c \ aint_diff_f.c \ allgather_f.c \ + allgather_init_f.c \ allgatherv_f.c \ + allgatherv_init_f.c \ alloc_mem_f.c \ allreduce_f.c \ + allreduce_init_f.c \ alltoall_f.c \ + alltoall_init_f.c \ alltoallv_f.c \ + alltoallv_init_f.c \ alltoallw_f.c \ + alltoallw_init_f.c \ attr_delete_f.c \ attr_get_f.c \ attr_put_f.c \ barrier_f.c \ + barrier_init_f.c \ bcast_f.c \ + bcast_init_f.c \ bsend_f.c \ bsend_init_f.c \ buffer_attach_f.c \ @@ -167,13 +178,16 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ comm_connect_f.c \ comm_create_errhandler_f.c \ comm_create_f.c \ + comm_create_from_group_f.c \ comm_create_group_f.c \ + comm_create_from_group_f.c \ comm_create_keyval_f.c \ comm_delete_attr_f.c \ comm_disconnect_f.c \ comm_dup_f.c \ comm_dup_with_info_f.c \ comm_idup_f.c \ + comm_idup_with_info_f.c \ comm_free_f.c \ comm_free_keyval_f.c \ comm_get_attr_f.c \ @@ -205,6 +219,7 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ error_class_f.c \ error_string_f.c \ exscan_f.c \ + exscan_init_f.c \ f_sync_reg_f.c \ file_call_errhandler_f.c \ file_close_f.c \ @@ -269,7 +284,9 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ finalize_f.c \ free_mem_f.c \ gather_f.c \ + gather_init_f.c \ gatherv_f.c \ + gatherv_init_f.c \ get_address_f.c \ get_count_f.c \ get_elements_f.c \ @@ -288,6 +305,7 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ group_compare_f.c \ group_difference_f.c \ group_excl_f.c \ + group_from_session_pset_f.c \ group_free_f.c \ group_incl_f.c \ group_intersection_f.c \ @@ -323,12 +341,14 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ info_get_f.c \ info_get_nkeys_f.c \ info_get_nthkey_f.c \ + info_get_string_f.c \ info_get_valuelen_f.c \ info_set_f.c \ init_f.c \ initialized_f.c \ init_thread_f.c \ intercomm_create_f.c \ + intercomm_create_from_groups_f.c \ intercomm_merge_f.c \ iprobe_f.c \ irecv_f.c \ @@ -337,6 +357,8 @@ 
lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ ireduce_scatter_block_f.c \ irsend_f.c \ isend_f.c \ + isendrecv_f.c \ + isendrecv_replace_f.c \ iscan_f.c \ iscatter_f.c \ iscatterv_f.c \ @@ -348,10 +370,15 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ mprobe_f.c \ mrecv_f.c \ neighbor_allgather_f.c \ + neighbor_allgather_init_f.c \ neighbor_allgatherv_f.c \ + neighbor_allgatherv_init_f.c \ neighbor_alltoall_f.c \ + neighbor_alltoall_init_f.c \ neighbor_alltoallv_f.c \ + neighbor_alltoallv_init_f.c \ neighbor_alltoallw_f.c \ + neighbor_alltoallw_init_f.c \ op_commutative_f.c \ op_create_f.c \ open_port_f.c \ @@ -373,21 +400,33 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ recv_f.c \ recv_init_f.c \ reduce_f.c \ + reduce_init_f.c \ reduce_local_f.c \ reduce_scatter_f.c \ + reduce_scatter_init_f.c \ reduce_scatter_block_f.c \ + reduce_scatter_block_init_f.c \ register_datarep_f.c \ request_free_f.c \ request_get_status_f.c \ rsend_f.c \ rsend_init_f.c \ scan_f.c \ + scan_init_f.c \ scatter_f.c \ + scatter_init_f.c \ scatterv_f.c \ + scatterv_init_f.c \ send_f.c \ send_init_f.c \ sendrecv_f.c \ sendrecv_replace_f.c \ + session_get_info_f.c \ + session_get_nth_pset_f.c \ + session_get_num_psets_f.c \ + session_get_pset_info_f.c \ + session_init_f.c \ + session_finalize_f.c \ ssend_f.c \ ssend_init_f.c \ startall_f.c \ diff --git a/ompi/mpiext/pcollreq/mpif-h/alltoall_init_f.c b/ompi/mpi/fortran/mpif-h/allgather_init_f.c similarity index 60% rename from ompi/mpiext/pcollreq/mpif-h/alltoall_init_f.c rename to ompi/mpi/fortran/mpif-h/allgather_init_f.c index 0dd6049d813..784bad0628d 100644 --- a/ompi/mpiext/pcollreq/mpif-h/alltoall_init_f.c +++ b/ompi/mpi/fortran/mpif-h/allgather_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). 
All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,56 +23,55 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_ALLTOALL_INIT = ompix_alltoall_init_f -#pragma weak pmpix_alltoall_init = ompix_alltoall_init_f -#pragma weak pmpix_alltoall_init_ = ompix_alltoall_init_f -#pragma weak pmpix_alltoall_init__ = ompix_alltoall_init_f +#pragma weak PMPI_ALLGATHER_INIT = ompi_allgather_init_f +#pragma weak pmpi_allgather_init = ompi_allgather_init_f +#pragma weak pmpi_allgather_init_ = ompi_allgather_init_f +#pragma weak pmpi_allgather_init__ = ompi_allgather_init_f -#pragma weak PMPIX_Alltoall_init_f = ompix_alltoall_init_f -#pragma weak PMPIX_Alltoall_init_f08 = ompix_alltoall_init_f +#pragma weak PMPI_Allgather_init_f = ompi_allgather_init_f +#pragma weak PMPI_Allgather_init_f08 = ompi_allgather_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_ALLTOALL_INIT, - pmpix_alltoall_init, - pmpix_alltoall_init_, - pmpix_alltoall_init__, - pompix_alltoall_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_ALLGATHER_INIT, + pmpi_allgather_init, + pmpi_allgather_init_, + pmpi_allgather_init__, + pompi_allgather_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_ALLTOALL_INIT = ompix_alltoall_init_f -#pragma weak mpix_alltoall_init = ompix_alltoall_init_f -#pragma weak mpix_alltoall_init_ = ompix_alltoall_init_f -#pragma weak mpix_alltoall_init__ = ompix_alltoall_init_f +#pragma 
weak MPI_ALLGATHER_INIT = ompi_allgather_init_f +#pragma weak mpi_allgather_init = ompi_allgather_init_f +#pragma weak mpi_allgather_init_ = ompi_allgather_init_f +#pragma weak mpi_allgather_init__ = ompi_allgather_init_f -#pragma weak MPIX_Alltoall_init_f = ompix_alltoall_init_f -#pragma weak MPIX_Alltoall_init_f08 = ompix_alltoall_init_f +#pragma weak MPI_Allgather_init_f = ompi_allgather_init_f +#pragma weak MPI_Allgather_init_f08 = ompi_allgather_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_ALLTOALL_INIT, - mpix_alltoall_init, - mpix_alltoall_init_, - mpix_alltoall_init__, - ompix_alltoall_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_ALLGATHER_INIT, + mpi_allgather_init, + mpi_allgather_init_, + mpi_allgather_init__, + ompi_allgather_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) #else -#define ompix_alltoall_init_f pompix_alltoall_init_f +#define ompi_allgather_init_f pompi_allgather_init_f #endif #endif -void ompix_alltoall_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, +void ompi_allgather_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { - int c_ierr; + int ierr_c; MPI_Comm c_comm; MPI_Request c_req; MPI_Datatype c_sendtype, c_recvtype; @@ -87,13 +86,14 @@ void ompix_alltoall_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtyp sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Alltoall_init(sendbuf, + ierr_c = PMPI_Allgather_init(sendbuf, OMPI_FINT_2_INT(*sendcount), c_sendtype, recvbuf, OMPI_FINT_2_INT(*recvcount), c_recvtype, c_comm, c_info, &c_req); - if (NULL != ierr) *ierr = 
OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_req); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); + + if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_req); } diff --git a/ompi/mpiext/pcollreq/mpif-h/allgatherv_init_f.c b/ompi/mpi/fortran/mpif-h/allgatherv_init_f.c similarity index 54% rename from ompi/mpiext/pcollreq/mpif-h/allgatherv_init_f.c rename to ompi/mpi/fortran/mpif-h/allgatherv_init_f.c index 9a1576c9e00..494c25586e0 100644 --- a/ompi/mpiext/pcollreq/mpif-h/allgatherv_init_f.c +++ b/ompi/mpi/fortran/mpif-h/allgatherv_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,55 +23,54 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_ALLGATHERV_INIT = ompix_allgatherv_init_f -#pragma weak pmpix_allgatherv_init = ompix_allgatherv_init_f -#pragma weak pmpix_allgatherv_init_ = ompix_allgatherv_init_f -#pragma weak pmpix_allgatherv_init__ = ompix_allgatherv_init_f +#pragma weak PMPI_ALLGATHERV_INIT = ompi_allgatherv_init_f +#pragma weak pmpi_allgatherv_init = ompi_allgatherv_init_f +#pragma weak pmpi_allgatherv_init_ = ompi_allgatherv_init_f +#pragma weak pmpi_allgatherv_init__ = ompi_allgatherv_init_f -#pragma weak PMPIX_Allgatherv_init_f = ompix_allgatherv_init_f -#pragma weak PMPIX_Allgatherv_init_f08 = ompix_allgatherv_init_f +#pragma weak PMPI_Allgatherv_init_f = ompi_allgatherv_init_f +#pragma weak PMPI_Allgatherv_init_f08 = ompi_allgatherv_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_ALLGATHERV_INIT, - pmpix_allgatherv_init, - pmpix_allgatherv_init_, - pmpix_allgatherv_init__, - pompix_allgatherv_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_ALLGATHERV_INIT, + pmpi_allgatherv_init, + pmpi_allgatherv_init_, + pmpi_allgatherv_init__, + pompi_allgatherv_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_ALLGATHERV_INIT = ompix_allgatherv_init_f -#pragma weak mpix_allgatherv_init = ompix_allgatherv_init_f -#pragma weak mpix_allgatherv_init_ = ompix_allgatherv_init_f -#pragma weak mpix_allgatherv_init__ = ompix_allgatherv_init_f +#pragma weak MPI_ALLGATHERV_INIT = ompi_allgatherv_init_f 
+#pragma weak mpi_allgatherv_init = ompi_allgatherv_init_f +#pragma weak mpi_allgatherv_init_ = ompi_allgatherv_init_f +#pragma weak mpi_allgatherv_init__ = ompi_allgatherv_init_f -#pragma weak MPIX_Allgatherv_init_f = ompix_allgatherv_init_f -#pragma weak MPIX_Allgatherv_init_f08 = ompix_allgatherv_init_f +#pragma weak MPI_Allgatherv_init_f = ompi_allgatherv_init_f +#pragma weak MPI_Allgatherv_init_f08 = ompi_allgatherv_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_ALLGATHERV_INIT, - mpix_allgatherv_init, - mpix_allgatherv_init_, - mpix_allgatherv_init__, - ompix_allgatherv_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_ALLGATHERV_INIT, + mpi_allgatherv_init, + mpi_allgatherv_init_, + mpi_allgatherv_init__, + ompi_allgatherv_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, info, request, ierr) ) #else -#define ompix_allgatherv_init_f pompix_allgatherv_init_f +#define ompi_allgatherv_init_f pompi_allgatherv_init_f #endif #endif -void ompix_allgatherv_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, - char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, - MPI_Fint *recvtype, MPI_Fint *comm, - MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +void ompi_allgatherv_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, + char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, + MPI_Fint *recvtype, MPI_Fint *comm, + MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; @@ -94,13 +93,13 @@ void ompix_allgatherv_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendt sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = PMPIX_Allgatherv_init(sendbuf, - OMPI_FINT_2_INT(*sendcount), - 
c_sendtype, - recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - OMPI_ARRAY_NAME_CONVERT(displs), - c_recvtype, c_comm, c_info, &c_request); + ierr_c = PMPI_Allgatherv_init(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + OMPI_ARRAY_NAME_CONVERT(recvcounts), + OMPI_ARRAY_NAME_CONVERT(displs), + c_recvtype, c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_request); diff --git a/ompi/mpiext/pcollreq/mpif-h/allreduce_init_f.c b/ompi/mpi/fortran/mpif-h/allreduce_init_f.c similarity index 53% rename from ompi/mpiext/pcollreq/mpif-h/allreduce_init_f.c rename to ompi/mpi/fortran/mpif-h/allreduce_init_f.c index 3ed7f2748bb..48c250d8078 100644 --- a/ompi/mpiext/pcollreq/mpif-h/allreduce_init_f.c +++ b/ompi/mpi/fortran/mpif-h/allreduce_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,54 +23,53 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_ALLREDUCE_INIT = ompix_allreduce_init_f -#pragma weak pmpix_allreduce_init = ompix_allreduce_init_f -#pragma weak pmpix_allreduce_init_ = ompix_allreduce_init_f -#pragma weak pmpix_allreduce_init__ = ompix_allreduce_init_f +#pragma weak PMPI_ALLREDUCE_INIT = ompi_allreduce_init_f +#pragma weak pmpi_allreduce_init = ompi_allreduce_init_f +#pragma weak pmpi_allreduce_init_ = ompi_allreduce_init_f +#pragma weak pmpi_allreduce_init__ = ompi_allreduce_init_f -#pragma weak PMPIX_Allreduce_init_f = ompix_allreduce_init_f -#pragma weak PMPIX_Allreduce_init_f08 = ompix_allreduce_init_f +#pragma weak PMPI_Allreduce_init_f = ompi_allreduce_init_f +#pragma weak PMPI_Allreduce_init_f08 = ompi_allreduce_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_ALLREDUCE_INIT, - pmpix_allreduce_init, - pmpix_allreduce_init_, - pmpix_allreduce_init__, - pompix_allreduce_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_ALLREDUCE_INIT, + pmpi_allreduce_init, + pmpi_allreduce_init_, + pmpi_allreduce_init__, + pompi_allreduce_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_ALLREDUCE_INIT = ompix_allreduce_init_f -#pragma weak mpix_allreduce_init = ompix_allreduce_init_f -#pragma weak mpix_allreduce_init_ = ompix_allreduce_init_f -#pragma weak mpix_allreduce_init__ = ompix_allreduce_init_f +#pragma weak MPI_ALLREDUCE_INIT = ompi_allreduce_init_f +#pragma weak mpi_allreduce_init = ompi_allreduce_init_f +#pragma weak mpi_allreduce_init_ = ompi_allreduce_init_f +#pragma 
weak mpi_allreduce_init__ = ompi_allreduce_init_f -#pragma weak MPIX_Allreduce_init_f = ompix_allreduce_init_f -#pragma weak MPIX_Allreduce_init_f08 = ompix_allreduce_init_f +#pragma weak MPI_Allreduce_init_f = ompi_allreduce_init_f +#pragma weak MPI_Allreduce_init_f08 = ompi_allreduce_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_ALLREDUCE_INIT, - mpix_allreduce_init, - mpix_allreduce_init_, - mpix_allreduce_init__, - ompix_allreduce_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_ALLREDUCE_INIT, + mpi_allreduce_init, + mpi_allreduce_init_, + mpi_allreduce_init__, + ompi_allreduce_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, info, request, ierr) ) #else -#define ompix_allreduce_init_f pompix_allreduce_init_f +#define ompi_allreduce_init_f pompi_allreduce_init_f #endif #endif -void ompix_allreduce_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, - MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, - MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +void ompi_allreduce_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, + MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, + MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { int ierr_c; MPI_Comm c_comm; @@ -88,9 +87,9 @@ void ompix_allreduce_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = PMPIX_Allreduce_init(sendbuf, recvbuf, - OMPI_FINT_2_INT(*count), - c_type, c_op, c_comm, c_info, &c_request); + ierr_c = PMPI_Allreduce_init(sendbuf, recvbuf, + OMPI_FINT_2_INT(*count), + c_type, c_op, c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/alltoall_init_f.c 
b/ompi/mpi/fortran/mpif-h/alltoall_init_f.c new file mode 100644 index 00000000000..baa268533e2 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/alltoall_init_f.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/constants.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_ALLTOALL_INIT = ompi_alltoall_init_f +#pragma weak pmpi_alltoall_init = ompi_alltoall_init_f +#pragma weak pmpi_alltoall_init_ = ompi_alltoall_init_f +#pragma weak pmpi_alltoall_init__ = ompi_alltoall_init_f + +#pragma weak PMPI_Alltoall_init_f = ompi_alltoall_init_f +#pragma weak PMPI_Alltoall_init_f08 = ompi_alltoall_init_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALL_INIT, + pmpi_alltoall_init, + pmpi_alltoall_init_, + pmpi_alltoall_init__, + pompi_alltoall_init_f, + (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), + (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_ALLTOALL_INIT = 
ompi_alltoall_init_f +#pragma weak mpi_alltoall_init = ompi_alltoall_init_f +#pragma weak mpi_alltoall_init_ = ompi_alltoall_init_f +#pragma weak mpi_alltoall_init__ = ompi_alltoall_init_f + +#pragma weak MPI_Alltoall_init_f = ompi_alltoall_init_f +#pragma weak MPI_Alltoall_init_f08 = ompi_alltoall_init_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_ALLTOALL_INIT, + mpi_alltoall_init, + mpi_alltoall_init_, + mpi_alltoall_init__, + ompi_alltoall_init_f, + (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), + (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) +#else +#define ompi_alltoall_init_f pompi_alltoall_init_f +#endif +#endif + + +void ompi_alltoall_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, + char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, + MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +{ + int c_ierr; + MPI_Comm c_comm; + MPI_Request c_req; + MPI_Datatype c_sendtype, c_recvtype; + MPI_Info c_info; + + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); + c_info = PMPI_Info_f2c(*info); + + sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); + sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); + recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); + + c_ierr = PMPI_Alltoall_init(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + OMPI_FINT_2_INT(*recvcount), + c_recvtype, c_comm, c_info, &c_req); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_req); +} diff --git a/ompi/mpiext/pcollreq/mpif-h/alltoallv_init_f.c b/ompi/mpi/fortran/mpif-h/alltoallv_init_f.c similarity index 56% rename from ompi/mpiext/pcollreq/mpif-h/alltoallv_init_f.c rename to ompi/mpi/fortran/mpif-h/alltoallv_init_f.c index 
562b10863ea..bfd44f97927 100644 --- a/ompi/mpiext/pcollreq/mpif-h/alltoallv_init_f.c +++ b/ompi/mpi/fortran/mpif-h/alltoallv_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,55 +23,54 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_ALLTOALLV_INIT = ompix_alltoallv_init_f -#pragma weak pmpix_alltoallv_init = ompix_alltoallv_init_f -#pragma weak pmpix_alltoallv_init_ = ompix_alltoallv_init_f -#pragma weak pmpix_alltoallv_init__ = ompix_alltoallv_init_f +#pragma weak PMPI_ALLTOALLV_INIT = ompi_alltoallv_init_f +#pragma weak pmpi_alltoallv_init = ompi_alltoallv_init_f +#pragma weak pmpi_alltoallv_init_ = ompi_alltoallv_init_f +#pragma weak pmpi_alltoallv_init__ = ompi_alltoallv_init_f -#pragma weak PMPIX_Alltoallv_init_f = ompix_alltoallv_init_f -#pragma weak PMPIX_Alltoallv_init_f08 = ompix_alltoallv_init_f +#pragma weak PMPI_Alltoallv_init_f = ompi_alltoallv_init_f +#pragma weak PMPI_Alltoallv_init_f08 = ompi_alltoallv_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_ALLTOALLV_INIT, - pmpix_alltoallv_init, - pmpix_alltoallv_init_, - pmpix_alltoallv_init__, - pompix_alltoallv_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALLV_INIT, + pmpi_alltoallv_init, + pmpi_alltoallv_init_, + pmpi_alltoallv_init__, + pompi_alltoallv_init_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint 
*rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_ALLTOALLV_INIT = ompix_alltoallv_init_f -#pragma weak mpix_alltoallv_init = ompix_alltoallv_init_f -#pragma weak mpix_alltoallv_init_ = ompix_alltoallv_init_f -#pragma weak mpix_alltoallv_init__ = ompix_alltoallv_init_f +#pragma weak MPI_ALLTOALLV_INIT = ompi_alltoallv_init_f +#pragma weak mpi_alltoallv_init = ompi_alltoallv_init_f +#pragma weak mpi_alltoallv_init_ = ompi_alltoallv_init_f +#pragma weak mpi_alltoallv_init__ = ompi_alltoallv_init_f -#pragma weak MPIX_Alltoallv_init_f = ompix_alltoallv_init_f -#pragma weak MPIX_Alltoallv_init_f08 = ompix_alltoallv_init_f +#pragma weak MPI_Alltoallv_init_f = ompi_alltoallv_init_f +#pragma weak MPI_Alltoallv_init_f08 = ompi_alltoallv_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_ALLTOALLV_INIT, - mpix_alltoallv_init, - mpix_alltoallv_init_, - mpix_alltoallv_init__, - ompix_alltoallv_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_ALLTOALLV_INIT, + mpi_alltoallv_init, + mpi_alltoallv_init_, + mpi_alltoallv_init__, + ompi_alltoallv_init_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, info, request, ierr) ) #else -#define ompix_alltoallv_init_f pompix_alltoallv_init_f +#define ompi_alltoallv_init_f pompi_alltoallv_init_f #endif #endif -void ompix_alltoallv_init_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, - MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, - MPI_Fint *rdispls, MPI_Fint *recvtype, - MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint 
*ierr) +void ompi_alltoallv_init_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, + MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, + MPI_Fint *rdispls, MPI_Fint *recvtype, + MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; @@ -98,14 +97,14 @@ void ompix_alltoallv_init_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdisp sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Alltoallv_init(sendbuf, - OMPI_ARRAY_NAME_CONVERT(sendcounts), - OMPI_ARRAY_NAME_CONVERT(sdispls), - c_sendtype, - recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - OMPI_ARRAY_NAME_CONVERT(rdispls), - c_recvtype, c_comm, c_info, &c_request); + c_ierr = PMPI_Alltoallv_init(sendbuf, + OMPI_ARRAY_NAME_CONVERT(sendcounts), + OMPI_ARRAY_NAME_CONVERT(sdispls), + c_sendtype, + recvbuf, + OMPI_ARRAY_NAME_CONVERT(recvcounts), + OMPI_ARRAY_NAME_CONVERT(rdispls), + c_recvtype, c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); diff --git a/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c b/ompi/mpi/fortran/mpif-h/alltoallw_init_f.c similarity index 61% rename from ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c rename to ompi/mpi/fortran/mpif-h/alltoallw_init_f.c index 0fae1e194db..62bee5441be 100644 --- a/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c +++ b/ompi/mpi/fortran/mpif-h/alltoallw_init_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2019 Research Organization for Information Science + * Copyright (c) 2015-2021 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -24,56 +24,55 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" #include "ompi/communicator/communicator.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_ALLTOALLW_INIT = ompix_alltoallw_init_f -#pragma weak pmpix_alltoallw_init = ompix_alltoallw_init_f -#pragma weak pmpix_alltoallw_init_ = ompix_alltoallw_init_f -#pragma weak pmpix_alltoallw_init__ = ompix_alltoallw_init_f +#pragma weak PMPI_ALLTOALLW_INIT = ompi_alltoallw_init_f +#pragma weak pmpi_alltoallw_init = ompi_alltoallw_init_f +#pragma weak pmpi_alltoallw_init_ = ompi_alltoallw_init_f +#pragma weak pmpi_alltoallw_init__ = ompi_alltoallw_init_f -#pragma weak PMPIX_Alltoallw_init_f = ompix_alltoallw_init_f -#pragma weak PMPIX_Alltoallw_init_f08 = ompix_alltoallw_init_f +#pragma weak PMPI_Alltoallw_init_f = ompi_alltoallw_init_f +#pragma weak PMPI_Alltoallw_init_f08 = ompi_alltoallw_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_ALLTOALLW_INIT, - pmpix_alltoallw_init, - pmpix_alltoallw_init_, - pmpix_alltoallw_init__, - pompix_alltoallw_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALLW_INIT, + pmpi_alltoallw_init, + pmpi_alltoallw_init_, + pmpi_alltoallw_init__, + pompi_alltoallw_init_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_ALLTOALLW_INIT = ompix_alltoallw_init_f -#pragma weak mpix_alltoallw_init = ompix_alltoallw_init_f -#pragma weak mpix_alltoallw_init_ = ompix_alltoallw_init_f -#pragma weak mpix_alltoallw_init__ = ompix_alltoallw_init_f +#pragma weak MPI_ALLTOALLW_INIT = 
ompi_alltoallw_init_f +#pragma weak mpi_alltoallw_init = ompi_alltoallw_init_f +#pragma weak mpi_alltoallw_init_ = ompi_alltoallw_init_f +#pragma weak mpi_alltoallw_init__ = ompi_alltoallw_init_f -#pragma weak MPIX_Alltoallw_init_f = ompix_alltoallw_init_f -#pragma weak MPIX_Alltoallw_init_f08 = ompix_alltoallw_init_f +#pragma weak MPI_Alltoallw_init_f = ompi_alltoallw_init_f +#pragma weak MPI_Alltoallw_init_f08 = ompi_alltoallw_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_ALLTOALLW_INIT, - mpix_alltoallw_init, - mpix_alltoallw_init_, - mpix_alltoallw_init__, - ompix_alltoallw_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_ALLTOALLW_INIT, + mpi_alltoallw_init, + mpi_alltoallw_init_, + mpi_alltoallw_init__, + ompi_alltoallw_init_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, info, request, ierr) ) #else -#define ompix_alltoallw_init_f pompix_alltoallw_init_f +#define ompi_alltoallw_init_f pompi_alltoallw_init_f #endif #endif -void ompix_alltoallw_init_f(char *sendbuf, MPI_Fint *sendcounts, - MPI_Fint *sdispls, MPI_Fint *sendtypes, - char *recvbuf, MPI_Fint *recvcounts, - MPI_Fint *rdispls, MPI_Fint *recvtypes, - MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +void ompi_alltoallw_init_f(char *sendbuf, MPI_Fint *sendcounts, + MPI_Fint *sdispls, MPI_Fint *sendtypes, + char *recvbuf, MPI_Fint *recvcounts, + MPI_Fint *rdispls, MPI_Fint *recvtypes, + MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { MPI_Comm c_comm; MPI_Datatype *c_sendtypes = NULL, *c_recvtypes; @@ -109,14 +108,14 @@ void ompix_alltoallw_init_f(char *sendbuf, MPI_Fint *sendcounts, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - 
c_ierr = PMPIX_Alltoallw_init(sendbuf, - OMPI_ARRAY_NAME_CONVERT(sendcounts), - OMPI_ARRAY_NAME_CONVERT(sdispls), - c_sendtypes, - recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - OMPI_ARRAY_NAME_CONVERT(rdispls), - c_recvtypes, c_comm, c_info, &c_request); + c_ierr = PMPI_Alltoallw_init(sendbuf, + OMPI_ARRAY_NAME_CONVERT(sendcounts), + OMPI_ARRAY_NAME_CONVERT(sdispls), + c_sendtypes, + recvbuf, + OMPI_ARRAY_NAME_CONVERT(recvcounts), + OMPI_ARRAY_NAME_CONVERT(rdispls), + c_recvtypes, c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); diff --git a/ompi/mpiext/pcollreq/mpif-h/barrier_init_f.c b/ompi/mpi/fortran/mpif-h/barrier_init_f.c similarity index 51% rename from ompi/mpiext/pcollreq/mpif-h/barrier_init_f.c rename to ompi/mpi/fortran/mpif-h/barrier_init_f.c index 3b6b6d828ce..f82ac1df1c8 100644 --- a/ompi/mpiext/pcollreq/mpif-h/barrier_init_f.c +++ b/ompi/mpi/fortran/mpif-h/barrier_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -22,52 +22,51 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_BARRIER_INIT = ompix_barrier_init_f -#pragma weak pmpix_barrier_init = ompix_barrier_init_f -#pragma weak pmpix_barrier_init_ = ompix_barrier_init_f -#pragma weak pmpix_barrier_init__ = ompix_barrier_init_f +#pragma weak PMPI_BARRIER_INIT = ompi_barrier_init_f +#pragma weak pmpi_barrier_init = ompi_barrier_init_f +#pragma weak pmpi_barrier_init_ = ompi_barrier_init_f +#pragma weak pmpi_barrier_init__ = ompi_barrier_init_f -#pragma weak PMPIX_Barrier_init_f = ompix_barrier_init_f -#pragma weak PMPIX_Barrier_init_f08 = ompix_barrier_init_f +#pragma weak PMPI_Barrier_init_f = ompi_barrier_init_f +#pragma weak PMPI_Barrier_init_f08 = ompi_barrier_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_BARRIER_INIT, - pmpix_barrier_init, - pmpix_barrier_init_, - pmpix_barrier_init__, - pompix_barrier_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_BARRIER_INIT, + pmpi_barrier_init, + pmpi_barrier_init_, + pmpi_barrier_init__, + pompi_barrier_init_f, (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_BARRIER_INIT = ompix_barrier_init_f -#pragma weak mpix_barrier_init = ompix_barrier_init_f -#pragma weak mpix_barrier_init_ = ompix_barrier_init_f -#pragma weak mpix_barrier_init__ = ompix_barrier_init_f +#pragma weak MPI_BARRIER_INIT = ompi_barrier_init_f +#pragma weak mpi_barrier_init = ompi_barrier_init_f +#pragma weak mpi_barrier_init_ = ompi_barrier_init_f +#pragma weak mpi_barrier_init__ = ompi_barrier_init_f -#pragma weak MPIX_Barrier_init_f = ompix_barrier_init_f -#pragma weak MPIX_Barrier_init_f08 = ompix_barrier_init_f +#pragma weak MPI_Barrier_init_f = ompi_barrier_init_f +#pragma weak 
MPI_Barrier_init_f08 = ompi_barrier_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_BARRIER_INIT, - mpix_barrier_init, - mpix_barrier_init_, - mpix_barrier_init__, - ompix_barrier_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_BARRIER_INIT, + mpi_barrier_init, + mpi_barrier_init_, + mpi_barrier_init__, + ompi_barrier_init_f, (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (comm, info, request, ierr) ) #else -#define ompix_barrier_init_f pompix_barrier_init_f +#define ompi_barrier_init_f pompi_barrier_init_f #endif #endif -void ompix_barrier_init_f(MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +void ompi_barrier_init_f(MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { int ierr_c; MPI_Comm c_comm; @@ -77,7 +76,7 @@ void ompix_barrier_init_f(MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI c_comm = PMPI_Comm_f2c(*comm); c_info = PMPI_Info_f2c(*info); - ierr_c = PMPIX_Barrier_init(c_comm, c_info, &c_req); + ierr_c = PMPI_Barrier_init(c_comm, c_info, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_req); diff --git a/ompi/mpiext/pcollreq/mpif-h/bcast_init_f.c b/ompi/mpi/fortran/mpif-h/bcast_init_f.c similarity index 51% rename from ompi/mpiext/pcollreq/mpif-h/bcast_init_f.c rename to ompi/mpi/fortran/mpif-h/bcast_init_f.c index f9385123a3a..8cd12934870 100644 --- a/ompi/mpiext/pcollreq/mpif-h/bcast_init_f.c +++ b/ompi/mpi/fortran/mpif-h/bcast_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,54 +23,53 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_BCAST_INIT = ompix_bcast_init_f -#pragma weak pmpix_bcast_init = ompix_bcast_init_f -#pragma weak pmpix_bcast_init_ = ompix_bcast_init_f -#pragma weak pmpix_bcast_init__ = ompix_bcast_init_f +#pragma weak PMPI_BCAST_INIT = ompi_bcast_init_f +#pragma weak pmpi_bcast_init = ompi_bcast_init_f +#pragma weak pmpi_bcast_init_ = ompi_bcast_init_f +#pragma weak pmpi_bcast_init__ = ompi_bcast_init_f -#pragma weak PMPIX_Bcast_init_f = ompix_bcast_init_f -#pragma weak PMPIX_Bcast_init_f08 = ompix_bcast_init_f +#pragma weak PMPI_Bcast_init_f = ompi_bcast_init_f +#pragma weak PMPI_Bcast_init_f08 = ompi_bcast_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_BCAST_INIT, - pmpix_bcast_init, - pmpix_bcast_init_, - pmpix_bcast_init__, - pompix_bcast_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_BCAST_INIT, + pmpi_bcast_init, + pmpi_bcast_init_, + pmpi_bcast_init__, + pompi_bcast_init_f, (char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (buffer, count, datatype, root, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_BCAST_INIT = ompix_bcast_init_f -#pragma weak mpix_bcast_init = ompix_bcast_init_f -#pragma weak mpix_bcast_init_ = ompix_bcast_init_f -#pragma weak mpix_bcast_init__ = ompix_bcast_init_f +#pragma weak MPI_BCAST_INIT = ompi_bcast_init_f +#pragma weak mpi_bcast_init = ompi_bcast_init_f +#pragma weak mpi_bcast_init_ = ompi_bcast_init_f +#pragma weak mpi_bcast_init__ = ompi_bcast_init_f -#pragma weak MPIX_Bcast_init_f = ompix_bcast_init_f -#pragma weak MPIX_Bcast_init_f08 = ompix_bcast_init_f +#pragma weak MPI_Bcast_init_f = ompi_bcast_init_f +#pragma 
weak MPI_Bcast_init_f08 = ompi_bcast_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_BCAST_INIT, - mpix_bcast_init, - mpix_bcast_init_, - mpix_bcast_init__, - ompix_bcast_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_BCAST_INIT, + mpi_bcast_init, + mpi_bcast_init_, + mpi_bcast_init__, + ompi_bcast_init_f, (char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (buffer, count, datatype, root, comm, info, request, ierr) ) #else -#define ompix_bcast_init_f pompix_bcast_init_f +#define ompi_bcast_init_f pompi_bcast_init_f #endif #endif -void ompix_bcast_init_f(char *buffer, MPI_Fint *count, MPI_Fint *datatype, - MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, - MPI_Fint *ierr) +void ompi_bcast_init_f(char *buffer, MPI_Fint *count, MPI_Fint *datatype, + MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, + MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; @@ -82,13 +81,13 @@ void ompix_bcast_init_f(char *buffer, MPI_Fint *count, MPI_Fint *datatype, c_type = PMPI_Type_f2c(*datatype); c_info = PMPI_Info_f2c(*info); - c_ierr = PMPIX_Bcast_init(OMPI_F2C_BOTTOM(buffer), - OMPI_FINT_2_INT(*count), - c_type, - OMPI_FINT_2_INT(*root), - c_comm, - c_info, - &c_req); + c_ierr = PMPI_Bcast_init(OMPI_F2C_BOTTOM(buffer), + OMPI_FINT_2_INT(*count), + c_type, + OMPI_FINT_2_INT(*root), + c_comm, + c_info, + &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_req); } diff --git a/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c b/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c index 480e832242f..9881590dd11 100644 --- a/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c @@ -74,8 +74,9 @@ void ompi_comm_create_errhandler_f(ompi_errhandler_fortran_handler_fn_t *functio MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; - 
MPI_Errhandler c_errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, + MPI_Errhandler c_errhandler; + + c_errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_FORTRAN); if (MPI_ERRHANDLER_NULL != c_errhandler) { diff --git a/ompi/mpi/fortran/mpif-h/comm_create_from_group_f.c b/ompi/mpi/fortran/mpif-h/comm_create_from_group_f.c new file mode 100644 index 00000000000..58e75c9af12 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/comm_create_from_group_f.c @@ -0,0 +1,112 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019-2021 Triad National Security, LLC. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" +#include "ompi/constants.h" +#include "ompi/instance/instance.h" +#include "ompi/group/group.h" + + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_COMM_CREATE_FROM_GROUP = ompi_comm_create_from_group_f +#pragma weak pmpi_comm_create_from_group = ompi_comm_create_from_group_f +#pragma weak pmpi_comm_create_from_group_ = ompi_comm_create_from_group_f +#pragma weak pmpi_comm_create_from_group__ = ompi_comm_create_from_group_f + +#pragma weak PMPI_Comm_create_from_group_f = ompi_comm_create_from_group_f +#pragma weak PMPI_Comm_create_from_group_f08 = ompi_comm_create_from_group_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_FROM_GROUP, + pmpi_comm_create_from_group, + pmpi_comm_create_from_group_, + pmpi_comm_create_from_group__, + pompi_comm_create_from_group_f, + (MPI_Fint *group, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *newcomm, MPI_Fint *ierr, int name_len), + (group, stringtag, info, errhandler, newcomm, ierr, name_len) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_COMM_CREATE_FROM_GROUP = ompi_comm_create_from_group_f +#pragma weak mpi_comm_create_from_group = ompi_comm_create_from_group_f +#pragma weak mpi_comm_create_from_group_ = ompi_comm_create_from_group_f +#pragma weak mpi_comm_create_from_group__ = ompi_comm_create_from_group_f + +#pragma weak MPI_Comm_create_from_group_f = ompi_comm_create_from_group_f +#pragma weak MPI_Comm_create_from_group_f08 = ompi_comm_create_from_group_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_FROM_GROUP, + mpi_comm_create_from_group, + mpi_comm_create_from_group_, + mpi_comm_create_from_group__, + ompi_comm_create_from_group_f, + (MPI_Fint *group, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *newcomm, MPI_Fint *ierr, int name_len), + (group, stringtag, info, errhandler, newcomm, ierr, name_len) ) +#else +#define ompi_comm_create_from_group_f pompi_comm_create_from_group_f +#endif +#endif + +void ompi_comm_create_from_group_f(MPI_Fint *group, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *newcomm, MPI_Fint *ierr, int name_len) +{ + int c_ierr, ret; + MPI_Group c_group; + char *c_tag; + MPI_Comm c_comm; + MPI_Info c_info; + MPI_Errhandler c_err; + + c_group = PMPI_Group_f2c(*group); + c_info = PMPI_Info_f2c(*info); + c_err = PMPI_Errhandler_f2c(*errhandler); + + /* Convert the fortran string */ + + /* Convert the fortran string */ + if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(stringtag, name_len, + &c_tag))) { + c_ierr = OMPI_ERRHANDLER_INVOKE(ompi_group_get_instance(c_group), ret, "MPI_COMM_CREATE_FROM_GROUP"); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + return; + } + + c_ierr = PMPI_Comm_create_from_group(c_group, c_tag, c_info, c_err, &c_comm); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *newcomm = PMPI_Comm_c2f (c_comm); + } + + /* Free the C tag */ + + free(c_tag); +} + diff --git a/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c b/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c index 51bb77363ff..fb069b7a47e 100644 --- a/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c @@ -34,13 +34,13 @@ #pragma weak PMPI_Comm_create_keyval_f = ompi_comm_create_keyval_f #pragma weak PMPI_Comm_create_keyval_f08 = ompi_comm_create_keyval_f #else -OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_KEYVAL, - pmpi_comm_create_keyval, - pmpi_comm_create_keyval_, - 
pmpi_comm_create_keyval__, +OMPI_GENERATE_F77_BINDINGS(PMPI_COMM_CREATE_KEYVAL, pmpi_comm_create_keyval, + pmpi_comm_create_keyval_, pmpi_comm_create_keyval__, pompi_comm_create_keyval_f, - (ompi_aint_copy_attr_function* comm_copy_attr_fn, ompi_aint_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), - (comm_copy_attr_fn, comm_delete_attr_fn, comm_keyval, extra_state, ierr) ) + (ompi_aint_copy_attr_function comm_copy_attr_fn, + ompi_aint_delete_attr_function comm_delete_attr_fn, + MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), + (comm_copy_attr_fn, comm_delete_attr_fn, comm_keyval, extra_state, ierr)) #endif #endif @@ -54,13 +54,12 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_KEYVAL, #pragma weak MPI_Comm_create_keyval_f08 = ompi_comm_create_keyval_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_KEYVAL, - mpi_comm_create_keyval, - mpi_comm_create_keyval_, - mpi_comm_create_keyval__, - ompi_comm_create_keyval_f, - (ompi_aint_copy_attr_function* comm_copy_attr_fn, ompi_aint_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), - (comm_copy_attr_fn, comm_delete_attr_fn, comm_keyval, extra_state, ierr) ) +OMPI_GENERATE_F77_BINDINGS(MPI_COMM_CREATE_KEYVAL, mpi_comm_create_keyval, mpi_comm_create_keyval_, + mpi_comm_create_keyval__, ompi_comm_create_keyval_f, + (ompi_aint_copy_attr_function comm_copy_attr_fn, + ompi_aint_delete_attr_function comm_delete_attr_fn, + MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), + (comm_copy_attr_fn, comm_delete_attr_fn, comm_keyval, extra_state, ierr)) #else #define ompi_comm_create_keyval_f pompi_comm_create_keyval_f #endif @@ -68,11 +67,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_KEYVAL, static const char FUNC_NAME[] = "MPI_Comm_create_keyval_f"; - -void ompi_comm_create_keyval_f(ompi_aint_copy_attr_function* comm_copy_attr_fn, - 
ompi_aint_delete_attr_function* comm_delete_attr_fn, - MPI_Fint *comm_keyval, - MPI_Aint *extra_state, MPI_Fint *ierr) +void ompi_comm_create_keyval_f(ompi_aint_copy_attr_function comm_copy_attr_fn, + ompi_aint_delete_attr_function comm_delete_attr_fn, + MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr) { int ret, c_ierr; OMPI_SINGLE_NAME_DECL(comm_keyval); diff --git a/ompi/mpi/fortran/mpif-h/comm_idup_with_info_f.c b/ompi/mpi/fortran/mpif-h/comm_idup_with_info_f.c new file mode 100644 index 00000000000..c021e502ffe --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/comm_idup_with_info_f.c @@ -0,0 +1,92 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_COMM_IDUP_WITH_INFO = ompi_comm_idup_with_info_f +#pragma weak pmpi_comm_idup_with_info = ompi_comm_idup_with_info_f +#pragma weak pmpi_comm_idup_with_info_ = ompi_comm_idup_with_info_f +#pragma weak pmpi_comm_idup_with_info__ = ompi_comm_idup_with_info_f + +#pragma weak PMPI_Comm_idup_with_info_f = ompi_comm_idup_with_info_f +#pragma weak PMPI_Comm_idup_with_info_f08 = ompi_comm_idup_with_info_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_IDUP_WITH_INFO, + pmpi_comm_idup_with_info, + pmpi_comm_idup_with_info_, + pmpi_comm_idup_with_info__, + pompi_comm_idup_with_info_f, + (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *newcomm, MPI_Fint *request, MPI_Fint *ierr), + (comm, info, newcomm, request, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_COMM_IDUP_WITH_INFO = ompi_comm_idup_with_info_f +#pragma weak mpi_comm_idup_with_info = ompi_comm_idup_with_info_f +#pragma weak mpi_comm_idup_with_info_ = ompi_comm_idup_with_info_f +#pragma weak mpi_comm_idup_with_info__ = ompi_comm_idup_with_info_f + +#pragma weak MPI_Comm_idup_with_info_f = ompi_comm_idup_with_info_f +#pragma weak MPI_Comm_idup_with_info_f08 = ompi_comm_idup_with_info_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_COMM_IDUP_WITH_INFO, + mpi_comm_idup_with_info, + mpi_comm_idup_with_info_, + mpi_comm_idup_with_info__, + ompi_comm_idup_with_info_f, + (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *newcomm, MPI_Fint *request, MPI_Fint *ierr), + (comm, info, newcomm, request, ierr) ) +#else +#define ompi_comm_idup_with_info_f pompi_comm_idup_with_info_f +#endif +#endif + + +void ompi_comm_idup_with_info_f(MPI_Fint *comm, MPI_Fint *info, MPI_Fint *newcomm, MPI_Fint *request, MPI_Fint *ierr) +{ + int c_ierr; + MPI_Comm c_newcomm; + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); + MPI_Info c_info; + MPI_Request c_req; + + c_info = PMPI_Info_f2c(*info); + + c_ierr = PMPI_Comm_idup_with_info(c_comm, c_info, &c_newcomm, &c_req); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *newcomm = PMPI_Comm_c2f(c_newcomm); + *request = PMPI_Request_c2f(c_req); + } +} diff --git a/ompi/mpiext/pcollreq/mpif-h/exscan_init_f.c b/ompi/mpi/fortran/mpif-h/exscan_init_f.c similarity index 55% rename from ompi/mpiext/pcollreq/mpif-h/exscan_init_f.c rename to ompi/mpi/fortran/mpif-h/exscan_init_f.c index d44594e7c36..142944aa5c5 100644 --- a/ompi/mpiext/pcollreq/mpif-h/exscan_init_f.c +++ b/ompi/mpi/fortran/mpif-h/exscan_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,54 +23,53 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_EXSCAN_INIT = ompix_exscan_init_f -#pragma weak pmpix_exscan_init = ompix_exscan_init_f -#pragma weak pmpix_exscan_init_ = ompix_exscan_init_f -#pragma weak pmpix_exscan_init__ = ompix_exscan_init_f +#pragma weak PMPI_EXSCAN_INIT = ompi_exscan_init_f +#pragma weak pmpi_exscan_init = ompi_exscan_init_f +#pragma weak pmpi_exscan_init_ = ompi_exscan_init_f +#pragma weak pmpi_exscan_init__ = ompi_exscan_init_f -#pragma weak PMPIX_Exscan_init_f = ompix_exscan_init_f -#pragma weak PMPIX_Exscan_init_f08 = ompix_exscan_init_f +#pragma weak PMPI_Exscan_init_f = ompi_exscan_init_f +#pragma weak PMPI_Exscan_init_f08 = ompi_exscan_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_EXSCAN_INIT, - pmpix_exscan_init, - pmpix_exscan_init_, - pmpix_exscan_init__, - pompix_exscan_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_EXSCAN_INIT, + pmpi_exscan_init, + pmpi_exscan_init_, + pmpi_exscan_init__, + pompi_exscan_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_EXSCAN_INIT = ompix_exscan_init_f -#pragma weak mpix_exscan_init = ompix_exscan_init_f -#pragma weak mpix_exscan_init_ = ompix_exscan_init_f -#pragma weak mpix_exscan_init__ = ompix_exscan_init_f +#pragma weak MPI_EXSCAN_INIT = ompi_exscan_init_f +#pragma weak mpi_exscan_init = ompi_exscan_init_f +#pragma weak mpi_exscan_init_ = ompi_exscan_init_f +#pragma weak mpi_exscan_init__ = ompi_exscan_init_f -#pragma weak MPIX_Exscan_init_f = ompix_exscan_init_f -#pragma weak MPIX_Exscan_init_f08 = 
ompix_exscan_init_f +#pragma weak MPI_Exscan_init_f = ompi_exscan_init_f +#pragma weak MPI_Exscan_init_f08 = ompi_exscan_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_EXSCAN_INIT, - mpix_exscan_init, - mpix_exscan_init_, - mpix_exscan_init__, - ompix_exscan_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_EXSCAN_INIT, + mpi_exscan_init, + mpi_exscan_init_, + mpi_exscan_init__, + ompi_exscan_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, info, request, ierr) ) #else -#define ompix_exscan_init_f pompix_exscan_init_f +#define ompi_exscan_init_f pompi_exscan_init_f #endif #endif -void ompix_exscan_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, - MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, - MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +void ompi_exscan_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, + MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, + MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; @@ -88,9 +87,9 @@ void ompix_exscan_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, sendbuf = (char *) OMPI_F2C_BOTTOM (sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM (recvbuf); - c_ierr = PMPIX_Exscan_init(sendbuf, recvbuf, - OMPI_FINT_2_INT(*count), - c_type, c_op, c_comm, c_info, &c_request); + c_ierr = PMPI_Exscan_init(sendbuf, recvbuf, + OMPI_FINT_2_INT(*count), + c_type, c_op, c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c b/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c index 29ac3ffe365..a20af467621 100644 --- a/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c @@ -74,8 +74,8 @@ void 
ompi_file_create_errhandler_f(ompi_errhandler_fortran_handler_fn_t* functio MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; - MPI_Errhandler c_errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, + MPI_Errhandler c_errhandler; + c_errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_FORTRAN); if (MPI_ERRHANDLER_NULL != c_errhandler) { diff --git a/ompi/mpiext/pcollreq/mpif-h/scatter_init_f.c b/ompi/mpi/fortran/mpif-h/gather_init_f.c similarity index 51% rename from ompi/mpiext/pcollreq/mpif-h/scatter_init_f.c rename to ompi/mpi/fortran/mpif-h/gather_init_f.c index ae48bac6929..19e7197cc87 100644 --- a/ompi/mpiext/pcollreq/mpif-h/scatter_init_f.c +++ b/ompi/mpi/fortran/mpif-h/gather_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,76 +23,76 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_SCATTER_INIT = ompix_scatter_init_f -#pragma weak pmpix_scatter_init = ompix_scatter_init_f -#pragma weak pmpix_scatter_init_ = ompix_scatter_init_f -#pragma weak pmpix_scatter_init__ = ompix_scatter_init_f +#pragma weak PMPI_GATHER_INIT = ompi_gather_init_f +#pragma weak pmpi_gather_init = ompi_gather_init_f +#pragma weak pmpi_gather_init_ = ompi_gather_init_f +#pragma weak pmpi_gather_init__ = ompi_gather_init_f -#pragma weak PMPIX_Scatter_init_f = ompix_scatter_init_f -#pragma weak PMPIX_Scatter_init_f08 = ompix_scatter_init_f +#pragma weak PMPI_Gather_init_f = ompi_gather_init_f +#pragma weak PMPI_Gather_init_f08 = ompi_gather_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_SCATTER_INIT, - pmpix_scatter_init, - pmpix_scatter_init_, - pmpix_scatter_init__, - pompix_scatter_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_GATHER_INIT, + pmpi_gather_init, + pmpi_gather_init_, + pmpi_gather_init__, + pompi_gather_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_SCATTER_INIT = ompix_scatter_init_f -#pragma weak mpix_scatter_init = ompix_scatter_init_f -#pragma weak mpix_scatter_init_ = ompix_scatter_init_f -#pragma weak mpix_scatter_init__ = ompix_scatter_init_f +#pragma weak MPI_GATHER_INIT = ompi_gather_init_f +#pragma weak mpi_gather_init = ompi_gather_init_f +#pragma weak mpi_gather_init_ = ompi_gather_init_f +#pragma weak mpi_gather_init__ = ompi_gather_init_f 
-#pragma weak MPIX_Scatter_init_f = ompix_scatter_init_f -#pragma weak MPIX_Scatter_init_f08 = ompix_scatter_init_f +#pragma weak MPI_Gather_init_f = ompi_gather_init_f +#pragma weak MPI_Gather_init_f08 = ompi_gather_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_SCATTER_INIT, - mpix_scatter_init, - mpix_scatter_init_, - mpix_scatter_init__, - ompix_scatter_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_GATHER_INIT, + mpi_gather_init, + mpi_gather_init_, + mpi_gather_init__, + ompi_gather_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, ierr) ) #else -#define ompix_scatter_init_f pompix_scatter_init_f +#define ompi_gather_init_f pompi_gather_init_f #endif #endif -void ompix_scatter_init_f(char *sendbuf, MPI_Fint *sendcount, - MPI_Fint *sendtype, char *recvbuf, - MPI_Fint *recvcount, MPI_Fint *recvtype, - MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, - MPI_Fint *ierr) +void ompi_gather_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, + char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, + MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, + MPI_Fint *ierr) { int c_ierr; + MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; MPI_Info c_info; MPI_Request c_request; - MPI_Comm c_comm = PMPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); c_sendtype = PMPI_Type_f2c(*sendtype); c_recvtype = PMPI_Type_f2c(*recvtype); c_info = PMPI_Info_f2c(*info); + sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); - recvbuf = (char *) OMPI_F2C_IN_PLACE(recvbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Scatter_init(sendbuf,OMPI_FINT_2_INT(*sendcount), - c_sendtype, recvbuf, - OMPI_FINT_2_INT(*recvcount), 
- c_recvtype, - OMPI_FINT_2_INT(*root), c_comm, c_info, &c_request); + c_ierr = PMPI_Gather_init(sendbuf, OMPI_FINT_2_INT(*sendcount), + c_sendtype, recvbuf, + OMPI_FINT_2_INT(*recvcount), + c_recvtype, + OMPI_FINT_2_INT(*root), + c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpiext/pcollreq/mpif-h/gatherv_init_f.c b/ompi/mpi/fortran/mpif-h/gatherv_init_f.c similarity index 55% rename from ompi/mpiext/pcollreq/mpif-h/gatherv_init_f.c rename to ompi/mpi/fortran/mpif-h/gatherv_init_f.c index 96376b78ca0..4f4415d7603 100644 --- a/ompi/mpiext/pcollreq/mpif-h/gatherv_init_f.c +++ b/ompi/mpi/fortran/mpif-h/gatherv_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,55 +23,54 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_GATHERV_INIT = ompix_gatherv_init_f -#pragma weak pmpix_gatherv_init = ompix_gatherv_init_f -#pragma weak pmpix_gatherv_init_ = ompix_gatherv_init_f -#pragma weak pmpix_gatherv_init__ = ompix_gatherv_init_f +#pragma weak PMPI_GATHERV_INIT = ompi_gatherv_init_f +#pragma weak pmpi_gatherv_init = ompi_gatherv_init_f +#pragma weak pmpi_gatherv_init_ = ompi_gatherv_init_f +#pragma weak pmpi_gatherv_init__ = ompi_gatherv_init_f -#pragma weak PMPIX_Gatherv_init_f = ompix_gatherv_init_f -#pragma weak PMPIX_Gatherv_init_f08 = ompix_gatherv_init_f +#pragma weak PMPI_Gatherv_init_f = ompi_gatherv_init_f +#pragma weak PMPI_Gatherv_init_f08 = ompi_gatherv_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_GATHERV_INIT, - pmpix_gatherv_init, - pmpix_gatherv_init_, - pmpix_gatherv_init__, - pompix_gatherv_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_GATHERV_INIT, + pmpi_gatherv_init, + pmpi_gatherv_init_, + pmpi_gatherv_init__, + pompi_gatherv_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_GATHERV_INIT = ompix_gatherv_init_f -#pragma weak mpix_gatherv_init = ompix_gatherv_init_f -#pragma weak mpix_gatherv_init_ = ompix_gatherv_init_f -#pragma weak mpix_gatherv_init__ = ompix_gatherv_init_f +#pragma weak MPI_GATHERV_INIT = ompi_gatherv_init_f +#pragma weak mpi_gatherv_init = ompi_gatherv_init_f +#pragma weak mpi_gatherv_init_ = ompi_gatherv_init_f 
+#pragma weak mpi_gatherv_init__ = ompi_gatherv_init_f -#pragma weak MPIX_Gatherv_init_f = ompix_gatherv_init_f -#pragma weak MPIX_Gatherv_init_f08 = ompix_gatherv_init_f +#pragma weak MPI_Gatherv_init_f = ompi_gatherv_init_f +#pragma weak MPI_Gatherv_init_f08 = ompi_gatherv_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_GATHERV_INIT, - mpix_gatherv_init, - mpix_gatherv_init_, - mpix_gatherv_init__, - ompix_gatherv_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_GATHERV_INIT, + mpi_gatherv_init, + mpi_gatherv_init_, + mpi_gatherv_init__, + ompi_gatherv_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request,MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, info, request, ierr) ) #else -#define ompix_gatherv_init_f pompix_gatherv_init_f +#define ompi_gatherv_init_f pompi_gatherv_init_f #endif #endif -void ompix_gatherv_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, - char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, - MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, - MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +void ompi_gatherv_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, + char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, + MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, + MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; @@ -94,13 +93,13 @@ void ompix_gatherv_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Gatherv_init(sendbuf, OMPI_FINT_2_INT(*sendcount), - c_sendtype, recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - OMPI_ARRAY_NAME_CONVERT(displs), - c_recvtype, - OMPI_FINT_2_INT(*root), - c_comm, c_info, 
&c_request); + c_ierr = PMPI_Gatherv_init(sendbuf, OMPI_FINT_2_INT(*sendcount), + c_sendtype, recvbuf, + OMPI_ARRAY_NAME_CONVERT(recvcounts), + OMPI_ARRAY_NAME_CONVERT(displs), + c_recvtype, + OMPI_FINT_2_INT(*root), + c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/group_from_session_pset_f.c b/ompi/mpi/fortran/mpif-h/group_from_session_pset_f.c new file mode 100644 index 00000000000..3bbb0d65691 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/group_from_session_pset_f.c @@ -0,0 +1,108 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" +#include "ompi/constants.h" +#include "ompi/instance/instance.h" + + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_GROUP_FROM_SESSION_PSET = ompi_group_from_session_pset_f +#pragma weak pmpi_group_from_session_pset = ompi_group_from_session_pset_f +#pragma weak pmpi_group_from_session_pset_ = ompi_group_from_session_pset_f +#pragma weak pmpi_group_from_session_pset__ = ompi_group_from_session_pset_f + +#pragma weak PMPI_Group_from_session_pset_f = ompi_group_from_session_pset_f +#pragma weak PMPI_Group_from_session_pset_f08 = ompi_group_from_session_pset_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_FROM_SESSION_PSET, + pmpi_group_from_session_pset, + pmpi_group_from_session_pset_, + pmpi_group_from_session_pset__, + pompi_group_from_session_pset_f, + (MPI_Fint *session, char *pset_name, MPI_Fint *newgroup, MPI_Fint *ierr, int name_len), + (session, pset_name, newgroup, ierr, name_len) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_GROUP_FROM_SESSION_PSET = ompi_group_from_session_pset_f +#pragma weak mpi_group_from_session_pset = ompi_group_from_session_pset_f +#pragma weak mpi_group_from_session_pset_ = ompi_group_from_session_pset_f +#pragma weak mpi_group_from_session_pset__ = ompi_group_from_session_pset_f + +#pragma weak MPI_Group_from_session_pset_f = ompi_group_from_session_pset_f +#pragma weak MPI_Group_from_session_pset_f08 = ompi_group_from_session_pset_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_FROM_SESSION_PSET, + mpi_group_from_session_pset, + mpi_group_from_session_pset_, + mpi_group_from_session_pset__, + ompi_group_from_session_pset_f, + (MPI_Fint *session, char *pset_name, MPI_Fint *newgroup, MPI_Fint *ierr, int name_len), + (session, pset_name, newgroup, ierr, name_len) ) +#else +#define ompi_group_from_session_pset_f pompi_group_from_session_pset_f +#endif +#endif + +void ompi_group_from_session_pset_f(MPI_Fint *session, char *pset_name, MPI_Fint *newgroup, MPI_Fint *ierr, int name_len) +{ + int c_ierr, ret; + MPI_Session c_session; + char *c_name; + MPI_Group c_newgroup; + + c_session = PMPI_Session_f2c(*session); + + /* Build a C copy (c_name) of the Fortran string pset_name, whose length is name_len; it is freed below */ + + if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(pset_name, name_len, + &c_name))) { + c_ierr = OMPI_ERRHANDLER_INVOKE((ompi_instance_t *)c_session, ret, + "MPI_GROUP_FROM_SESSION_PSET"); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + return; + } + + c_ierr = PMPI_Group_from_session_pset(c_session, c_name, &c_newgroup); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *newgroup = PMPI_Group_c2f (c_newgroup); + } + + /* Free the C name */ + + free(c_name); +} + + diff --git a/ompi/mpi/fortran/mpif-h/improbe_f.c b/ompi/mpi/fortran/mpif-h/improbe_f.c index 936cc4e399c..af5fa2b01fe 100644 --- a/ompi/mpi/fortran/mpif-h/improbe_f.c +++ b/ompi/mpi/fortran/mpif-h/improbe_f.c @@ -95,7 +95,7 @@ void ompi_improbe_f(MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, if (MPI_SUCCESS == c_ierr) { OMPI_SINGLE_INT_2_LOGICAL(flag); - if (OMPI_FORTRAN_VALUE_TRUE == *flag) { + if (1 == OMPI_LOGICAL_2_INT(*flag)) { OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) *message = PMPI_Message_c2f(c_message); } diff --git a/ompi/mpi/fortran/mpif-h/info_get_string_f.c b/ompi/mpi/fortran/mpif-h/info_get_string_f.c new file mode 100644 index 
00000000000..76919ba3d85 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/info_get_string_f.c 
@@ -0,0 +1,143 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/constants.h" +#include "ompi/communicator/communicator.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" +#include "ompi/info/info.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_INFO_GET_STRING = ompi_info_get_string_f +#pragma weak pmpi_info_get_string = ompi_info_get_string_f +#pragma weak pmpi_info_get_string_ = ompi_info_get_string_f +#pragma weak pmpi_info_get_string__ = ompi_info_get_string_f + +#pragma weak PMPI_Info_get_string_f = ompi_info_get_string_f +#pragma weak PMPI_Info_get_string_f08 = ompi_info_get_string_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET_STRING, + pmpi_info_get_string, + pmpi_info_get_string_, + pmpi_info_get_string__, + pompi_info_get_string_f, + (MPI_Fint *info, char *key, MPI_Fint *buflen, char *value, ompi_fortran_logical_t *flag, MPI_Fint *ierr, int key_len, int value_len), + (info, key, buflen, value, flag, ierr, key_len, value_len) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_INFO_GET_STRING = ompi_info_get_string_f +#pragma weak mpi_info_get_string = ompi_info_get_string_f 
+#pragma weak mpi_info_get_string_ = ompi_info_get_string_f +#pragma weak mpi_info_get_string__ = ompi_info_get_string_f + +#pragma weak MPI_Info_get_string_f = ompi_info_get_string_f +#pragma weak MPI_Info_get_string_f08 = ompi_info_get_string_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_INFO_GET_STRING, + mpi_info_get_string, + mpi_info_get_string_, + mpi_info_get_string__, + ompi_info_get_string_f, + (MPI_Fint *info, char *key, MPI_Fint *buflen, char *value, ompi_fortran_logical_t *flag, MPI_Fint *ierr, int key_len, int value_len), + (info, key, buflen, value, flag, ierr, key_len, value_len) ) +#else +#define ompi_info_get_string_f pompi_info_get_string_f +#endif +#endif + + +static const char FUNC_NAME[] = "MPI_INFO_GET_STRING"; + +/* Note that the key_len and value_len parameters are silently added + by the Fortran compiler, and will be filled in with the actual + length of the character array from the caller. Hence, it's the max + length of the string that we can use. 
*/ + +void ompi_info_get_string_f(MPI_Fint *info, char *key, MPI_Fint *buflen, + char *value, ompi_fortran_logical_t *flag, MPI_Fint *ierr, + int key_len, int value_len) +{ + int c_ierr, ret; + MPI_Info c_info; + char *c_key = NULL; + OMPI_SINGLE_NAME_DECL(buflen); + OMPI_LOGICAL_NAME_DECL(flag); + opal_cstring_t *info_str; + + if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(key, key_len, &c_key))) { + c_ierr = OMPI_ERRHANDLER_NOHANDLE_INVOKE(ret, FUNC_NAME); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + return; + } + + c_info = PMPI_Info_f2c(*info); + + if (0 == *buflen) { + c_ierr = ompi_info_get_valuelen(c_info, c_key, + OMPI_SINGLE_NAME_CONVERT(buflen), + OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + OMPI_SINGLE_INT_2_FINT(buflen); + OMPI_SINGLE_INT_2_LOGICAL(flag); + } + } else { + c_ierr = ompi_info_get(c_info, c_key, &info_str, + OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); + if (NULL != ierr) { + *ierr = OMPI_INT_2_FINT(c_ierr); + } + + + if (MPI_SUCCESS == c_ierr) { + OMPI_SINGLE_INT_2_LOGICAL(flag); + + /* If we found the info key, copy the value back to the + Fortran string (note: all Fortran compilers have FALSE == + 0, so just check for any nonzero value, because not all + Fortran compilers have TRUE == 1). 
Note: use the full + length of the Fortran string, which means adding one to the 3rd arg + to ompi_fortran_string_c2f */ + if (*flag) { + if (OMPI_SUCCESS != + (ret = ompi_fortran_string_c2f(info_str->string, value, value_len + 1))) { + c_ierr = OMPI_ERRHANDLER_NOHANDLE_INVOKE(ret, FUNC_NAME); + if (NULL != ierr) { + *ierr = OMPI_INT_2_FINT(c_ierr); + } + } + *buflen = info_str->length; + OBJ_RELEASE(info_str); + } + } + } + + if (NULL != c_key) { + free(c_key); + } +} diff --git a/ompi/mpi/fortran/mpif-h/init_f.c b/ompi/mpi/fortran/mpif-h/init_f.c index 087d967f534..8ed309f6827 100644 --- a/ompi/mpi/fortran/mpif-h/init_f.c +++ b/ompi/mpi/fortran/mpif-h/init_f.c @@ -22,17 +22,6 @@ #include "ompi_config.h" -#if (OPAL_HAVE_WEAK_SYMBOLS || ! OMPI_BUILD_MPI_PROFILING) -#if OPAL_CC_USE_PRAGMA_IDENT -#pragma ident OMPI_IDENT_STRING -#elif OPAL_CC_USE_IDENT -#ident OMPI_IDENT_STRING -#else -const char ident[] = OMPI_IDENT_STRING; -#endif -#endif - - #include "ompi/mpi/fortran/mpif-h/bindings.h" #if OMPI_BUILD_MPI_PROFILING diff --git a/ompi/mpi/fortran/mpif-h/intercomm_create_from_groups_f.c b/ompi/mpi/fortran/mpif-h/intercomm_create_from_groups_f.c new file mode 100644 index 00000000000..61e129ff25e --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/intercomm_create_from_groups_f.c @@ -0,0 +1,123 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" +#include "ompi/constants.h" +#include "ompi/instance/instance.h" +#include "ompi/group/group.h" + + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_INTERCOMM_CREATE_FROM_GROUPS = ompi_intercomm_create_from_groups_f +#pragma weak pmpi_intercomm_create_from_groups = ompi_intercomm_create_from_groups_f +#pragma weak pmpi_intercomm_create_from_groups_ = ompi_intercomm_create_from_groups_f +#pragma weak pmpi_intercomm_create_from_groups__ = ompi_intercomm_create_from_groups_f + +#pragma weak PMPI_Intercomm_create_from_groups_f = ompi_intercomm_create_from_groups_f +#pragma weak PMPI_Intercomm_create_from_groups_f08 = ompi_intercomm_create_from_groups_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_INTERCOMM_CREATE_FROM_GROUPS, + pmpi_intercomm_create_from_groups, + pmpi_intercomm_create_from_groups_, + pmpi_intercomm_create_from_groups__, + pompi_intercomm_create_from_groups_f, + (MPI_Fint *local_group, MPI_Fint *local_leader, MPI_Fint *remote_group, + MPI_Fint *remote_leader, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, + MPI_Fint *internewcomm, MPI_Fint *ierr, int name_len), + (local_group, local_leader, remote_group, + remote_leader, stringtag, info, errhandler, internewcomm, ierr, name_len) ) + +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_INTERCOMM_CREATE_FROM_GROUPS = ompi_intercomm_create_from_groups_f +#pragma weak mpi_intercomm_create_from_groups = ompi_intercomm_create_from_groups_f +#pragma weak 
mpi_intercomm_create_from_groups_ = ompi_intercomm_create_from_groups_f +#pragma weak mpi_intercomm_create_from_groups__ = ompi_intercomm_create_from_groups_f + +#pragma weak MPI_Intercomm_create_from_groups_f = ompi_intercomm_create_from_groups_f +#pragma weak MPI_Intercomm_create_from_groups_f08 = ompi_intercomm_create_from_groups_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_INTERCOMM_CREATE_FROM_GROUPS, + mpi_intercomm_create_from_groups, + mpi_intercomm_create_from_groups_, + mpi_intercomm_create_from_groups__, + ompi_intercomm_create_from_groups_f, + (MPI_Fint *local_group, MPI_Fint *local_leader, MPI_Fint *remote_group, + MPI_Fint *remote_leader, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, + MPI_Fint *internewcomm, MPI_Fint *ierr, int name_len), + (local_group, local_leader, remote_group, + remote_leader, stringtag, info, errhandler, internewcomm, ierr, name_len) ) +#else +#define ompi_intercomm_create_from_groups_f pompi_intercomm_create_from_groups_f +#endif +#endif + +void ompi_intercomm_create_from_groups_f(MPI_Fint *local_group, MPI_Fint *local_leader, MPI_Fint *remote_group, + MPI_Fint *remote_leader, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, + MPI_Fint *internewcomm, MPI_Fint *ierr, int name_len) +{ + int c_ierr, ret; + MPI_Group c_lgroup, c_rgroup; + char *c_tag; + MPI_Comm c_intercomm; + MPI_Info c_info; + MPI_Errhandler c_err; + + c_lgroup = PMPI_Group_f2c(*local_group); + c_rgroup = PMPI_Group_f2c(*remote_group); + c_info = PMPI_Info_f2c(*info); + c_err = PMPI_Errhandler_f2c(*errhandler); + + /* stringtag is a Fortran character buffer of length name_len; the C call below needs a C copy (c_tag) */ + + /* Convert the fortran string */ + if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(stringtag, name_len, + &c_tag))) { + c_ierr = OMPI_ERRHANDLER_INVOKE(ompi_group_get_instance(c_lgroup), ret, "MPI_INTERCOMM_CREATE_FROM_GROUPS"); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + return; + } + + c_ierr = 
PMPI_Intercomm_create_from_groups(c_lgroup, 
OMPI_FINT_2_INT(*local_leader), + c_rgroup, OMPI_FINT_2_INT(*remote_leader), + c_tag, c_info, c_err, &c_intercomm); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *internewcomm = PMPI_Comm_c2f (c_intercomm); + } + + /* Free the C tag */ + + free(c_tag); +} diff --git a/ompi/mpi/fortran/mpif-h/isendrecv_f.c b/ompi/mpi/fortran/mpif-h/isendrecv_f.c new file mode 100644 index 00000000000..cf010ad0803 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/isendrecv_f.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
+ * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/constants.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_ISENDRECV = ompi_isendrecv_f +#pragma weak pmpi_isendrecv = ompi_isendrecv_f +#pragma weak pmpi_isendrecv_ = ompi_isendrecv_f +#pragma weak pmpi_isendrecv__ = ompi_isendrecv_f + +#pragma weak PMPI_Isendrecv_f = ompi_isendrecv_f +#pragma weak PMPI_Isendrecv_f08 = ompi_isendrecv_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_ISENDRECV, + pmpi_isendrecv, + pmpi_isendrecv_, + pmpi_isendrecv__, + pompi_isendrecv_f, + (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Fint *dest, MPI_Fint *sendtag, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), + (sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, request, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_ISENDRECV = ompi_isendrecv_f +#pragma weak mpi_isendrecv = ompi_isendrecv_f +#pragma weak mpi_isendrecv_ = ompi_isendrecv_f +#pragma weak mpi_isendrecv__ = ompi_isendrecv_f + +#pragma weak MPI_Isendrecv_f = ompi_isendrecv_f +#pragma weak MPI_Isendrecv_f08 = ompi_isendrecv_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_ISENDRECV, + mpi_isendrecv, + mpi_isendrecv_, + mpi_isendrecv__, + ompi_isendrecv_f, + (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Fint *dest, MPI_Fint *sendtag, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), + (sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, request, ierr) ) +#else +#define ompi_isendrecv_f pompi_isendrecv_f +#endif +#endif + + +void ompi_isendrecv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, + MPI_Fint *dest, MPI_Fint *sendtag, char *recvbuf, + MPI_Fint *recvcount, MPI_Fint *recvtype, + MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, + MPI_Fint *request, MPI_Fint *ierr) +{ + int c_ierr; + MPI_Comm c_comm; + MPI_Datatype c_sendtype = PMPI_Type_f2c(*sendtype); + MPI_Datatype c_recvtype = PMPI_Type_f2c(*recvtype); + MPI_Request c_req; + + c_comm = PMPI_Comm_f2c (*comm); + + c_ierr = PMPI_Isendrecv(OMPI_F2C_BOTTOM(sendbuf), OMPI_FINT_2_INT(*sendcount), + c_sendtype, + OMPI_FINT_2_INT(*dest), + OMPI_FINT_2_INT(*sendtag), + OMPI_F2C_BOTTOM(recvbuf), OMPI_FINT_2_INT(*recvcount), + c_recvtype, OMPI_FINT_2_INT(*source), + OMPI_FINT_2_INT(*recvtag), + c_comm, &c_req); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *request = PMPI_Request_c2f(c_req); + } +} diff --git a/ompi/mpi/fortran/mpif-h/isendrecv_replace_f.c b/ompi/mpi/fortran/mpif-h/isendrecv_replace_f.c new file mode 100644 index 00000000000..37d69588d2b --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/isendrecv_replace_f.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. 
All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/constants.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_ISENDRECV_REPLACE = ompi_isendrecv_replace_f +#pragma weak pmpi_isendrecv_replace = ompi_isendrecv_replace_f +#pragma weak pmpi_isendrecv_replace_ = ompi_isendrecv_replace_f +#pragma weak pmpi_isendrecv_replace__ = ompi_isendrecv_replace_f + +#pragma weak PMPI_Isendrecv_replace_f = ompi_isendrecv_replace_f +#pragma weak PMPI_Isendrecv_replace_f08 = ompi_isendrecv_replace_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_ISENDRECV_REPLACE, + pmpi_isendrecv_replace, + pmpi_isendrecv_replace_, + pmpi_isendrecv_replace__, + pompi_isendrecv_replace_f, + (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *sendtag, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), + (buf, count, datatype, dest, sendtag, source, recvtag, comm, request, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_ISENDRECV_REPLACE = ompi_isendrecv_replace_f +#pragma weak mpi_isendrecv_replace = ompi_isendrecv_replace_f +#pragma weak mpi_isendrecv_replace_ = ompi_isendrecv_replace_f +#pragma weak mpi_isendrecv_replace__ = ompi_isendrecv_replace_f + +#pragma weak MPI_Isendrecv_replace_f = ompi_isendrecv_replace_f +#pragma weak 
MPI_Isendrecv_replace_f08 = ompi_isendrecv_replace_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_ISENDRECV_REPLACE, + mpi_isendrecv_replace, + mpi_isendrecv_replace_, + mpi_isendrecv_replace__, + ompi_isendrecv_replace_f, + (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *sendtag, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), + (buf, count, datatype, dest, sendtag, source, recvtag, comm, request, ierr) ) +#else +#define ompi_isendrecv_replace_f pompi_isendrecv_replace_f +#endif +#endif + + +void ompi_isendrecv_replace_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, + MPI_Fint *dest, MPI_Fint *sendtag, + MPI_Fint *source, MPI_Fint *recvtag, + MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) +{ + int c_ierr; + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); + MPI_Comm c_comm; + MPI_Request c_req; + + c_comm = PMPI_Comm_f2c (*comm); + + c_ierr = PMPI_Isendrecv_replace(OMPI_F2C_BOTTOM(buf), + OMPI_FINT_2_INT(*count), + c_type, + OMPI_FINT_2_INT(*dest), + OMPI_FINT_2_INT(*sendtag), + OMPI_FINT_2_INT(*source), + OMPI_FINT_2_INT(*recvtag), + c_comm, &c_req); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *request = PMPI_Request_c2f(c_req); + } + +} diff --git a/ompi/mpi/fortran/mpif-h/keyval_create_f.c b/ompi/mpi/fortran/mpif-h/keyval_create_f.c index 039c777129c..4ad5c51ca67 100644 --- a/ompi/mpi/fortran/mpif-h/keyval_create_f.c +++ b/ompi/mpi/fortran/mpif-h/keyval_create_f.c @@ -34,13 +34,12 @@ #pragma weak PMPI_Keyval_create_f = ompi_keyval_create_f #pragma weak PMPI_Keyval_create_f08 = ompi_keyval_create_f #else -OMPI_GENERATE_F77_BINDINGS (PMPI_KEYVAL_CREATE, - pmpi_keyval_create, - pmpi_keyval_create_, - pmpi_keyval_create__, - pompi_keyval_create_f, - (ompi_fint_copy_attr_function* copy_fn, ompi_fint_delete_attr_function* delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr), - (copy_fn, delete_fn, keyval, 
extra_state, ierr) ) +OMPI_GENERATE_F77_BINDINGS(PMPI_KEYVAL_CREATE, pmpi_keyval_create, pmpi_keyval_create_, + pmpi_keyval_create__, pompi_keyval_create_f, + (ompi_fint_copy_attr_function copy_fn, + ompi_fint_delete_attr_function delete_fn, MPI_Fint *keyval, + MPI_Fint *extra_state, MPI_Fint *ierr), + (copy_fn, delete_fn, keyval, extra_state, ierr)) #endif #endif @@ -54,13 +53,12 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_KEYVAL_CREATE, #pragma weak MPI_Keyval_create_f08 = ompi_keyval_create_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPI_KEYVAL_CREATE, - mpi_keyval_create, - mpi_keyval_create_, - mpi_keyval_create__, - ompi_keyval_create_f, - (ompi_fint_copy_attr_function* copy_fn, ompi_fint_delete_attr_function* delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr), - (copy_fn, delete_fn, keyval, extra_state, ierr) ) +OMPI_GENERATE_F77_BINDINGS(MPI_KEYVAL_CREATE, mpi_keyval_create, mpi_keyval_create_, + mpi_keyval_create__, ompi_keyval_create_f, + (ompi_fint_copy_attr_function copy_fn, + ompi_fint_delete_attr_function delete_fn, MPI_Fint *keyval, + MPI_Fint *extra_state, MPI_Fint *ierr), + (copy_fn, delete_fn, keyval, extra_state, ierr)) #else #define ompi_keyval_create_f pompi_keyval_create_f #endif @@ -68,10 +66,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_KEYVAL_CREATE, static const char FUNC_NAME[] = "MPI_keyval_create_f"; -void ompi_keyval_create_f(ompi_fint_copy_attr_function* copy_attr_fn, - ompi_fint_delete_attr_function* delete_attr_fn, - MPI_Fint *keyval, MPI_Fint *extra_state, - MPI_Fint *ierr) +void ompi_keyval_create_f(ompi_fint_copy_attr_function copy_attr_fn, + ompi_fint_delete_attr_function delete_attr_fn, MPI_Fint *keyval, + MPI_Fint *extra_state, MPI_Fint *ierr) { int ret, c_ierr; OMPI_SINGLE_NAME_DECL(keyval); diff --git a/ompi/mpiext/pcollreq/mpif-h/neighbor_alltoall_init_f.c b/ompi/mpi/fortran/mpif-h/neighbor_allgather_init_f.c similarity index 58% rename from ompi/mpiext/pcollreq/mpif-h/neighbor_alltoall_init_f.c 
rename to ompi/mpi/fortran/mpif-h/neighbor_allgather_init_f.c index 561d96a55ea..ab6f9df6605 100644 --- a/ompi/mpiext/pcollreq/mpif-h/neighbor_alltoall_init_f.c +++ b/ompi/mpi/fortran/mpif-h/neighbor_allgather_init_f.c @@ -13,9 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,56 +26,55 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_NEIGHBOR_ALLTOALL_INIT = ompix_neighbor_alltoall_init_f -#pragma weak pmpix_neighbor_alltoall_init = ompix_neighbor_alltoall_init_f -#pragma weak pmpix_neighbor_alltoall_init_ = ompix_neighbor_alltoall_init_f -#pragma weak pmpix_neighbor_alltoall_init__ = ompix_neighbor_alltoall_init_f +#pragma weak PMPI_NEIGHBOR_ALLGATHER_INIT = ompi_neighbor_allgather_init_f +#pragma weak pmpi_neighbor_allgather_init = ompi_neighbor_allgather_init_f +#pragma weak pmpi_neighbor_allgather_init_ = ompi_neighbor_allgather_init_f +#pragma weak pmpi_neighbor_allgather_init__ = ompi_neighbor_allgather_init_f -#pragma weak PMPIX_Neighbor_alltoall_init_f = ompix_neighbor_alltoall_init_f -#pragma weak PMPIX_Neighbor_alltoall_init_f08 = ompix_neighbor_alltoall_init_f +#pragma weak PMPI_Neighbor_allgather_init_f = ompi_neighbor_allgather_init_f +#pragma weak PMPI_Neighbor_allgather_init_f08 = ompi_neighbor_allgather_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_NEIGHBOR_ALLTOALL_INIT, - 
pmpix_neighbor_alltoall_init, - pmpix_neighbor_alltoall_init_, - pmpix_neighbor_alltoall_init__, - pompix_neighbor_alltoall_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLGATHER_INIT, + pmpi_neighbor_allgather_init, + pmpi_neighbor_allgather_init_, + pmpi_neighbor_allgather_init__, + pompi_neighbor_allgather_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_NEIGHBOR_ALLTOALL_INIT = ompix_neighbor_alltoall_init_f -#pragma weak mpix_neighbor_alltoall_init = ompix_neighbor_alltoall_init_f -#pragma weak mpix_neighbor_alltoall_init_ = ompix_neighbor_alltoall_init_f -#pragma weak mpix_neighbor_alltoall_init__ = ompix_neighbor_alltoall_init_f +#pragma weak MPI_NEIGHBOR_ALLGATHER_INIT = ompi_neighbor_allgather_init_f +#pragma weak mpi_neighbor_allgather_init = ompi_neighbor_allgather_init_f +#pragma weak mpi_neighbor_allgather_init_ = ompi_neighbor_allgather_init_f +#pragma weak mpi_neighbor_allgather_init__ = ompi_neighbor_allgather_init_f -#pragma weak MPIX_Neighbor_alltoall_init_f = ompix_neighbor_alltoall_init_f -#pragma weak MPIX_Neighbor_alltoall_init_f08 = ompix_neighbor_alltoall_init_f +#pragma weak MPI_Neighbor_allgather_init_f = ompi_neighbor_allgather_init_f +#pragma weak MPI_Neighbor_allgather_init_f08 = ompi_neighbor_allgather_init_f #else #if ! 
OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_NEIGHBOR_ALLTOALL_INIT, - mpix_neighbor_alltoall_init, - mpix_neighbor_alltoall_init_, - mpix_neighbor_alltoall_init__, - ompix_neighbor_alltoall_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLGATHER_INIT, + mpi_neighbor_allgather_init, + mpi_neighbor_allgather_init_, + mpi_neighbor_allgather_init__, + ompi_neighbor_allgather_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) #else -#define ompix_neighbor_alltoall_init_f pompix_neighbor_alltoall_init_f +#define ompi_neighbor_allgather_init_f pompi_neighbor_allgather_init_f #endif #endif -void ompix_neighbor_alltoall_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, +void ompi_neighbor_allgather_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { - int c_ierr; + int ierr_c; MPI_Comm c_comm; MPI_Info c_info; MPI_Request c_req; @@ -91,13 +89,14 @@ void ompix_neighbor_alltoall_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Neighbor_alltoall_init(sendbuf, + ierr_c = PMPI_Neighbor_allgather_init(sendbuf, OMPI_FINT_2_INT(*sendcount), c_sendtype, recvbuf, OMPI_FINT_2_INT(*recvcount), c_recvtype, c_comm, c_info, &c_req); - if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_req); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); + + if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_req); } diff --git a/ompi/mpiext/pcollreq/mpif-h/neighbor_allgatherv_init_f.c b/ompi/mpi/fortran/mpif-h/neighbor_allgatherv_init_f.c 
similarity index 53% rename from ompi/mpiext/pcollreq/mpif-h/neighbor_allgatherv_init_f.c rename to ompi/mpi/fortran/mpif-h/neighbor_allgatherv_init_f.c index dae12203180..16609f023e7 100644 --- a/ompi/mpiext/pcollreq/mpif-h/neighbor_allgatherv_init_f.c +++ b/ompi/mpi/fortran/mpif-h/neighbor_allgatherv_init_f.c @@ -13,8 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,55 +26,54 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_NEIGHBOR_ALLGATHERV_INIT = ompix_neighbor_allgatherv_init_f -#pragma weak pmpix_neighbor_allgatherv_init = ompix_neighbor_allgatherv_init_f -#pragma weak pmpix_neighbor_allgatherv_init_ = ompix_neighbor_allgatherv_init_f -#pragma weak pmpix_neighbor_allgatherv_init__ = ompix_neighbor_allgatherv_init_f +#pragma weak PMPI_NEIGHBOR_ALLGATHERV_INIT = ompi_neighbor_allgatherv_init_f +#pragma weak pmpi_neighbor_allgatherv_init = ompi_neighbor_allgatherv_init_f +#pragma weak pmpi_neighbor_allgatherv_init_ = ompi_neighbor_allgatherv_init_f +#pragma weak pmpi_neighbor_allgatherv_init__ = ompi_neighbor_allgatherv_init_f -#pragma weak PMPIX_Neighbor_allgatherv_init_f = ompix_neighbor_allgatherv_init_f -#pragma weak PMPIX_Neighbor_allgatherv_init_f08 = ompix_neighbor_allgatherv_init_f +#pragma weak PMPI_Neighbor_allgatherv_init_f = ompi_neighbor_allgatherv_init_f +#pragma weak PMPI_Neighbor_allgatherv_init_f08 = ompi_neighbor_allgatherv_init_f #else 
-OMPI_GENERATE_F77_BINDINGS (PMPIX_NEIGHBOR_ALLGATHERV_INIT, - pmpix_neighbor_allgatherv_init, - pmpix_neighbor_allgatherv_init_, - pmpix_neighbor_allgatherv_init__, - pompix_neighbor_allgatherv_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLGATHERV_INIT, + pmpi_neighbor_allgatherv_init, + pmpi_neighbor_allgatherv_init_, + pmpi_neighbor_allgatherv_init__, + pompi_neighbor_allgatherv_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_NEIGHBOR_ALLGATHERV_INIT = ompix_neighbor_allgatherv_init_f -#pragma weak mpix_neighbor_allgatherv_init = ompix_neighbor_allgatherv_init_f -#pragma weak mpix_neighbor_allgatherv_init_ = ompix_neighbor_allgatherv_init_f -#pragma weak mpix_neighbor_allgatherv_init__ = ompix_neighbor_allgatherv_init_f +#pragma weak MPI_NEIGHBOR_ALLGATHERV_INIT = ompi_neighbor_allgatherv_init_f +#pragma weak mpi_neighbor_allgatherv_init = ompi_neighbor_allgatherv_init_f +#pragma weak mpi_neighbor_allgatherv_init_ = ompi_neighbor_allgatherv_init_f +#pragma weak mpi_neighbor_allgatherv_init__ = ompi_neighbor_allgatherv_init_f -#pragma weak MPIX_Neighbor_allgatherv_init_f = ompix_neighbor_allgatherv_init_f -#pragma weak MPIX_Neighbor_allgatherv_init_f08 = ompix_neighbor_allgatherv_init_f +#pragma weak MPI_Neighbor_allgatherv_init_f = ompi_neighbor_allgatherv_init_f +#pragma weak MPI_Neighbor_allgatherv_init_f08 = ompi_neighbor_allgatherv_init_f #else #if ! 
OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_NEIGHBOR_ALLGATHERV_INIT, - mpix_neighbor_allgatherv_init, - mpix_neighbor_allgatherv_init_, - mpix_neighbor_allgatherv_init__, - ompix_neighbor_allgatherv_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLGATHERV_INIT, + mpi_neighbor_allgatherv_init, + mpi_neighbor_allgatherv_init_, + mpi_neighbor_allgatherv_init__, + ompi_neighbor_allgatherv_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, info, request, ierr) ) #else -#define ompix_neighbor_allgatherv_init_f pompix_neighbor_allgatherv_init_f +#define ompi_neighbor_allgatherv_init_f pompi_neighbor_allgatherv_init_f #endif #endif -void ompix_neighbor_allgatherv_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, - char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, - MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, - MPI_Fint *ierr) +void ompi_neighbor_allgatherv_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, + char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, + MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, + MPI_Fint *ierr) { MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; @@ -97,13 +96,13 @@ void ompix_neighbor_allgatherv_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fi sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = PMPIX_Neighbor_allgatherv_init(sendbuf, - OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - OMPI_ARRAY_NAME_CONVERT(displs), - c_recvtype, c_comm, c_info, &c_request); + ierr_c = PMPI_Neighbor_allgatherv_init(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + OMPI_ARRAY_NAME_CONVERT(recvcounts), + 
OMPI_ARRAY_NAME_CONVERT(displs), + c_recvtype, c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_request); diff --git a/ompi/mpi/fortran/mpif-h/neighbor_alltoall_init_f.c b/ompi/mpi/fortran/mpif-h/neighbor_alltoall_init_f.c new file mode 100644 index 00000000000..509cba401ca --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/neighbor_alltoall_init_f.c @@ -0,0 +1,102 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/constants.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_NEIGHBOR_ALLTOALL_INIT = ompi_neighbor_alltoall_init_f +#pragma weak pmpi_neighbor_alltoall_init = ompi_neighbor_alltoall_init_f +#pragma weak pmpi_neighbor_alltoall_init_ = ompi_neighbor_alltoall_init_f +#pragma weak pmpi_neighbor_alltoall_init__ = ompi_neighbor_alltoall_init_f + +#pragma weak PMPI_Neighbor_alltoall_init_f = ompi_neighbor_alltoall_init_f +#pragma weak PMPI_Neighbor_alltoall_init_f08 = ompi_neighbor_alltoall_init_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALL_INIT, + pmpi_neighbor_alltoall_init, + pmpi_neighbor_alltoall_init_, + pmpi_neighbor_alltoall_init__, + pompi_neighbor_alltoall_init_f, + (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), + (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_NEIGHBOR_ALLTOALL_INIT = ompi_neighbor_alltoall_init_f +#pragma weak mpi_neighbor_alltoall_init = ompi_neighbor_alltoall_init_f +#pragma weak mpi_neighbor_alltoall_init_ = ompi_neighbor_alltoall_init_f +#pragma weak mpi_neighbor_alltoall_init__ = ompi_neighbor_alltoall_init_f + +#pragma weak MPI_Neighbor_alltoall_init_f = ompi_neighbor_alltoall_init_f +#pragma weak MPI_Neighbor_alltoall_init_f08 = ompi_neighbor_alltoall_init_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLTOALL_INIT, + mpi_neighbor_alltoall_init, + mpi_neighbor_alltoall_init_, + mpi_neighbor_alltoall_init__, + ompi_neighbor_alltoall_init_f, + (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), + (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) +#else +#define ompi_neighbor_alltoall_init_f pompi_neighbor_alltoall_init_f +#endif +#endif + + +void ompi_neighbor_alltoall_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, + char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, + MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +{ + int c_ierr; + MPI_Comm c_comm; + MPI_Info c_info; + MPI_Request c_req; + MPI_Datatype c_sendtype, c_recvtype; + + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); + c_info = PMPI_Info_f2c(*info); + + sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); + sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); + recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); + + c_ierr = PMPI_Neighbor_alltoall_init(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + OMPI_FINT_2_INT(*recvcount), + c_recvtype, c_comm, c_info, &c_req); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_req); +} diff --git a/ompi/mpiext/pcollreq/mpif-h/neighbor_alltoallv_init_f.c b/ompi/mpi/fortran/mpif-h/neighbor_alltoallv_init_f.c similarity index 54% rename from ompi/mpiext/pcollreq/mpif-h/neighbor_alltoallv_init_f.c rename to ompi/mpi/fortran/mpif-h/neighbor_alltoallv_init_f.c index b11679ac411..d1657b2a4bd 100644 --- a/ompi/mpiext/pcollreq/mpif-h/neighbor_alltoallv_init_f.c +++ b/ompi/mpi/fortran/mpif-h/neighbor_alltoallv_init_f.c @@ -13,8 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, 
Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,55 +26,54 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_NEIGHBOR_ALLTOALLV_INIT = ompix_neighbor_alltoallv_init_f -#pragma weak pmpix_neighbor_alltoallv_init = ompix_neighbor_alltoallv_init_f -#pragma weak pmpix_neighbor_alltoallv_init_ = ompix_neighbor_alltoallv_init_f -#pragma weak pmpix_neighbor_alltoallv_init__ = ompix_neighbor_alltoallv_init_f +#pragma weak PMPI_NEIGHBOR_ALLTOALLV_INIT = ompi_neighbor_alltoallv_init_f +#pragma weak pmpi_neighbor_alltoallv_init = ompi_neighbor_alltoallv_init_f +#pragma weak pmpi_neighbor_alltoallv_init_ = ompi_neighbor_alltoallv_init_f +#pragma weak pmpi_neighbor_alltoallv_init__ = ompi_neighbor_alltoallv_init_f -#pragma weak PMPIX_Neighbor_alltoallv_init_f = ompix_neighbor_alltoallv_init_f -#pragma weak PMPIX_Neighbor_alltoallv_init_f08 = ompix_neighbor_alltoallv_init_f +#pragma weak PMPI_Neighbor_alltoallv_init_f = ompi_neighbor_alltoallv_init_f +#pragma weak PMPI_Neighbor_alltoallv_init_f08 = ompi_neighbor_alltoallv_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_NEIGHBOR_ALLTOALLV_INIT, - pmpix_neighbor_alltoallv_init, - pmpix_neighbor_alltoallv_init_, - pmpix_neighbor_alltoallv_init__, - pompix_neighbor_alltoallv_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALLV_INIT, + pmpi_neighbor_alltoallv_init, + pmpi_neighbor_alltoallv_init_, + pmpi_neighbor_alltoallv_init__, + pompi_neighbor_alltoallv_init_f, (char *sendbuf, MPI_Fint 
*sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_NEIGHBOR_ALLTOALLV_INIT = ompix_neighbor_alltoallv_init_f -#pragma weak mpix_neighbor_alltoallv_init = ompix_neighbor_alltoallv_init_f -#pragma weak mpix_neighbor_alltoallv_init_ = ompix_neighbor_alltoallv_init_f -#pragma weak mpix_neighbor_alltoallv_init__ = ompix_neighbor_alltoallv_init_f +#pragma weak MPI_NEIGHBOR_ALLTOALLV_INIT = ompi_neighbor_alltoallv_init_f +#pragma weak mpi_neighbor_alltoallv_init = ompi_neighbor_alltoallv_init_f +#pragma weak mpi_neighbor_alltoallv_init_ = ompi_neighbor_alltoallv_init_f +#pragma weak mpi_neighbor_alltoallv_init__ = ompi_neighbor_alltoallv_init_f -#pragma weak MPIX_Neighbor_alltoallv_init_f = ompix_neighbor_alltoallv_init_f -#pragma weak MPIX_Neighbor_alltoallv_init_f08 = ompix_neighbor_alltoallv_init_f +#pragma weak MPI_Neighbor_alltoallv_init_f = ompi_neighbor_alltoallv_init_f +#pragma weak MPI_Neighbor_alltoallv_init_f08 = ompi_neighbor_alltoallv_init_f #else #if ! 
OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_NEIGHBOR_ALLTOALLV_INIT, - mpix_neighbor_alltoallv_init, - mpix_neighbor_alltoallv_init_, - mpix_neighbor_alltoallv_init__, - ompix_neighbor_alltoallv_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLTOALLV_INIT, + mpi_neighbor_alltoallv_init, + mpi_neighbor_alltoallv_init_, + mpi_neighbor_alltoallv_init__, + ompi_neighbor_alltoallv_init_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, info, request, ierr) ) #else -#define ompix_neighbor_alltoallv_init_f pompix_neighbor_alltoallv_init_f +#define ompi_neighbor_alltoallv_init_f pompi_neighbor_alltoallv_init_f #endif #endif -void ompix_neighbor_alltoallv_init_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, - MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, - MPI_Fint *rdispls, MPI_Fint *recvtype, - MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +void ompi_neighbor_alltoallv_init_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, + MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, + MPI_Fint *rdispls, MPI_Fint *recvtype, + MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; @@ -101,14 +100,14 @@ void ompix_neighbor_alltoallv_init_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fi sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Neighbor_alltoallv_init(sendbuf, - OMPI_ARRAY_NAME_CONVERT(sendcounts), - OMPI_ARRAY_NAME_CONVERT(sdispls), - c_sendtype, - recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - OMPI_ARRAY_NAME_CONVERT(rdispls), - c_recvtype, c_comm, c_info, &c_request); + c_ierr = PMPI_Neighbor_alltoallv_init(sendbuf, + 
OMPI_ARRAY_NAME_CONVERT(sendcounts), + OMPI_ARRAY_NAME_CONVERT(sdispls), + c_sendtype, + recvbuf, + OMPI_ARRAY_NAME_CONVERT(recvcounts), + OMPI_ARRAY_NAME_CONVERT(rdispls), + c_recvtype, c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); diff --git a/ompi/mpiext/pcollreq/mpif-h/neighbor_alltoallw_init_f.c b/ompi/mpi/fortran/mpif-h/neighbor_alltoallw_init_f.c similarity index 55% rename from ompi/mpiext/pcollreq/mpif-h/neighbor_alltoallw_init_f.c rename to ompi/mpi/fortran/mpif-h/neighbor_alltoallw_init_f.c index d7a0fbf6bf8..69c90953f0a 100644 --- a/ompi/mpiext/pcollreq/mpif-h/neighbor_alltoallw_init_f.c +++ b/ompi/mpi/fortran/mpif-h/neighbor_alltoallw_init_f.c @@ -13,8 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -26,56 +26,55 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_NEIGHBOR_ALLTOALLW_INIT = ompix_neighbor_alltoallw_init_f -#pragma weak pmpix_neighbor_alltoallw_init = ompix_neighbor_alltoallw_init_f -#pragma weak pmpix_neighbor_alltoallw_init_ = ompix_neighbor_alltoallw_init_f -#pragma weak pmpix_neighbor_alltoallw_init__ = ompix_neighbor_alltoallw_init_f +#pragma weak PMPI_NEIGHBOR_ALLTOALLW_INIT = ompi_neighbor_alltoallw_init_f +#pragma weak pmpi_neighbor_alltoallw_init = ompi_neighbor_alltoallw_init_f +#pragma weak pmpi_neighbor_alltoallw_init_ = ompi_neighbor_alltoallw_init_f +#pragma weak pmpi_neighbor_alltoallw_init__ = ompi_neighbor_alltoallw_init_f -#pragma weak PMPIX_Neighbor_alltoallw_init_f = ompix_neighbor_alltoallw_init_f -#pragma weak PMPIX_Neighbor_alltoallw_init_f08 = ompix_neighbor_alltoallw_init_f +#pragma weak PMPI_Neighbor_alltoallw_init_f = ompi_neighbor_alltoallw_init_f +#pragma weak PMPI_Neighbor_alltoallw_init_f08 = ompi_neighbor_alltoallw_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_NEIGHBOR_ALLTOALLW_INIT, - pmpix_neighbor_alltoallw_init, - pmpix_neighbor_alltoallw_init_, - pmpix_neighbor_alltoallw_init__, - pompix_neighbor_alltoallw_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALLW_INIT, + pmpi_neighbor_alltoallw_init, + pmpi_neighbor_alltoallw_init_, + pmpi_neighbor_alltoallw_init__, + pompi_neighbor_alltoallw_init_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Aint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Aint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, info, request, ierr) ) #endif #endif #if 
OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_NEIGHBOR_ALLTOALLW_INIT = ompix_neighbor_alltoallw_init_f -#pragma weak mpix_neighbor_alltoallw_init = ompix_neighbor_alltoallw_init_f -#pragma weak mpix_neighbor_alltoallw_init_ = ompix_neighbor_alltoallw_init_f -#pragma weak mpix_neighbor_alltoallw_init__ = ompix_neighbor_alltoallw_init_f +#pragma weak MPI_NEIGHBOR_ALLTOALLW_INIT = ompi_neighbor_alltoallw_init_f +#pragma weak mpi_neighbor_alltoallw_init = ompi_neighbor_alltoallw_init_f +#pragma weak mpi_neighbor_alltoallw_init_ = ompi_neighbor_alltoallw_init_f +#pragma weak mpi_neighbor_alltoallw_init__ = ompi_neighbor_alltoallw_init_f -#pragma weak MPIX_Neighbor_alltoallw_init_f = ompix_neighbor_alltoallw_init_f -#pragma weak MPIX_Neighbor_alltoallw_init_f08 = ompix_neighbor_alltoallw_init_f +#pragma weak MPI_Neighbor_alltoallw_init_f = ompi_neighbor_alltoallw_init_f +#pragma weak MPI_Neighbor_alltoallw_init_f08 = ompi_neighbor_alltoallw_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_NEIGHBOR_ALLTOALLW_INIT, - mpix_neighbor_alltoallw_init, - mpix_neighbor_alltoallw_init_, - mpix_neighbor_alltoallw_init__, - ompix_neighbor_alltoallw_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLTOALLW_INIT, + mpi_neighbor_alltoallw_init, + mpi_neighbor_alltoallw_init_, + mpi_neighbor_alltoallw_init__, + ompi_neighbor_alltoallw_init_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Aint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Aint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, info, request, ierr) ) #else -#define ompix_neighbor_alltoallw_init_f pompix_neighbor_alltoallw_init_f +#define ompi_neighbor_alltoallw_init_f pompi_neighbor_alltoallw_init_f #endif #endif -void ompix_neighbor_alltoallw_init_f(char *sendbuf, MPI_Fint *sendcounts, - MPI_Aint *sdispls, MPI_Fint *sendtypes, - char 
*recvbuf, MPI_Fint *recvcounts, - MPI_Aint *rdispls, MPI_Fint *recvtypes, - MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +void ompi_neighbor_alltoallw_init_f(char *sendbuf, MPI_Fint *sendcounts, + MPI_Aint *sdispls, MPI_Fint *sendtypes, + char *recvbuf, MPI_Fint *recvcounts, + MPI_Aint *rdispls, MPI_Fint *recvtypes, + MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { MPI_Comm c_comm; MPI_Datatype *c_sendtypes, *c_recvtypes; @@ -105,14 +104,14 @@ void ompix_neighbor_alltoallw_init_f(char *sendbuf, MPI_Fint *sendcounts, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Neighbor_alltoallw_init(sendbuf, - OMPI_ARRAY_NAME_CONVERT(sendcounts), - sdispls, - c_sendtypes, - recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - rdispls, - c_recvtypes, c_comm, c_info, &c_request); + c_ierr = PMPI_Neighbor_alltoallw_init(sendbuf, + OMPI_ARRAY_NAME_CONVERT(sendcounts), + sdispls, + c_sendtypes, + recvbuf, + OMPI_ARRAY_NAME_CONVERT(recvcounts), + rdispls, + c_recvtypes, c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); diff --git a/ompi/mpi/fortran/mpif-h/precv_init_f.c b/ompi/mpi/fortran/mpif-h/precv_init_f.c index 7b8b32158ee..b411e06a460 100644 --- a/ompi/mpi/fortran/mpif-h/precv_init_f.c +++ b/ompi/mpi/fortran/mpif-h/precv_init_f.c @@ -13,6 +13,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2021 Bull S.A.S. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -40,8 +41,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PRECV_INIT, pmpi_precv_init_, pmpi_precv_init__, pompi_precv_init_f, - (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), - (buf, partitions, count, datatype, dest, tag, comm, request, ierr) ) + (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), + (buf, partitions, count, datatype, dest, tag, comm, info, request, ierr) ) #endif #endif @@ -60,21 +61,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_PRECV_INIT, mpi_precv_init_, mpi_precv_init__, ompi_precv_init_f, - (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), - (buf, partitions, count, datatype, dest, tag, comm, request, ierr) ) + (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), + (buf, partitions, count, datatype, dest, tag, comm, info, request, ierr) ) #else #define ompi_precv_init_f pompi_precv_init_f #endif #endif -void ompi_precv_init_f(char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) +void ompi_precv_init_f(char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; + MPI_Info c_info; MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; + c_info = PMPI_Info_f2c(*info); c_comm = PMPI_Comm_f2c (*comm); c_ierr = PMPI_Precv_init(OMPI_F2C_BOTTOM(buf), @@ -82,7 +85,7 @@ void ompi_precv_init_f(char *buf, MPI_Fint *partitions, MPI_Fint 
*count, MPI_Fin OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), - c_comm, &c_req); + c_comm, c_info, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/profile/Makefile.am b/ompi/mpi/fortran/mpif-h/profile/Makefile.am index 0dffef8a843..73367d3e501 100644 --- a/ompi/mpi/fortran/mpif-h/profile/Makefile.am +++ b/ompi/mpi/fortran/mpif-h/profile/Makefile.am @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2021 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2013 Inria. All rights reserved. # Copyright (c) 2011-2013 Universite Bordeaux 1 # Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights @@ -18,6 +18,8 @@ # Copyright (c) 2015-2021 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. +# Copyright (c) 2019 Triad National Security, LLC. All rights +# reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -49,17 +51,25 @@ linked_files = \ paint_add_f.c \ paint_diff_f.c \ pallgather_f.c \ + pallgather_init_f.c \ pallgatherv_f.c \ + pallgatherv_init_f.c \ palloc_mem_f.c \ pallreduce_f.c \ + pallreduce_init_f.c \ palltoall_f.c \ + palltoall_init_f.c \ palltoallv_f.c \ + palltoallv_init_f.c \ palltoallw_f.c \ + palltoallw_init_f.c \ pattr_delete_f.c \ pattr_get_f.c \ pattr_put_f.c \ pbarrier_f.c \ + pbarrier_init_f.c \ pbcast_f.c \ + pbcast_init_f.c \ pbsend_f.c \ pbsend_init_f.c \ pbuffer_attach_f.c \ @@ -80,6 +90,7 @@ linked_files = \ pcomm_connect_f.c \ pcomm_create_errhandler_f.c \ pcomm_create_f.c \ + pcomm_create_from_group_f.c \ pcomm_create_group_f.c \ pcomm_create_keyval_f.c \ pcomm_delete_attr_f.c \ @@ -87,6 +98,7 @@ linked_files = \ pcomm_dup_f.c \ pcomm_dup_with_info_f.c \ pcomm_idup_f.c \ + pcomm_idup_with_info_f.c \ pcomm_free_f.c \ pcomm_free_keyval_f.c \ pcomm_get_attr_f.c \ @@ -118,6 +130,7 @@ linked_files = \ perror_class_f.c \ perror_string_f.c \ pexscan_f.c \ + pexscan_init_f.c \ pf_sync_reg_f.c \ pfile_call_errhandler_f.c \ pfile_close_f.c \ @@ -182,7 +195,9 @@ linked_files = \ pfinalize_f.c \ pfree_mem_f.c \ pgather_f.c \ + pgather_init_f.c \ pgatherv_f.c \ + pgatherv_init_f.c \ pget_address_f.c \ pget_count_f.c \ pget_elements_f.c \ @@ -202,6 +217,7 @@ linked_files = \ pgroup_difference_f.c \ pgroup_excl_f.c \ pgroup_free_f.c \ + pgroup_from_session_pset_f.c \ pgroup_incl_f.c \ pgroup_intersection_f.c \ pgroup_range_excl_f.c \ @@ -236,12 +252,14 @@ linked_files = \ pinfo_get_f.c \ pinfo_get_nkeys_f.c \ pinfo_get_nthkey_f.c \ + pinfo_get_string_f.c \ pinfo_get_valuelen_f.c \ pinfo_set_f.c \ pinit_f.c \ pinitialized_f.c \ pinit_thread_f.c \ pintercomm_create_f.c \ + pintercomm_create_from_groups_f.c \ pintercomm_merge_f.c \ piprobe_f.c \ pirecv_f.c \ @@ -253,6 +271,8 @@ linked_files = \ piscatter_f.c \ piscatterv_f.c \ pisend_f.c \ + pisendrecv_f.c \ + pisendrecv_replace_f.c \ pissend_f.c \ 
pis_thread_main_f.c \ pkeyval_create_f.c \ @@ -261,10 +281,15 @@ linked_files = \ pmprobe_f.c \ pmrecv_f.c \ pneighbor_allgather_f.c \ + pneighbor_allgather_init_f.c \ pneighbor_allgatherv_f.c \ + pneighbor_allgatherv_init_f.c \ pneighbor_alltoall_f.c \ + pneighbor_alltoall_init_f.c \ pneighbor_alltoallv_f.c \ + pneighbor_alltoallv_init_f.c \ pneighbor_alltoallw_f.c \ + pneighbor_alltoallw_init_f.c \ pop_commutative_f.c \ pop_create_f.c \ popen_port_f.c \ @@ -286,20 +311,32 @@ linked_files = \ precv_f.c \ precv_init_f.c \ preduce_f.c \ + preduce_init_f.c \ preduce_local_f.c \ preduce_scatter_f.c \ + preduce_scatter_init_f.c \ preduce_scatter_block_f.c \ + preduce_scatter_block_init_f.c \ prequest_free_f.c \ prequest_get_status_f.c \ prsend_f.c \ prsend_init_f.c \ pscan_f.c \ + pscan_init_f.c \ pscatter_f.c \ + pscatter_init_f.c \ pscatterv_f.c \ + pscatterv_init_f.c \ psend_f.c \ psend_init_f.c \ psendrecv_f.c \ psendrecv_replace_f.c \ + psession_get_info_f.c \ + psession_get_nth_pset_f.c \ + psession_get_num_psets_f.c \ + psession_get_pset_info_f.c \ + psession_init_f.c \ + psession_finalize_f.c \ pssend_f.c \ pssend_init_f.c \ pstartall_f.c \ @@ -355,8 +392,8 @@ linked_files = \ pwaitany_f.c \ pwait_f.c \ pwaitsome_f.c \ - pwtick_f.c \ - pwtime_f.c \ + pwtick_f.c \ + pwtime_f.c \ paccumulate_f.c \ praccumulate_f.c \ pregister_datarep_f.c \ @@ -472,11 +509,3 @@ endif # These files were created by targets above MAINTAINERCLEANFILES = $(nodist_libmpi_mpifh_pmpi_la_SOURCES) - -# Don't want these targets in here - -tags-recursive: -tags: -TAGS: -GTAGS: -ID: diff --git a/ompi/mpi/fortran/mpif-h/prototypes_mpi.h b/ompi/mpi/fortran/mpif-h/prototypes_mpi.h index 01aa388862c..b85fdb27ffd 100644 --- a/ompi/mpi/fortran/mpif-h/prototypes_mpi.h +++ b/ompi/mpi/fortran/mpif-h/prototypes_mpi.h @@ -1,4 +1,4 @@ -/* +/* clang-format off * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
@@ -16,6 +16,9 @@ * reserved. * Copyright (c) 2016-2020 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights + * reserved. + * Copyright (c) 2021 Bull S.A.S. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -90,19 +93,27 @@ PN2(void, MPI_Address, mpi_address, MPI_ADDRESS, (char *location, MPI_Fint *addr PN2(MPI_Aint, MPI_Aint_add, mpi_aint_add, MPI_AINT_ADD, (MPI_Aint *base, MPI_Aint *diff)); PN2(MPI_Aint, MPI_Aint_diff, mpi_aint_diff, MPI_AINT_DIFF, (MPI_Aint *addr1, MPI_Aint *addr2)); PN2(void, MPI_Allgather, mpi_allgather, MPI_ALLGATHER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Allgather_init, mpi_allgather_init, MPI_ALLGATHER_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Allgatherv, mpi_allgatherv, MPI_ALLGATHERV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Allgatherv_init, mpi_allgatherv_init, MPI_ALLGATHERV_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Alloc_mem, mpi_alloc_mem, MPI_ALLOC_MEM, (MPI_Aint *size, MPI_Fint *info, char *baseptr, MPI_Fint *ierr)); /* Extra Alloc_mem prototype for the _cptr variant added in MPI-3.0 errata */ PN2(void, MPI_Alloc_mem_cptr, mpi_alloc_mem_cptr, MPI_ALLOC_MEM_CPTR, (MPI_Aint *size, MPI_Fint *info, char *baseptr, MPI_Fint *ierr)); PN2(void, MPI_Allreduce, mpi_allreduce, MPI_ALLREDUCE, (char *sendbuf, char *recvbuf, 
MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Allreduce_init, mpi_allreduce_init, MPI_ALLREDUCE_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Alltoall, mpi_alltoall, MPI_ALLTOALL, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Alltoall_init, mpi_alltoall_init, MPI_ALLTOALL_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Alltoallv, mpi_alltoallv, MPI_ALLTOALLV, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Alltoallv_init, mpi_alltoallv_init, MPI_ALLTOALLV_INIT, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Alltoallw, mpi_alltoallw, MPI_ALLTOALLW, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Alltoallw_init, mpi_alltoallw_init, MPI_ALLTOALLW_INIT, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Attr_delete, mpi_attr_delete, MPI_ATTR_DELETE, (MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *ierr)); PN2(void, MPI_Attr_get, mpi_attr_get, MPI_ATTR_GET, 
(MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr)); PN2(void, MPI_Attr_put, mpi_attr_put, MPI_ATTR_PUT, (MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *attribute_val, MPI_Fint *ierr)); PN2(void, MPI_Barrier, mpi_barrier, MPI_BARRIER, (MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Barrier_init, mpi_barrier_init, MPI_BARRIER_INIT, (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Bcast, mpi_bcast, MPI_BCAST, (char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Bcast_init, mpi_bcast_init, MPI_BCAST_INIT, (char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Bsend, mpi_bsend, MPI_BSEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr)); PN2(void, MPI_Bsend_init, mpi_bsend_init, MPI_BSEND_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Buffer_attach, mpi_buffer_attach, MPI_BUFFER_ATTACH, (char *buffer, MPI_Fint *size, MPI_Fint *ierr)); @@ -122,14 +133,16 @@ PN2(void, MPI_Comm_call_errhandler, mpi_comm_call_errhandler, MPI_COMM_CALL_ERRH PN2(void, MPI_Comm_compare, mpi_comm_compare, MPI_COMM_COMPARE, (MPI_Fint *comm1, MPI_Fint *comm2, MPI_Fint *result, MPI_Fint *ierr)); PN2(void, MPI_Comm_connect, mpi_comm_connect, MPI_COMM_CONNECT, (char *port_name, MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr, int port_name_len)); PN2(void, MPI_Comm_create_errhandler, mpi_comm_create_errhandler, MPI_COMM_CREATE_ERRHANDLER, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr)); -PN2(void, MPI_Comm_create_keyval, mpi_comm_create_keyval, MPI_COMM_CREATE_KEYVAL, (ompi_aint_copy_attr_function* 
comm_copy_attr_fn, ompi_aint_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); +PN2(void, MPI_Comm_create_keyval, mpi_comm_create_keyval, MPI_COMM_CREATE_KEYVAL, (ompi_aint_copy_attr_function comm_copy_attr_fn, ompi_aint_delete_attr_function comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); PN2(void, MPI_Comm_create, mpi_comm_create, MPI_COMM_CREATE, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *newcomm, MPI_Fint *ierr)); +PN2(void, MPI_Comm_create_from_group, mpi_comm_create_from_group, MPI_COMM_CREATE_FROM_GROUP, (MPI_Fint *group, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *newcomm, MPI_Fint *ierr, int name_len)); PN2(void, MPI_Comm_create_group, mpi_comm_create_group, MPI_COMM_CREATE_GROUP, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *tag, MPI_Fint *newcomm, MPI_Fint *ierr)); PN2(void, MPI_Comm_delete_attr, mpi_comm_delete_attr, MPI_COMM_DELETE_ATTR, (MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Fint *ierr)); PN2(void, MPI_Comm_disconnect, mpi_comm_disconnect, MPI_COMM_DISCONNECT, (MPI_Fint *comm, MPI_Fint *ierr)); PN2(void, MPI_Comm_dup, mpi_comm_dup, MPI_COMM_DUP, (MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr)); PN2(void, MPI_Comm_dup_with_info, mpi_comm_dup_with_info, MPI_COMM_DUP_WITH_INFO, (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *newcomm, MPI_Fint *ierr)); PN2(void, MPI_Comm_idup, mpi_comm_idup, MPI_COMM_IDUP, (MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *request, MPI_Fint *ierr)); +PN2(void, MPI_Comm_idup_with_info, mpi_comm_idup_with_info, MPI_COMM_IDUP_WITH_INFO, (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *newcomm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Comm_free_keyval, mpi_comm_free_keyval, MPI_COMM_FREE_KEYVAL, (MPI_Fint *comm_keyval, MPI_Fint *ierr)); PN2(void, MPI_Comm_free, mpi_comm_free, MPI_COMM_FREE, (MPI_Fint *comm, MPI_Fint *ierr)); PN2(void, MPI_Comm_get_attr, mpi_comm_get_attr, MPI_COMM_GET_ATTR, (MPI_Fint *comm, 
MPI_Fint *comm_keyval, MPI_Aint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr)); @@ -165,6 +178,7 @@ PN2(void, MPI_Errhandler_set, mpi_errhandler_set, MPI_ERRHANDLER_SET, (MPI_Fint PN2(void, MPI_Error_class, mpi_error_class, MPI_ERROR_CLASS, (MPI_Fint *errorcode, MPI_Fint *errorclass, MPI_Fint *ierr)); PN2(void, MPI_Error_string, mpi_error_string, MPI_ERROR_STRING, (MPI_Fint *errorcode, char *string, MPI_Fint *resultlen, MPI_Fint *ierr, int string_len)); PN2(void, MPI_Exscan, mpi_exscan, MPI_EXSCAN, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Exscan_init, mpi_exscan_init, MPI_EXSCAN_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_F_sync_reg, mpi_f_sync_reg, MPI_F_SYNC_REG, (char *buf)); PN2(void, MPI_Fetch_and_op, mpi_fetch_and_op, MPI_FETCH_AND_OP, (char *origin_addr, char *result_addr, MPI_Fint *datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *op, MPI_Fint *win, MPI_Fint *ierr)); PN2(void, MPI_File_call_errhandler, mpi_file_call_errhandler, MPI_FILE_CALL_ERRHANDLER, (MPI_Fint *fh, MPI_Fint *errorcode, MPI_Fint *ierr)); @@ -230,7 +244,9 @@ PN2(void, MPI_Finalize, mpi_finalize, MPI_FINALIZE, (MPI_Fint *ierr)); PN2(void, MPI_Finalized, mpi_finalized, MPI_FINALIZED, (ompi_fortran_logical_t *flag, MPI_Fint *ierr)); PN2(void, MPI_Free_mem, mpi_free_mem, MPI_FREE_MEM, (char *base, MPI_Fint *ierr)); PN2(void, MPI_Gather, mpi_gather, MPI_GATHER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Gather_init, mpi_gather_init, MPI_GATHER_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint 
*request, MPI_Fint *ierr)); PN2(void, MPI_Gatherv, mpi_gatherv, MPI_GATHERV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Gatherv_init, mpi_gatherv_init, MPI_GATHERV_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Get_accumulate, mpi_get_accumulate, MPI_GET_ACCUMULATE, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, char *result_addr, MPI_Fint *result_count, MPI_Fint *result_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *op, MPI_Fint *win, MPI_Fint *ierr)); PN2(void, MPI_Get_address, mpi_get_address, MPI_GET_ADDRESS, (char *location, MPI_Aint *address, MPI_Fint *ierr)); PN2(void, MPI_Get_count, mpi_get_count, MPI_GET_COUNT, (MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, MPI_Fint *ierr)); @@ -247,11 +263,12 @@ PN2(void, MPI_Graph_neighbors_count, mpi_graph_neighbors_count, MPI_GRAPH_NEIGHB PN2(void, MPI_Graph_neighbors, mpi_graph_neighbors, MPI_GRAPH_NEIGHBORS, (MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *maxneighbors, MPI_Fint *neighbors, MPI_Fint *ierr)); PN2(void, MPI_Graphdims_get, mpi_graphdims_get, MPI_GRAPHDIMS_GET, (MPI_Fint *comm, MPI_Fint *nnodes, MPI_Fint *nedges, MPI_Fint *ierr)); PN2(void, MPI_Grequest_complete, mpi_grequest_complete, MPI_GREQUEST_COMPLETE, (MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPI_Grequest_start, mpi_grequest_start, MPI_GREQUEST_START, (MPI_F_Grequest_query_function* query_fn, MPI_F_Grequest_free_function* free_fn, MPI_F_Grequest_cancel_function* cancel_fn, MPI_Aint *extra_state, MPI_Fint *request, MPI_Fint *ierr)); +PN2(void, MPI_Grequest_start, mpi_grequest_start, MPI_GREQUEST_START, 
(MPI_F_Grequest_query_function query_fn, MPI_F_Grequest_free_function free_fn, MPI_F_Grequest_cancel_function cancel_fn, MPI_Aint *extra_state, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Group_compare, mpi_group_compare, MPI_GROUP_COMPARE, (MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *result, MPI_Fint *ierr)); PN2(void, MPI_Group_difference, mpi_group_difference, MPI_GROUP_DIFFERENCE, (MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr)); PN2(void, MPI_Group_excl, mpi_group_excl, MPI_GROUP_EXCL, (MPI_Fint *group, MPI_Fint *n, MPI_Fint *ranks, MPI_Fint *newgroup, MPI_Fint *ierr)); PN2(void, MPI_Group_free, mpi_group_free, MPI_GROUP_FREE, (MPI_Fint *group, MPI_Fint *ierr)); +PN2(void, MPI_Group_from_session_pset, mpi_group_from_session_pset, MPI_GROUP_FROM_SESSION_PSET, (MPI_Fint *group, char *pset_name, MPI_Fint *newgroup, MPI_Fint *ierr, int name_len)); PN2(void, MPI_Group_incl, mpi_group_incl, MPI_GROUP_INCL, (MPI_Fint *group, MPI_Fint *n, MPI_Fint *ranks, MPI_Fint *newgroup, MPI_Fint *ierr)); PN2(void, MPI_Group_intersection, mpi_group_intersection, MPI_GROUP_INTERSECTION, (MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr)); PN2(void, MPI_Group_range_excl, mpi_group_range_excl, MPI_GROUP_RANGE_EXCL, (MPI_Fint *group, MPI_Fint *n, MPI_Fint ranges[][3], MPI_Fint *newgroup, MPI_Fint *ierr)); @@ -292,29 +309,38 @@ PN2(void, MPI_Info_free, mpi_info_free, MPI_INFO_FREE, (MPI_Fint *info, MPI_Fint PN2(void, MPI_Info_get, mpi_info_get, MPI_INFO_GET, (MPI_Fint *info, char *key, MPI_Fint *valuelen, char *value, ompi_fortran_logical_t *flag, MPI_Fint *ierr, int key_len, int value_len)); PN2(void, MPI_Info_get_nkeys, mpi_info_get_nkeys, MPI_INFO_GET_NKEYS, (MPI_Fint *info, MPI_Fint *nkeys, MPI_Fint *ierr)); PN2(void, MPI_Info_get_nthkey, mpi_info_get_nthkey, MPI_INFO_GET_NTHKEY, (MPI_Fint *info, MPI_Fint *n, char *key, MPI_Fint *ierr, int key_len)); +PN2(void, MPI_Info_get_string, mpi_info_get_string, MPI_INFO_GET_STRING, 
(MPI_Fint *info, char *key, MPI_Fint *buflen, char *value, ompi_fortran_logical_t *flag, MPI_Fint *ierr, int key_len, int value_len)); PN2(void, MPI_Info_get_valuelen, mpi_info_get_valuelen, MPI_INFO_GET_VALUELEN, (MPI_Fint *info, char *key, MPI_Fint *valuelen, ompi_fortran_logical_t *flag, MPI_Fint *ierr, int key_len)); PN2(void, MPI_Info_set, mpi_info_set, MPI_INFO_SET, (MPI_Fint *info, char *key, char *value, MPI_Fint *ierr, int key_len, int value_len)); PN2(void, MPI_Init, mpi_init, MPI_INIT, (MPI_Fint *ierr)); PN2(void, MPI_Initialized, mpi_initialized, MPI_INITIALIZED, (ompi_fortran_logical_t *flag, MPI_Fint *ierr)); PN2(void, MPI_Init_thread, mpi_init_thread, MPI_INIT_THREAD, (MPI_Fint *required, MPI_Fint *provided, MPI_Fint *ierr)); PN2(void, MPI_Intercomm_create, mpi_intercomm_create, MPI_INTERCOMM_CREATE, (MPI_Fint *local_comm, MPI_Fint *local_leader, MPI_Fint *bridge_comm, MPI_Fint *remote_leader, MPI_Fint *tag, MPI_Fint *newintercomm, MPI_Fint *ierr)); +PN2(void, MPI_Intercomm_create_from_groups, mpi_intercomm_create_from_groups, MPI_INTERCOMM_CREATE_FROM_GROUPS, (MPI_Fint *local_group, MPI_Fint *local_leader, MPI_Fint *remote_group, MPI_Fint *remote_leader, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *newintercomm, MPI_Fint *ierr, int name_len)); PN2(void, MPI_Intercomm_merge, mpi_intercomm_merge, MPI_INTERCOMM_MERGE, (MPI_Fint *intercomm, ompi_fortran_logical_t *high, MPI_Fint *newintercomm, MPI_Fint *ierr)); PN2(void, MPI_Iprobe, mpi_iprobe, MPI_IPROBE, (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_Irecv, mpi_irecv, MPI_IRECV, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Irsend, mpi_irsend, MPI_IRSEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, 
MPI_Isend, mpi_isend, MPI_ISEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); +PN2(void, MPI_Isendrecv, mpi_isendrecv, MPI_ISENDRECV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Fint *dest, MPI_Fint *sendtag, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); +PN2(void, MPI_Isendrecv_replace, mpi_isendrecv_replace, MPI_ISENDRECV_REPLACE, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *sendtag, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Issend, mpi_issend, MPI_ISSEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Is_thread_main, mpi_is_thread_main, MPI_IS_THREAD_MAIN, (ompi_fortran_logical_t *flag, MPI_Fint *ierr)); -PN2(void, MPI_Keyval_create, mpi_keyval_create, MPI_KEYVAL_CREATE, (ompi_fint_copy_attr_function* copy_fn, ompi_fint_delete_attr_function* delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr)); +PN2(void, MPI_Keyval_create, mpi_keyval_create, MPI_KEYVAL_CREATE, (ompi_fint_copy_attr_function copy_fn, ompi_fint_delete_attr_function delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr)); PN2(void, MPI_Keyval_free, mpi_keyval_free, MPI_KEYVAL_FREE, (MPI_Fint *keyval, MPI_Fint *ierr)); PN2(void, MPI_Lookup_name, mpi_lookup_name, MPI_LOOKUP_NAME, (char *service_name, MPI_Fint *info, char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len)); PN2(void, MPI_Mprobe, mpi_mprobe, MPI_MPROBE, (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *message, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_Mrecv, mpi_mrecv, MPI_MRECV, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *message, MPI_Fint *status, MPI_Fint *ierr)); 
PN2(void, MPI_Neighbor_allgather, mpi_neighbor_allgather, MPI_NEIGHBOR_ALLGATHER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Neighbor_allgather_init, mpi_neighbor_allgather_init, MPI_NEIGHBOR_ALLGATHER_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Neighbor_allgatherv, mpi_neighbor_allgatherv, MPI_NEIGHBOR_ALLGATHERV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Neighbor_allgatherv_init, mpi_neighbor_allgatherv_init, MPI_NEIGHBOR_ALLGATHERV_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Neighbor_alltoall, mpi_neighbor_alltoall, MPI_NEIGHBOR_ALLTOALL, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Neighbor_alltoall_init, mpi_neighbor_alltoall_init, MPI_NEIGHBOR_ALLTOALL_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Neighbor_alltoallv, mpi_neighbor_alltoallv, MPI_NEIGHBOR_ALLTOALLV, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Neighbor_alltoallv_init, mpi_neighbor_alltoallv_init, MPI_NEIGHBOR_ALLTOALLV_INIT, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint 
*sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Neighbor_alltoallw, mpi_neighbor_alltoallw, MPI_NEIGHBOR_ALLTOALLW, (char *sendbuf, MPI_Fint *sendcounts, MPI_Aint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Aint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Neighbor_alltoallw_init, mpi_neighbor_alltoallw_init, MPI_NEIGHBOR_ALLTOALLW_INIT, (char *sendbuf, MPI_Fint *sendcounts, MPI_Aint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Aint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Op_commutative, mpi_op_commutative, MPI_OP_COMMUTATIVE, (MPI_Fint *op, MPI_Fint *commute, MPI_Fint *ierr)); PN2(void, MPI_Op_create, mpi_op_create, MPI_OP_CREATE, (ompi_op_fortran_handler_fn_t* function, ompi_fortran_logical_t *commute, MPI_Fint *op, MPI_Fint *ierr)); PN2(void, MPI_Open_port, mpi_open_port, MPI_OPEN_PORT, (MPI_Fint *info, char *port_name, MPI_Fint *ierr, int port_name_len)); @@ -328,8 +354,8 @@ PN2(void, MPI_Pcontrol, mpi_pcontrol, MPI_PCONTROL, (MPI_Fint *level)); PN2(void, MPI_Pready, mpi_pready, MPI_PREADY, (MPI_Fint *partition, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Pready_list, mpi_pready_list, MPI_PREADY_LIST, (MPI_Fint *length, MPI_Fint *partition, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Pready_range, mpi_pready_range, MPI_PREADY_RANGE, (MPI_Fint *partition_low, MPI_Fint *partition_high, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPI_Precv_init, mpi_precv_init, MPI_PRECV_INIT, (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *src, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPI_Psend_init, mpi_psend_init, MPI_PSEND_INIT, (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint 
*dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); +PN2(void, MPI_Precv_init, mpi_precv_init, MPI_PRECV_INIT, (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *src, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); +PN2(void, MPI_Psend_init, mpi_psend_init, MPI_PSEND_INIT, (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Probe, mpi_probe, MPI_PROBE, (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_Publish_name, mpi_publish_name, MPI_PUBLISH_NAME, (char *service_name, MPI_Fint *info, char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len)); PN2(void, MPI_Put, mpi_put, MPI_PUT, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *win, MPI_Fint *ierr)); @@ -338,9 +364,12 @@ PN2(void, MPI_Raccumulate, mpi_raccumulate, MPI_RACCUMULATE, (char *origin_addr, PN2(void, MPI_Recv_init, mpi_recv_init, MPI_RECV_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Recv, mpi_recv, MPI_RECV, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_Reduce, mpi_reduce, MPI_REDUCE, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Reduce_init, mpi_reduce_init, MPI_REDUCE_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Reduce_local, mpi_reduce_local, 
MPI_REDUCE_LOCAL, (char *inbuf, char *inoutbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *ierr)); PN2(void, MPI_Reduce_scatter, mpi_reduce_scatter, MPI_REDUCE_SCATTER, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Reduce_scatter_init, mpi_reduce_scatter_init, MPI_REDUCE_SCATTER_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Reduce_scatter_block, mpi_reduce_scatter_block, MPI_REDUCE_SCATTER_BLOCK, (char *sendbuf, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Reduce_scatter_block_init, mpi_reduce_scatter_block_init, MPI_REDUCE_SCATTER_BLOCK_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Register_datarep, mpi_register_datarep, MPI_REGISTER_DATAREP, (char *datarep, ompi_mpi2_fortran_datarep_conversion_fn_t *read_conversion_fn, ompi_mpi2_fortran_datarep_conversion_fn_t *write_conversion_fn, ompi_mpi2_fortran_datarep_extent_fn_t *dtype_file_extent_fn, MPI_Aint *extra_state, MPI_Fint *ierr, int datarep_len)); PN2(void, MPI_Request_free, mpi_request_free, MPI_REQUEST_FREE, (MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Request_get_status, mpi_request_get_status, MPI_REQUEST_GET_STATUS, (MPI_Fint *request, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr)); @@ -350,12 +379,21 @@ PN2(void, MPI_Rput, mpi_rput, MPI_RPUT, (char *origin_addr, MPI_Fint *origin_cou PN2(void, MPI_Rsend, mpi_rsend, MPI_RSEND, (char *ibuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr)); PN2(void, MPI_Rsend_init, mpi_rsend_init, MPI_RSEND_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, 
MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Scan, mpi_scan, MPI_SCAN, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Scan_init, mpi_scan_init, MPI_SCAN_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Scatter, mpi_scatter, MPI_SCATTER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Scatter_init, mpi_scatter_init, MPI_SCATTER_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Scatterv, mpi_scatterv, MPI_SCATTERV, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *displs, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr)); +PN2(void, MPI_Scatterv_init, mpi_scatterv_init, MPI_SCATTERV_INIT, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *displs, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Send_init, mpi_send_init, MPI_SEND_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Send, mpi_send, MPI_SEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr)); PN2(void, MPI_Sendrecv, mpi_sendrecv, MPI_SENDRECV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Fint *dest, MPI_Fint *sendtag, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint 
*source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_Sendrecv_replace, mpi_sendrecv_replace, MPI_SENDRECV_REPLACE, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *sendtag, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr)); +PN2(void, MPI_Session_get_info, mpi_session_get_info, MPI_SESSION_GET_INFO, (MPI_Fint *session, MPI_Fint *info, MPI_Fint *ierr)); +PN2(void, MPI_Session_get_nth_pset, mpi_session_get_nth_pset, MPI_SESSION_GET_NTH_PSET, (MPI_Fint *session, MPI_Fint *info, MPI_Fint *n, MPI_Fint *pset_len, char *pset_name, MPI_Fint *ierr)); +PN2(void, MPI_Session_get_num_psets, mpi_session_get_num_psets, MPI_SESSION_GET_NUM_PSETS, (MPI_Fint *session, MPI_Fint *info, MPI_Fint *npset_names, MPI_Fint *ierr)); +PN2(void, MPI_Session_get_pset_info, mpi_session_get_pset_info, MPI_SESSION_GET_PSET_INFO, (MPI_Fint *session, char *pset_name, MPI_Fint *info, MPI_Fint *ierr, int name_len)); +PN2(void, MPI_Session_init, mpi_session_init, MPI_SESSION_INIT, (MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *session, MPI_Fint *ierr)); +PN2(void, MPI_Session_finalize, mpi_session_finalize, MPI_SESSION_FINALIZE, (MPI_Fint *session, MPI_Fint *ierr)); PN2(void, MPI_Ssend_init, mpi_ssend_init, MPI_SSEND_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Ssend, mpi_ssend, MPI_SSEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr)); PN2(void, MPI_Start, mpi_start, MPI_START, (MPI_Fint *request, MPI_Fint *ierr)); @@ -379,7 +417,7 @@ PN2(void, MPI_Type_create_f90_integer, mpi_type_create_f90_integer, MPI_TYPE_CRE PN2(void, MPI_Type_create_f90_real, mpi_type_create_f90_real, MPI_TYPE_CREATE_F90_REAL, (MPI_Fint *p, MPI_Fint *r, MPI_Fint *newtype, MPI_Fint *ierr)); PN2(void, MPI_Type_create_hindexed, mpi_type_create_hindexed, 
MPI_TYPE_CREATE_HINDEXED, (MPI_Fint *count, MPI_Fint *array_of_blocklengths, MPI_Aint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr)); PN2(void, MPI_Type_create_hvector, mpi_type_create_hvector, MPI_TYPE_CREATE_HVECTOR, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Aint *stride, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr)); -PN2(void, MPI_Type_create_keyval, mpi_type_create_keyval, MPI_TYPE_CREATE_KEYVAL, (ompi_aint_copy_attr_function* type_copy_attr_fn, ompi_aint_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); +PN2(void, MPI_Type_create_keyval, mpi_type_create_keyval, MPI_TYPE_CREATE_KEYVAL, (ompi_aint_copy_attr_function type_copy_attr_fn, ompi_aint_delete_attr_function type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); PN2(void, MPI_Type_create_indexed_block, mpi_type_create_indexed_block, MPI_TYPE_CREATE_INDEXED_BLOCK, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Fint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr)); PN2(void, MPI_Type_create_hindexed_block, mpi_type_create_hindexed_block, MPI_TYPE_CREATE_HINDEXED_BLOCK, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Aint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr)); PN2(void, MPI_Type_create_struct, mpi_type_create_struct, MPI_TYPE_CREATE_STRUCT, (MPI_Fint *count, MPI_Fint *array_of_block_lengths, MPI_Aint *array_of_displacements, MPI_Fint *array_of_types, MPI_Fint *newtype, MPI_Fint *ierr)); @@ -427,7 +465,7 @@ PN2(void, MPI_Win_complete, mpi_win_complete, MPI_WIN_COMPLETE, (MPI_Fint *win, PN2(void, MPI_Win_create, mpi_win_create, MPI_WIN_CREATE, (char *base, MPI_Aint *size, MPI_Fint *disp_unit, MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win, MPI_Fint *ierr)); PN2(void, MPI_Win_create_dynamic, mpi_win_create_dynamic, MPI_WIN_CREATE_DYNAMIC, (MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win, MPI_Fint *ierr)); PN2(void, 
MPI_Win_create_errhandler, mpi_win_create_errhandler, MPI_WIN_CREATE_ERRHANDLER, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr)); -PN2(void, MPI_Win_create_keyval, mpi_win_create_keyval, MPI_WIN_CREATE_KEYVAL, (ompi_aint_copy_attr_function* win_copy_attr_fn, ompi_aint_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); +PN2(void, MPI_Win_create_keyval, mpi_win_create_keyval, MPI_WIN_CREATE_KEYVAL,(ompi_aint_copy_attr_function win_copy_attr_fn, ompi_aint_delete_attr_function win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); PN2(void, MPI_Win_delete_attr, mpi_win_delete_attr, MPI_WIN_DELETE_ATTR, (MPI_Fint *win, MPI_Fint *win_keyval, MPI_Fint *ierr)); PN2(void, MPI_Win_detach, mpi_win_detach, MPI_WIN_DETACH, (MPI_Fint *win, char *base, MPI_Fint *ierr)); PN2(void, MPI_Win_fence, mpi_win_fence, MPI_WIN_FENCE, (MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr)); diff --git a/ompi/mpi/fortran/mpif-h/psend_init_f.c b/ompi/mpi/fortran/mpif-h/psend_init_f.c index c7804cfac27..655ce8d7945 100644 --- a/ompi/mpi/fortran/mpif-h/psend_init_f.c +++ b/ompi/mpi/fortran/mpif-h/psend_init_f.c @@ -13,6 +13,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2021 Bull S.A.S. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -40,8 +41,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PSEND_INIT, pmpi_psend_init_, pmpi_psend_init__, pompi_psend_init_f, - (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), - (buf, partitions, count, datatype, dest, tag, comm, request, ierr) ) + (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), + (buf, partitions, count, datatype, dest, tag, comm, info, request, ierr) ) #endif #endif @@ -60,21 +61,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_PSEND_INIT, mpi_psend_init_, mpi_psend_init__, ompi_psend_init_f, - (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), - (buf, partitions, count, datatype, dest, tag, comm, request, ierr) ) + (char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), + (buf, partitions, count, datatype, dest, tag, comm, info, request, ierr) ) #else #define ompi_psend_init_f pompi_psend_init_f #endif #endif -void ompi_psend_init_f(char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) +void ompi_psend_init_f(char *buf, MPI_Fint *partitions, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; + MPI_Info c_info; MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; + c_info = PMPI_Info_f2c(*info); c_comm = PMPI_Comm_f2c (*comm); c_ierr = PMPI_Psend_init(OMPI_F2C_BOTTOM(buf), @@ -82,7 +85,7 @@ void ompi_psend_init_f(char *buf, MPI_Fint *partitions, MPI_Fint 
*count, MPI_Fin OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), - c_comm, &c_req); + c_comm, c_info, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpiext/pcollreq/mpif-h/reduce_init_f.c b/ompi/mpi/fortran/mpif-h/reduce_init_f.c similarity index 54% rename from ompi/mpiext/pcollreq/mpif-h/reduce_init_f.c rename to ompi/mpi/fortran/mpif-h/reduce_init_f.c index 2b3ed4c99c0..01437078cdc 100644 --- a/ompi/mpiext/pcollreq/mpif-h/reduce_init_f.c +++ b/ompi/mpi/fortran/mpif-h/reduce_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,55 +23,54 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_REDUCE_INIT = ompix_reduce_init_f -#pragma weak pmpix_reduce_init = ompix_reduce_init_f -#pragma weak pmpix_reduce_init_ = ompix_reduce_init_f -#pragma weak pmpix_reduce_init__ = ompix_reduce_init_f +#pragma weak PMPI_REDUCE_INIT = ompi_reduce_init_f +#pragma weak pmpi_reduce_init = ompi_reduce_init_f +#pragma weak pmpi_reduce_init_ = ompi_reduce_init_f +#pragma weak pmpi_reduce_init__ = ompi_reduce_init_f -#pragma weak PMPIX_Reduce_init_f = ompix_reduce_init_f -#pragma weak PMPIX_Reduce_init_f08 = ompix_reduce_init_f +#pragma weak PMPI_Reduce_init_f = ompi_reduce_init_f +#pragma weak PMPI_Reduce_init_f08 = ompi_reduce_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_REDUCE_INIT, - pmpix_reduce_init, - pmpix_reduce_init_, - pmpix_reduce_init__, - pompix_reduce_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_INIT, + pmpi_reduce_init, + pmpi_reduce_init_, + pmpi_reduce_init__, + pompi_reduce_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, root, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_REDUCE_INIT = ompix_reduce_init_f -#pragma weak mpix_reduce_init = ompix_reduce_init_f -#pragma weak mpix_reduce_init_ = ompix_reduce_init_f -#pragma weak mpix_reduce_init__ = ompix_reduce_init_f +#pragma weak MPI_REDUCE_INIT = ompi_reduce_init_f +#pragma weak mpi_reduce_init = ompi_reduce_init_f +#pragma weak mpi_reduce_init_ = ompi_reduce_init_f +#pragma weak mpi_reduce_init__ = ompi_reduce_init_f -#pragma weak MPIX_Reduce_init_f = ompix_reduce_init_f -#pragma weak 
MPIX_Reduce_init_f08 = ompix_reduce_init_f +#pragma weak MPI_Reduce_init_f = ompi_reduce_init_f +#pragma weak MPI_Reduce_init_f08 = ompi_reduce_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_REDUCE_INIT, - mpix_reduce_init, - mpix_reduce_init_, - mpix_reduce_init__, - ompix_reduce_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE_INIT, + mpi_reduce_init, + mpi_reduce_init_, + mpi_reduce_init__, + ompi_reduce_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, root, comm, info, request, ierr) ) #else -#define ompix_reduce_init_f pompix_reduce_init_f +#define ompi_reduce_init_f pompi_reduce_init_f #endif #endif -void ompix_reduce_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, - MPI_Fint *datatype, MPI_Fint *op, - MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, - MPI_Fint *ierr) +void ompi_reduce_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, + MPI_Fint *datatype, MPI_Fint *op, + MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, + MPI_Fint *ierr) { int c_ierr; MPI_Datatype c_type; @@ -89,11 +88,11 @@ void ompix_reduce_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Reduce_init(sendbuf, recvbuf, - OMPI_FINT_2_INT(*count), - c_type, c_op, - OMPI_FINT_2_INT(*root), - c_comm, c_info, &c_request); + c_ierr = PMPI_Reduce_init(sendbuf, recvbuf, + OMPI_FINT_2_INT(*count), + c_type, c_op, + OMPI_FINT_2_INT(*root), + c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpiext/pcollreq/mpif-h/reduce_scatter_block_init_f.c b/ompi/mpi/fortran/mpif-h/reduce_scatter_block_init_f.c similarity index 50% rename from 
ompi/mpiext/pcollreq/mpif-h/reduce_scatter_block_init_f.c rename to ompi/mpi/fortran/mpif-h/reduce_scatter_block_init_f.c index 953e0cb9771..1bc36af0f7f 100644 --- a/ompi/mpiext/pcollreq/mpif-h/reduce_scatter_block_init_f.c +++ b/ompi/mpi/fortran/mpif-h/reduce_scatter_block_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,55 +23,54 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_REDUCE_SCATTER_BLOCK_INIT = ompix_reduce_scatter_block_init_f -#pragma weak pmpix_reduce_scatter_block_init = ompix_reduce_scatter_block_init_f -#pragma weak pmpix_reduce_scatter_block_init_ = ompix_reduce_scatter_block_init_f -#pragma weak pmpix_reduce_scatter_block_init__ = ompix_reduce_scatter_block_init_f +#pragma weak PMPI_REDUCE_SCATTER_BLOCK_INIT = ompi_reduce_scatter_block_init_f +#pragma weak pmpi_reduce_scatter_block_init = ompi_reduce_scatter_block_init_f +#pragma weak pmpi_reduce_scatter_block_init_ = ompi_reduce_scatter_block_init_f +#pragma weak pmpi_reduce_scatter_block_init__ = ompi_reduce_scatter_block_init_f -#pragma weak PMPIX_Reduce_scatter_block_init_f = ompix_reduce_scatter_block_init_f -#pragma weak PMPIX_Reduce_scatter_block_init_f08 = ompix_reduce_scatter_block_init_f +#pragma weak PMPI_Reduce_scatter_block_init_f = ompi_reduce_scatter_block_init_f +#pragma weak PMPI_Reduce_scatter_block_init_f08 = ompi_reduce_scatter_block_init_f #else 
-OMPI_GENERATE_F77_BINDINGS (PMPIX_REDUCE_SCATTER_BLOCK_INIT, - pmpix_reduce_scatter_block_init, - pmpix_reduce_scatter_block_init_, - pmpix_reduce_scatter_block_init__, - pompix_reduce_scatter_block_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_SCATTER_BLOCK_INIT, + pmpi_reduce_scatter_block_init, + pmpi_reduce_scatter_block_init_, + pmpi_reduce_scatter_block_init__, + pompi_reduce_scatter_block_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_REDUCE_SCATTER_BLOCK_INIT = ompix_reduce_scatter_block_init_f -#pragma weak mpix_reduce_scatter_block_init = ompix_reduce_scatter_block_init_f -#pragma weak mpix_reduce_scatter_block_init_ = ompix_reduce_scatter_block_init_f -#pragma weak mpix_reduce_scatter_block_init__ = ompix_reduce_scatter_block_init_f +#pragma weak MPI_REDUCE_SCATTER_BLOCK_INIT = ompi_reduce_scatter_block_init_f +#pragma weak mpi_reduce_scatter_block_init = ompi_reduce_scatter_block_init_f +#pragma weak mpi_reduce_scatter_block_init_ = ompi_reduce_scatter_block_init_f +#pragma weak mpi_reduce_scatter_block_init__ = ompi_reduce_scatter_block_init_f -#pragma weak MPIX_Reduce_scatter_block_init_f = ompix_reduce_scatter_block_init_f -#pragma weak MPIX_Reduce_scatter_block_init_f08 = ompix_reduce_scatter_block_init_f +#pragma weak MPI_Reduce_scatter_block_init_f = ompi_reduce_scatter_block_init_f +#pragma weak MPI_Reduce_scatter_block_init_f08 = ompi_reduce_scatter_block_init_f #else #if ! 
OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_REDUCE_SCATTER_BLOCK_INIT, - mpix_reduce_scatter_block_init, - mpix_reduce_scatter_block_init_, - mpix_reduce_scatter_block_init__, - ompix_reduce_scatter_block_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE_SCATTER_BLOCK_INIT, + mpi_reduce_scatter_block_init, + mpi_reduce_scatter_block_init_, + mpi_reduce_scatter_block_init__, + ompi_reduce_scatter_block_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, info, request, ierr) ) #else -#define ompix_reduce_scatter_block_init_f pompix_reduce_scatter_block_init_f +#define ompi_reduce_scatter_block_init_f pompi_reduce_scatter_block_init_f #endif #endif -void ompix_reduce_scatter_block_init_f(char *sendbuf, char *recvbuf, - MPI_Fint *recvcount, MPI_Fint *datatype, - MPI_Fint *op, MPI_Fint *comm, - MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +void ompi_reduce_scatter_block_init_f(char *sendbuf, char *recvbuf, + MPI_Fint *recvcount, MPI_Fint *datatype, + MPI_Fint *op, MPI_Fint *comm, + MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; @@ -92,9 +91,9 @@ void ompix_reduce_scatter_block_init_f(char *sendbuf, char *recvbuf, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Reduce_scatter_block_init(sendbuf, recvbuf, - OMPI_FINT_2_INT(*recvcount), - c_type, c_op, c_comm, c_info, &c_request); + c_ierr = PMPI_Reduce_scatter_block_init(sendbuf, recvbuf, + OMPI_FINT_2_INT(*recvcount), + c_type, c_op, c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpiext/pcollreq/mpif-h/reduce_scatter_init_f.c b/ompi/mpi/fortran/mpif-h/reduce_scatter_init_f.c similarity index 52% rename from 
ompi/mpiext/pcollreq/mpif-h/reduce_scatter_init_f.c rename to ompi/mpi/fortran/mpif-h/reduce_scatter_init_f.c index 1d9c619258f..f1222b2c8e2 100644 --- a/ompi/mpiext/pcollreq/mpif-h/reduce_scatter_init_f.c +++ b/ompi/mpi/fortran/mpif-h/reduce_scatter_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,55 +23,54 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_REDUCE_SCATTER_INIT = ompix_reduce_scatter_init_f -#pragma weak pmpix_reduce_scatter_init = ompix_reduce_scatter_init_f -#pragma weak pmpix_reduce_scatter_init_ = ompix_reduce_scatter_init_f -#pragma weak pmpix_reduce_scatter_init__ = ompix_reduce_scatter_init_f +#pragma weak PMPI_REDUCE_SCATTER_INIT = ompi_reduce_scatter_init_f +#pragma weak pmpi_reduce_scatter_init = ompi_reduce_scatter_init_f +#pragma weak pmpi_reduce_scatter_init_ = ompi_reduce_scatter_init_f +#pragma weak pmpi_reduce_scatter_init__ = ompi_reduce_scatter_init_f -#pragma weak PMPIX_Reduce_scatter_init_f = ompix_reduce_scatter_init_f -#pragma weak PMPIX_Reduce_scatter_init_f08 = ompix_reduce_scatter_init_f +#pragma weak PMPI_Reduce_scatter_init_f = ompi_reduce_scatter_init_f +#pragma weak PMPI_Reduce_scatter_init_f08 = ompi_reduce_scatter_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_REDUCE_SCATTER_INIT, - pmpix_reduce_scatter_init, - pmpix_reduce_scatter_init_, - pmpix_reduce_scatter_init__, - pompix_reduce_scatter_init_f, 
+OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_SCATTER_INIT, + pmpi_reduce_scatter_init, + pmpi_reduce_scatter_init_, + pmpi_reduce_scatter_init__, + pompi_reduce_scatter_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_REDUCE_SCATTER_INIT = ompix_reduce_scatter_init_f -#pragma weak mpix_reduce_scatter_init = ompix_reduce_scatter_init_f -#pragma weak mpix_reduce_scatter_init_ = ompix_reduce_scatter_init_f -#pragma weak mpix_reduce_scatter_init__ = ompix_reduce_scatter_init_f +#pragma weak MPI_REDUCE_SCATTER_INIT = ompi_reduce_scatter_init_f +#pragma weak mpi_reduce_scatter_init = ompi_reduce_scatter_init_f +#pragma weak mpi_reduce_scatter_init_ = ompi_reduce_scatter_init_f +#pragma weak mpi_reduce_scatter_init__ = ompi_reduce_scatter_init_f -#pragma weak MPIX_Reduce_scatter_init_f = ompix_reduce_scatter_init_f -#pragma weak MPIX_Reduce_scatter_init_f08 = ompix_reduce_scatter_init_f +#pragma weak MPI_Reduce_scatter_init_f = ompi_reduce_scatter_init_f +#pragma weak MPI_Reduce_scatter_init_f08 = ompi_reduce_scatter_init_f #else #if ! 
OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_REDUCE_SCATTER_INIT, - mpix_reduce_scatter_init, - mpix_reduce_scatter_init_, - mpix_reduce_scatter_init__, - ompix_reduce_scatter_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE_SCATTER_INIT, + mpi_reduce_scatter_init, + mpi_reduce_scatter_init_, + mpi_reduce_scatter_init__, + ompi_reduce_scatter_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, info, request, ierr) ) #else -#define ompix_reduce_scatter_init_f pompix_reduce_scatter_init_f +#define ompi_reduce_scatter_init_f pompi_reduce_scatter_init_f #endif #endif -void ompix_reduce_scatter_init_f(char *sendbuf, char *recvbuf, - MPI_Fint *recvcounts, MPI_Fint *datatype, - MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, - MPI_Fint *ierr) +void ompi_reduce_scatter_init_f(char *sendbuf, char *recvbuf, + MPI_Fint *recvcounts, MPI_Fint *datatype, + MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, + MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; @@ -94,9 +93,9 @@ void ompix_reduce_scatter_init_f(char *sendbuf, char *recvbuf, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Reduce_scatter_init(sendbuf, recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - c_type, c_op, c_comm, c_info, &c_request); + c_ierr = PMPI_Reduce_scatter_init(sendbuf, recvbuf, + OMPI_ARRAY_NAME_CONVERT(recvcounts), + c_type, c_op, c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpiext/pcollreq/mpif-h/scan_init_f.c b/ompi/mpi/fortran/mpif-h/scan_init_f.c similarity index 59% rename from ompi/mpiext/pcollreq/mpif-h/scan_init_f.c rename to ompi/mpi/fortran/mpif-h/scan_init_f.c index da0679d5d21..a58d237b942 100644 --- 
a/ompi/mpiext/pcollreq/mpif-h/scan_init_f.c +++ b/ompi/mpi/fortran/mpif-h/scan_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,54 +23,53 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_SCAN_INIT = ompix_scan_init_f -#pragma weak pmpix_scan_init = ompix_scan_init_f -#pragma weak pmpix_scan_init_ = ompix_scan_init_f -#pragma weak pmpix_scan_init__ = ompix_scan_init_f +#pragma weak PMPI_SCAN_INIT = ompi_scan_init_f +#pragma weak pmpi_scan_init = ompi_scan_init_f +#pragma weak pmpi_scan_init_ = ompi_scan_init_f +#pragma weak pmpi_scan_init__ = ompi_scan_init_f -#pragma weak PMPIX_Scan_init_f = ompix_scan_init_f -#pragma weak PMPIX_Scan_init_f08 = ompix_scan_init_f +#pragma weak PMPI_Scan_init_f = ompi_scan_init_f +#pragma weak PMPI_Scan_init_f08 = ompi_scan_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_SCAN_INIT, - pmpix_scan_init, - pmpix_scan_init_, - pmpix_scan_init__, - pompix_scan_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_SCAN_INIT, + pmpi_scan_init, + pmpi_scan_init_, + pmpi_scan_init__, + pompi_scan_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_SCAN_INIT = ompix_scan_init_f -#pragma weak mpix_scan_init 
= ompix_scan_init_f -#pragma weak mpix_scan_init_ = ompix_scan_init_f -#pragma weak mpix_scan_init__ = ompix_scan_init_f +#pragma weak MPI_SCAN_INIT = ompi_scan_init_f +#pragma weak mpi_scan_init = ompi_scan_init_f +#pragma weak mpi_scan_init_ = ompi_scan_init_f +#pragma weak mpi_scan_init__ = ompi_scan_init_f -#pragma weak MPIX_Scan_init_f = ompix_scan_init_f -#pragma weak MPIX_Scan_init_f08 = ompix_scan_init_f +#pragma weak MPI_Scan_init_f = ompi_scan_init_f +#pragma weak MPI_Scan_init_f08 = ompi_scan_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_SCAN_INIT, - mpix_scan_init, - mpix_scan_init_, - mpix_scan_init__, - ompix_scan_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_SCAN_INIT, + mpi_scan_init, + mpi_scan_init_, + mpi_scan_init__, + ompi_scan_init_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, info, request, ierr) ) #else -#define ompix_scan_init_f pompix_scan_init_f +#define ompi_scan_init_f pompi_scan_init_f #endif #endif -void ompix_scan_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, - MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, - MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +void ompi_scan_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, + MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, + MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; @@ -88,7 +87,7 @@ void ompix_scan_init_f(char *sendbuf, char *recvbuf, MPI_Fint *count, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Scan_init(sendbuf, recvbuf, + c_ierr = PMPI_Scan_init(sendbuf, recvbuf, OMPI_FINT_2_INT(*count), c_type, c_op, c_comm, c_info, &c_request); diff --git a/ompi/mpiext/pcollreq/mpif-h/gather_init_f.c b/ompi/mpi/fortran/mpif-h/scatter_init_f.c similarity index 55% rename from 
ompi/mpiext/pcollreq/mpif-h/gather_init_f.c rename to ompi/mpi/fortran/mpif-h/scatter_init_f.c index 87abd523926..2f6f3e7fdb9 100644 --- a/ompi/mpiext/pcollreq/mpif-h/gather_init_f.c +++ b/ompi/mpi/fortran/mpif-h/scatter_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,77 +23,75 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_GATHER_INIT = ompix_gather_init_f -#pragma weak pmpix_gather_init = ompix_gather_init_f -#pragma weak pmpix_gather_init_ = ompix_gather_init_f -#pragma weak pmpix_gather_init__ = ompix_gather_init_f +#pragma weak PMPI_SCATTER_INIT = ompi_scatter_init_f +#pragma weak pmpi_scatter_init = ompi_scatter_init_f +#pragma weak pmpi_scatter_init_ = ompi_scatter_init_f +#pragma weak pmpi_scatter_init__ = ompi_scatter_init_f -#pragma weak PMPIX_Gather_init_f = ompix_gather_init_f -#pragma weak PMPIX_Gather_init_f08 = ompix_gather_init_f +#pragma weak PMPI_Scatter_init_f = ompi_scatter_init_f +#pragma weak PMPI_Scatter_init_f08 = ompi_scatter_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_GATHER_INIT, - pmpix_gather_init, - pmpix_gather_init_, - pmpix_gather_init__, - pompix_gather_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_SCATTER_INIT, + pmpi_scatter_init, + pmpi_scatter_init_, + pmpi_scatter_init__, + pompi_scatter_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint 
*recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_GATHER_INIT = ompix_gather_init_f -#pragma weak mpix_gather_init = ompix_gather_init_f -#pragma weak mpix_gather_init_ = ompix_gather_init_f -#pragma weak mpix_gather_init__ = ompix_gather_init_f +#pragma weak MPI_SCATTER_INIT = ompi_scatter_init_f +#pragma weak mpi_scatter_init = ompi_scatter_init_f +#pragma weak mpi_scatter_init_ = ompi_scatter_init_f +#pragma weak mpi_scatter_init__ = ompi_scatter_init_f -#pragma weak MPIX_Gather_init_f = ompix_gather_init_f -#pragma weak MPIX_Gather_init_f08 = ompix_gather_init_f +#pragma weak MPI_Scatter_init_f = ompi_scatter_init_f +#pragma weak MPI_Scatter_init_f08 = ompi_scatter_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_GATHER_INIT, - mpix_gather_init, - mpix_gather_init_, - mpix_gather_init__, - ompix_gather_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_SCATTER_INIT, + mpi_scatter_init, + mpi_scatter_init_, + mpi_scatter_init__, + ompi_scatter_init_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, ierr) ) #else -#define ompix_gather_init_f pompix_gather_init_f +#define ompi_scatter_init_f pompi_scatter_init_f #endif #endif -void ompix_gather_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, - char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, - MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, - MPI_Fint *ierr) +void ompi_scatter_init_f(char *sendbuf, MPI_Fint *sendcount, + MPI_Fint *sendtype, char *recvbuf, + MPI_Fint *recvcount, MPI_Fint *recvtype, + MPI_Fint *root, MPI_Fint 
*comm, MPI_Fint *info, MPI_Fint *request, + MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; MPI_Info c_info; MPI_Request c_request; + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); - c_comm = PMPI_Comm_f2c(*comm); c_sendtype = PMPI_Type_f2c(*sendtype); c_recvtype = PMPI_Type_f2c(*recvtype); c_info = PMPI_Info_f2c(*info); - sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); + recvbuf = (char *) OMPI_F2C_IN_PLACE(recvbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Gather_init(sendbuf, OMPI_FINT_2_INT(*sendcount), + c_ierr = PMPI_Scatter_init(sendbuf,OMPI_FINT_2_INT(*sendcount), c_sendtype, recvbuf, OMPI_FINT_2_INT(*recvcount), c_recvtype, - OMPI_FINT_2_INT(*root), - c_comm, c_info, &c_request); + OMPI_FINT_2_INT(*root), c_comm, c_info, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpiext/pcollreq/mpif-h/scatterv_init_f.c b/ompi/mpi/fortran/mpif-h/scatterv_init_f.c similarity index 62% rename from ompi/mpiext/pcollreq/mpif-h/scatterv_init_f.c rename to ompi/mpi/fortran/mpif-h/scatterv_init_f.c index 84c535fac98..c670554b24e 100644 --- a/ompi/mpiext/pcollreq/mpif-h/scatterv_init_f.c +++ b/ompi/mpi/fortran/mpif-h/scatterv_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,56 +23,55 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_SCATTERV_INIT = ompix_scatterv_init_f -#pragma weak pmpix_scatterv_init = ompix_scatterv_init_f -#pragma weak pmpix_scatterv_init_ = ompix_scatterv_init_f -#pragma weak pmpix_scatterv_init__ = ompix_scatterv_init_f +#pragma weak PMPI_SCATTERV_INIT = ompi_scatterv_init_f +#pragma weak pmpi_scatterv_init = ompi_scatterv_init_f +#pragma weak pmpi_scatterv_init_ = ompi_scatterv_init_f +#pragma weak pmpi_scatterv_init__ = ompi_scatterv_init_f -#pragma weak PMPIX_Scatterv_init_f = ompix_scatterv_init_f -#pragma weak PMPIX_Scatterv_init_f08 = ompix_scatterv_init_f +#pragma weak PMPI_Scatterv_init_f = ompi_scatterv_init_f +#pragma weak PMPI_Scatterv_init_f08 = ompi_scatterv_init_f #else -OMPI_GENERATE_F77_BINDINGS (PMPIX_SCATTERV_INIT, - pmpix_scatterv_init, - pmpix_scatterv_init_, - pmpix_scatterv_init__, - pompix_scatterv_init_f, +OMPI_GENERATE_F77_BINDINGS (PMPI_SCATTERV_INIT, + pmpi_scatterv_init, + pmpi_scatterv_init_, + pmpi_scatterv_init__, + pompi_scatterv_init_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *displs, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, ierr) ) #endif #endif #if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_SCATTERV_INIT = ompix_scatterv_init_f -#pragma weak mpix_scatterv_init = ompix_scatterv_init_f -#pragma weak mpix_scatterv_init_ = ompix_scatterv_init_f -#pragma weak mpix_scatterv_init__ = ompix_scatterv_init_f +#pragma weak MPI_SCATTERV_INIT = ompi_scatterv_init_f +#pragma weak mpi_scatterv_init = ompi_scatterv_init_f +#pragma 
weak mpi_scatterv_init_ = ompi_scatterv_init_f +#pragma weak mpi_scatterv_init__ = ompi_scatterv_init_f -#pragma weak MPIX_Scatterv_init_f = ompix_scatterv_init_f -#pragma weak MPIX_Scatterv_init_f08 = ompix_scatterv_init_f +#pragma weak MPI_Scatterv_init_f = ompi_scatterv_init_f +#pragma weak MPI_Scatterv_init_f08 = ompi_scatterv_init_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_SCATTERV_INIT, - mpix_scatterv_init, - mpix_scatterv_init_, - mpix_scatterv_init__, - ompix_scatterv_init_f, +OMPI_GENERATE_F77_BINDINGS (MPI_SCATTERV_INIT, + mpi_scatterv_init, + mpi_scatterv_init_, + mpi_scatterv_init__, + ompi_scatterv_init_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *displs, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, ierr) ) #else -#define ompix_scatterv_init_f pompix_scatterv_init_f +#define ompi_scatterv_init_f pompi_scatterv_init_f #endif #endif -void ompix_scatterv_init_f(char *sendbuf, MPI_Fint *sendcounts, - MPI_Fint *displs, MPI_Fint *sendtype, - char *recvbuf, MPI_Fint *recvcount, - MPI_Fint *recvtype, MPI_Fint *root, - MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) +void ompi_scatterv_init_f(char *sendbuf, MPI_Fint *sendcounts, + MPI_Fint *displs, MPI_Fint *sendtype, + char *recvbuf, MPI_Fint *recvcount, + MPI_Fint *recvtype, MPI_Fint *root, + MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; @@ -95,7 +94,7 @@ void ompix_scatterv_init_f(char *sendbuf, MPI_Fint *sendcounts, recvbuf = (char *) OMPI_F2C_IN_PLACE(recvbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = PMPIX_Scatterv_init(sendbuf, + c_ierr = PMPI_Scatterv_init(sendbuf, OMPI_ARRAY_NAME_CONVERT(sendcounts), OMPI_ARRAY_NAME_CONVERT(displs), c_sendtype, 
recvbuf, diff --git a/ompi/mpi/fortran/mpif-h/session_finalize_f.c b/ompi/mpi/fortran/mpif-h/session_finalize_f.c new file mode 100644 index 00000000000..57c26cf9557 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/session_finalize_f.c @@ -0,0 +1,83 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. 
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "ompi/mpi/fortran/mpif-h/bindings.h"
+
+#if OMPI_BUILD_MPI_PROFILING
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak PMPI_SESSION_FINALIZE = ompi_session_finalize_f
+#pragma weak pmpi_session_finalize = ompi_session_finalize_f
+#pragma weak pmpi_session_finalize_ = ompi_session_finalize_f
+#pragma weak pmpi_session_finalize__ = ompi_session_finalize_f
+
+#pragma weak PMPI_Session_finalize_f = ompi_session_finalize_f
+#pragma weak PMPI_Session_finalize_f08 = ompi_session_finalize_f
+#else
+OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_FINALIZE,
+                            pmpi_session_finalize,
+                            pmpi_session_finalize_,
+                            pmpi_session_finalize__,
+                            pompi_session_finalize_f,
+                            (MPI_Fint *session, MPI_Fint *ierr),
+                            (session, ierr) )
+#endif
+#endif
+
+
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak MPI_SESSION_FINALIZE = ompi_session_finalize_f
+#pragma weak mpi_session_finalize = ompi_session_finalize_f
+#pragma weak mpi_session_finalize_ = ompi_session_finalize_f
+#pragma weak mpi_session_finalize__ = ompi_session_finalize_f
+
+#pragma weak MPI_Session_finalize_f = ompi_session_finalize_f
+#pragma weak MPI_Session_finalize_f08 = ompi_session_finalize_f
+#else
+#if ! OMPI_BUILD_MPI_PROFILING
+OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_FINALIZE,
+                            mpi_session_finalize,
+                            mpi_session_finalize_,
+                            mpi_session_finalize__,
+                            ompi_session_finalize_f,
+                            (MPI_Fint *session, MPI_Fint *ierr),
+                            (session, ierr) )
+#else
+#define ompi_session_finalize_f pompi_session_finalize_f
+#endif
+#endif
+
+/**
+ * mpif.h binding for MPI_Session_finalize.
+ *
+ * @param session  [in,out] Fortran session handle; on success it is
+ *                 overwritten with the Fortran form of the handle value
+ *                 that PMPI_Session_finalize() left in its argument.
+ * @param ierr     [out] Fortran error code; skipped when NULL.
+ */
+void ompi_session_finalize_f(MPI_Fint *session, MPI_Fint *ierr)
+{
+    int c_ierr;
+    MPI_Session c_session;
+
+    c_session = PMPI_Session_f2c(*session);
+
+    c_ierr = PMPI_Session_finalize(&c_session);
+    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);
+
+    /* The C call updates only the local copy of the handle; propagate it
+     * so the Fortran caller does not keep a handle to a finalized session. */
+    if (MPI_SUCCESS == c_ierr) {
+        *session = PMPI_Session_c2f(c_session);
+    }
+}
diff --git a/ompi/mpi/fortran/mpif-h/session_get_info_f.c b/ompi/mpi/fortran/mpif-h/session_get_info_f.c
new file mode 100644
index 00000000000..c9da5b16ff0
--- /dev/null
+++ b/ompi/mpi/fortran/mpif-h/session_get_info_f.c
@@ -0,0 +1,88 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights
+ * reserved.
+ * Copyright (c) 2015 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
+ * Copyright (c) 2019 Triad National Security, LLC. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "ompi/mpi/fortran/mpif-h/bindings.h"
+
+
+#if OMPI_BUILD_MPI_PROFILING
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak PMPI_SESSION_GET_INFO = ompi_session_get_info_f
+#pragma weak pmpi_session_get_info = ompi_session_get_info_f
+#pragma weak pmpi_session_get_info_ = ompi_session_get_info_f
+#pragma weak pmpi_session_get_info__ = ompi_session_get_info_f
+
+#pragma weak PMPI_Session_get_info_f = ompi_session_get_info_f
+#pragma weak PMPI_Session_get_info_f08 = ompi_session_get_info_f
+#else
+OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_GET_INFO,
+                            pmpi_session_get_info,
+                            pmpi_session_get_info_,
+                            pmpi_session_get_info__,
+                            pompi_session_get_info_f,
+                            (MPI_Fint *session, MPI_Fint *info, MPI_Fint *ierr),
+                            (session, info, ierr) )
+#endif
+#endif
+
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak MPI_SESSION_GET_INFO = ompi_session_get_info_f
+#pragma weak mpi_session_get_info = ompi_session_get_info_f
+#pragma weak mpi_session_get_info_ = ompi_session_get_info_f
+#pragma weak mpi_session_get_info__ = ompi_session_get_info_f
+
+#pragma weak MPI_Session_get_info_f = ompi_session_get_info_f
+#pragma weak MPI_Session_get_info_f08 = ompi_session_get_info_f
+#else
+#if ! OMPI_BUILD_MPI_PROFILING
+OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_GET_INFO,
+                            mpi_session_get_info,
+                            mpi_session_get_info_,
+                            mpi_session_get_info__,
+                            ompi_session_get_info_f,
+                            (MPI_Fint *session, MPI_Fint *info, MPI_Fint *ierr),
+                            (session, info, ierr) )
+#else
+#define ompi_session_get_info_f pompi_session_get_info_f
+#endif
+#endif
+
+/**
+ * mpif.h binding for MPI_Session_get_info.
+ *
+ * @param session  [in] Fortran session handle.
+ * @param info     [out] Fortran handle of the info object returned by
+ *                 PMPI_Session_get_info(); written only on MPI_SUCCESS.
+ * @param ierr     [out] Fortran error code; skipped when NULL.
+ */
+void ompi_session_get_info_f(MPI_Fint *session, MPI_Fint *info, MPI_Fint *ierr)
+{
+    int c_ierr;
+    MPI_Session c_session;
+    MPI_Info c_info;
+
+    c_session = PMPI_Session_f2c(*session);
+
+    c_ierr = PMPI_Session_get_info(c_session, &c_info);
+    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);
+
+    if (MPI_SUCCESS == c_ierr) {
+        *info = PMPI_Info_c2f(c_info);
+    }
+}
diff --git a/ompi/mpi/fortran/mpif-h/session_get_nth_pset_f.c b/ompi/mpi/fortran/mpif-h/session_get_nth_pset_f.c
new file mode 100644
index 00000000000..4b2d0aa180b
--- /dev/null
+++ b/ompi/mpi/fortran/mpif-h/session_get_nth_pset_f.c
@@ -0,0 +1,103 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights
+ * reserved.
+ * Copyright (c) 2015 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
+ * Copyright (c) 2019 Triad National Security, LLC. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "ompi/mpi/fortran/mpif-h/bindings.h"
+#include "ompi/mpi/fortran/base/fortran_base_strings.h"
+#include "ompi/constants.h"
+
+
+#if OMPI_BUILD_MPI_PROFILING
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak PMPI_SESSION_GET_NTH_PSET = ompi_session_get_nth_pset_f
+#pragma weak pmpi_session_get_nth_pset = ompi_session_get_nth_pset_f
+#pragma weak pmpi_session_get_nth_pset_ = ompi_session_get_nth_pset_f
+#pragma weak pmpi_session_get_nth_pset__ = ompi_session_get_nth_pset_f
+
+#pragma weak PMPI_Session_get_nth_pset_f = ompi_session_get_nth_pset_f
+#pragma weak PMPI_Session_get_nth_pset_f08 = ompi_session_get_nth_pset_f
+#else
+OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_GET_NTH_PSET,
+                            pmpi_session_get_nth_pset,
+                            pmpi_session_get_nth_pset_,
+                            pmpi_session_get_nth_pset__,
+                            pompi_session_get_nth_pset_f,
+                            (MPI_Fint *session, MPI_Fint *info, MPI_Fint *n, MPI_Fint *pset_len, char *pset_name, MPI_Fint *ierr),
+                            (session, info, n, pset_len, pset_name, ierr) )
+#endif
+#endif
+
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak MPI_SESSION_GET_NTH_PSET = ompi_session_get_nth_pset_f
+#pragma weak mpi_session_get_nth_pset = ompi_session_get_nth_pset_f
+#pragma weak mpi_session_get_nth_pset_ = ompi_session_get_nth_pset_f
+#pragma weak mpi_session_get_nth_pset__ = ompi_session_get_nth_pset_f
+
+#pragma weak MPI_Session_get_nth_pset_f = ompi_session_get_nth_pset_f
+#pragma weak MPI_Session_get_nth_pset_f08 = ompi_session_get_nth_pset_f
+#else
+#if ! OMPI_BUILD_MPI_PROFILING
+OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_GET_NTH_PSET,
+                            mpi_session_get_nth_pset,
+                            mpi_session_get_nth_pset_,
+                            mpi_session_get_nth_pset__,
+                            ompi_session_get_nth_pset_f,
+                            (MPI_Fint *session, MPI_Fint *info, MPI_Fint *n, MPI_Fint *pset_len, char *pset_name, MPI_Fint *ierr),
+                            (session, info, n, pset_len, pset_name, ierr) )
+#else
+#define ompi_session_get_nth_pset_f pompi_session_get_nth_pset_f
+#endif
+#endif
+
+/**
+ * mpif.h binding for MPI_Session_get_nth_pset.
+ *
+ * @param session    [in] Fortran session handle.
+ * @param info       [in] Accepted for interface compatibility but not
+ *                   forwarded; MPI_INFO_NULL is always passed to the C call.
+ * @param n          [in] Index of the process set to look up.
+ * @param pset_len   [in,out] When 0 on entry this is a length query and the
+ *                   value produced by the C call is stored back.  Otherwise
+ *                   it is the length of pset_name and is left unchanged.
+ * @param pset_name  [out] Blank-padded Fortran string of *pset_len
+ *                   characters; written only on success in non-query mode.
+ * @param ierr       [out] Fortran error code; skipped when NULL.
+ *
+ * NOTE(review): no hidden Fortran character-length argument is declared for
+ * pset_name; *pset_len is used as the string length.  Confirm against the
+ * prototype in the bindings headers.
+ */
+void ompi_session_get_nth_pset_f(MPI_Fint *session, MPI_Fint *info, MPI_Fint *n, MPI_Fint *pset_len, char *pset_name, MPI_Fint *ierr)
+{
+    int c_ierr;
+    /* Work on a private int so the binding also builds when MPI_Fint and
+     * int differ in size. */
+    int c_len = OMPI_FINT_2_INT(*pset_len);
+    MPI_Session c_session;
+    /* Zero-filled, with one spare byte the C call is never told about, so
+     * the buffer is NUL-terminated even if the name fills c_len bytes. */
+    char c_name[MPI_MAX_PSET_NAME_LEN + 1] = {0};
+
+    c_session = PMPI_Session_f2c(*session);
+
+    if (0 == c_len) {
+        /* Length query: only the required length is reported back. */
+        c_ierr = PMPI_Session_get_nth_pset(c_session, MPI_INFO_NULL,
+                                           OMPI_FINT_2_INT(*n),
+                                           &c_len, c_name);
+        if (MPI_SUCCESS == c_ierr) {
+            *pset_len = OMPI_INT_2_FINT(c_len);
+        }
+
+    } else {
+        /* Never let the C call write past the local buffer, whatever
+         * length the caller supplied. */
+        if (c_len > MPI_MAX_PSET_NAME_LEN) {
+            c_len = MPI_MAX_PSET_NAME_LEN;
+        }
+        c_ierr = PMPI_Session_get_nth_pset(c_session, MPI_INFO_NULL,
+                                           OMPI_FINT_2_INT(*n),
+                                           &c_len, c_name);
+        if (MPI_SUCCESS == c_ierr) {
+            /* Pad out to the caller's full Fortran string length. */
+            ompi_fortran_string_c2f(c_name, pset_name,
+                                    OMPI_FINT_2_INT(*pset_len));
+        }
+    }
+
+    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);
+
+}
diff --git a/ompi/mpi/fortran/mpif-h/session_get_num_psets_f.c b/ompi/mpi/fortran/mpif-h/session_get_num_psets_f.c
new file mode 100644
index 00000000000..039b86b8686
--- /dev/null
+++ b/ompi/mpi/fortran/mpif-h/session_get_num_psets_f.c
@@ -0,0 +1,87 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights
+ * reserved.
+ * Copyright (c) 2015 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
+ * Copyright (c) 2019 Triad National Security, LLC. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "ompi/mpi/fortran/mpif-h/bindings.h"
+
+#if OMPI_BUILD_MPI_PROFILING
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak PMPI_SESSION_GET_NUM_PSETS = ompi_session_get_num_psets_f
+#pragma weak pmpi_session_get_num_psets = ompi_session_get_num_psets_f
+#pragma weak pmpi_session_get_num_psets_ = ompi_session_get_num_psets_f
+#pragma weak pmpi_session_get_num_psets__ = ompi_session_get_num_psets_f
+
+#pragma weak PMPI_Session_get_num_psets_f = ompi_session_get_num_psets_f
+#pragma weak PMPI_Session_get_num_psets_f08 = ompi_session_get_num_psets_f
+#else
+OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_GET_NUM_PSETS,
+                            pmpi_session_get_num_psets,
+                            pmpi_session_get_num_psets_,
+                            pmpi_session_get_num_psets__,
+                            pompi_session_get_num_psets_f,
+                            (MPI_Fint *session, MPI_Fint *info, MPI_Fint *npset_names, MPI_Fint *ierr),
+                            (session, info, npset_names, ierr) )
+#endif
+#endif
+
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak MPI_SESSION_GET_NUM_PSETS = ompi_session_get_num_psets_f
+#pragma weak mpi_session_get_num_psets = ompi_session_get_num_psets_f
+#pragma weak mpi_session_get_num_psets_ = ompi_session_get_num_psets_f
+#pragma weak mpi_session_get_num_psets__ = ompi_session_get_num_psets_f
+
+#pragma weak MPI_Session_get_num_psets_f = ompi_session_get_num_psets_f
+#pragma weak MPI_Session_get_num_psets_f08 = ompi_session_get_num_psets_f
+#else
+#if ! OMPI_BUILD_MPI_PROFILING
+OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_GET_NUM_PSETS,
+                            mpi_session_get_num_psets,
+                            mpi_session_get_num_psets_,
+                            mpi_session_get_num_psets__,
+                            ompi_session_get_num_psets_f,
+                            (MPI_Fint *session, MPI_Fint *info, MPI_Fint *npset_names, MPI_Fint *ierr),
+                            (session, info, npset_names, ierr) )
+#else
+#define ompi_session_get_num_psets_f pompi_session_get_num_psets_f
+#endif
+#endif
+
+/**
+ * mpif.h binding for MPI_Session_get_num_psets.
+ *
+ * @param session      [in] Fortran session handle.
+ * @param info         [in] Accepted for interface compatibility but not
+ *                     forwarded; MPI_INFO_NULL is always passed to the C call.
+ * @param npset_names  [out] Count reported by the C call; converted back to
+ *                     Fortran only on MPI_SUCCESS.
+ * @param ierr         [out] Fortran error code; skipped when NULL.
+ */
+void ompi_session_get_num_psets_f(MPI_Fint *session, MPI_Fint *info, MPI_Fint *npset_names, MPI_Fint *ierr)
+{
+    int c_ierr;
+    MPI_Session c_session;
+    OMPI_SINGLE_NAME_DECL(npset_names);
+
+    c_session = PMPI_Session_f2c(*session);
+
+    c_ierr = PMPI_Session_get_num_psets(c_session, MPI_INFO_NULL, OMPI_SINGLE_NAME_CONVERT(npset_names));
+    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);
+
+    if (MPI_SUCCESS == c_ierr) {
+        OMPI_SINGLE_INT_2_FINT(npset_names);
+    }
+}
diff --git a/ompi/mpi/fortran/mpif-h/session_get_pset_info_f.c b/ompi/mpi/fortran/mpif-h/session_get_pset_info_f.c
new file mode 100644
index 00000000000..a8b7b7e3052
--- /dev/null
+++ b/ompi/mpi/fortran/mpif-h/session_get_pset_info_f.c
@@ -0,0 +1,104 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights
+ * reserved.
+ * Copyright (c) 2015 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
+ * Copyright (c) 2019 Triad National Security, LLC. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include <stdlib.h>
+
+#include "ompi/mpi/fortran/mpif-h/bindings.h"
+#include "ompi/mpi/fortran/base/fortran_base_strings.h"
+#include "ompi/constants.h"
+#include "ompi/instance/instance.h"
+
+
+#if OMPI_BUILD_MPI_PROFILING
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak PMPI_SESSION_GET_PSET_INFO = ompi_session_get_pset_info_f
+#pragma weak pmpi_session_get_pset_info = ompi_session_get_pset_info_f
+#pragma weak pmpi_session_get_pset_info_ = ompi_session_get_pset_info_f
+#pragma weak pmpi_session_get_pset_info__ = ompi_session_get_pset_info_f
+
+#pragma weak PMPI_Session_get_pset_info_f = ompi_session_get_pset_info_f
+#pragma weak PMPI_Session_get_pset_info_f08 = ompi_session_get_pset_info_f
+#else
+OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_GET_PSET_INFO,
+                            pmpi_session_get_pset_info,
+                            pmpi_session_get_pset_info_,
+                            pmpi_session_get_pset_info__,
+                            pompi_session_get_pset_info_f,
+                            (MPI_Fint *session, char *pset_name, MPI_Fint *info, MPI_Fint *ierr, int name_len),
+                            (session, pset_name, info, ierr, name_len) )
+#endif
+#endif
+
+#if OPAL_HAVE_WEAK_SYMBOLS
+#pragma weak MPI_SESSION_GET_PSET_INFO = ompi_session_get_pset_info_f
+#pragma weak mpi_session_get_pset_info = ompi_session_get_pset_info_f
+#pragma weak mpi_session_get_pset_info_ = ompi_session_get_pset_info_f
+#pragma weak mpi_session_get_pset_info__ = ompi_session_get_pset_info_f
+
+#pragma weak MPI_Session_get_pset_info_f = ompi_session_get_pset_info_f
+#pragma weak MPI_Session_get_pset_info_f08 = ompi_session_get_pset_info_f
+#else
+#if ! OMPI_BUILD_MPI_PROFILING
+OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_GET_PSET_INFO,
+                            mpi_session_get_pset_info,
+                            mpi_session_get_pset_info_,
+                            mpi_session_get_pset_info__,
+                            ompi_session_get_pset_info_f,
+                            (MPI_Fint *session, char *pset_name, MPI_Fint *info, MPI_Fint *ierr, int name_len),
+                            (session, pset_name, info, ierr, name_len) )
+#else
+#define ompi_session_get_pset_info_f pompi_session_get_pset_info_f
+#endif
+#endif
+
+/**
+ * mpif.h binding for MPI_Session_get_pset_info.
+ *
+ * @param session    [in] Fortran session handle.
+ * @param pset_name  [in] Fortran string naming the process set.
+ * @param info       [out] Fortran handle of the returned info object;
+ *                   written only on MPI_SUCCESS.
+ * @param ierr       [out] Fortran error code; skipped when NULL.
+ * @param name_len   [in] Length of pset_name, passed to
+ *                   ompi_fortran_string_f2c().
+ */
+void ompi_session_get_pset_info_f(MPI_Fint *session, char *pset_name, MPI_Fint *info, MPI_Fint *ierr, int name_len)
+{
+    int c_ierr, ret;
+    MPI_Session c_session;
+    char *c_name;
+    MPI_Info c_info;
+
+    c_session = PMPI_Session_f2c(*session);
+
+    /* Convert the fortran string */
+
+    if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(pset_name, name_len,
+                                                       &c_name))) {
+        c_ierr = OMPI_ERRHANDLER_INVOKE((ompi_instance_t *)c_session, ret,
+                                        "MPI_SESSION_GET_PSET_INFO");
+        if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);
+        return;
+    }
+
+    c_ierr = PMPI_Session_get_pset_info(c_session, c_name, &c_info);
+    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);
+
+    if (MPI_SUCCESS == c_ierr) {
+        *info = PMPI_Info_c2f(c_info);
+    }
+
+    /* The converted name is a temporary C copy owned by this function
+     * (presumably heap-allocated by ompi_fortran_string_f2c(); verify
+     * against fortran_base_strings.h); release it on the way out. */
+    free(c_name);
+}
+
+
diff --git a/ompi/mpi/fortran/mpif-h/session_init_f.c b/ompi/mpi/fortran/mpif-h/session_init_f.c
new file mode 100644
index 00000000000..b36a324f09d
--- /dev/null
+++ b/ompi/mpi/fortran/mpif-h/session_init_f.c
@@ -0,0 +1,89 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_SESSION_INIT = ompi_session_init_f +#pragma weak pmpi_session_init = ompi_session_init_f +#pragma weak pmpi_session_init_ = ompi_session_init_f +#pragma weak pmpi_session_init__ = ompi_session_init_f + +#pragma weak PMPI_Session_init_f = ompi_session_init_f +#pragma weak PMPI_Session_init_f08 = ompi_session_init_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_INIT, + pmpi_session_init, + pmpi_session_init_, + pmpi_session_init__, + pompi_session_init_f, + (MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *session, MPI_Fint *ierr), + (info, errhandler, session, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_SESSION_INIT = ompi_session_init_f +#pragma weak mpi_session_init = ompi_session_init_f +#pragma weak mpi_session_init_ = ompi_session_init_f +#pragma weak mpi_session_init__ = ompi_session_init_f + +#pragma weak MPI_Session_init_f = ompi_session_init_f +#pragma weak MPI_Session_init_f08 = ompi_session_init_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_INIT, + mpi_session_init, + mpi_session_init_, + mpi_session_init__, + ompi_session_init_f, + (MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *session, MPI_Fint *ierr), + (info, errhandler, session, ierr) ) +#else +#define ompi_session_init_f pompi_session_init_f +#endif +#endif + +void ompi_session_init_f(MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *session, MPI_Fint *ierr) +{ + int c_ierr; + MPI_Session c_session; + MPI_Info c_info; + MPI_Errhandler c_errhandler; + + c_info = PMPI_Info_f2c(*info); + c_errhandler = PMPI_Errhandler_f2c(*errhandler); + + c_ierr = PMPI_Session_init(c_info, c_errhandler, &c_session); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *session = PMPI_Session_c2f(c_session); + } +} diff --git a/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c b/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c index 30f365ea549..22074a8d93f 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c @@ -34,13 +34,13 @@ #pragma weak PMPI_Type_create_keyval_f = ompi_type_create_keyval_f #pragma weak PMPI_Type_create_keyval_f08 = ompi_type_create_keyval_f #else -OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_KEYVAL, - pmpi_type_create_keyval, - pmpi_type_create_keyval_, - pmpi_type_create_keyval__, +OMPI_GENERATE_F77_BINDINGS(PMPI_TYPE_CREATE_KEYVAL, pmpi_type_create_keyval, + pmpi_type_create_keyval_, pmpi_type_create_keyval__, pompi_type_create_keyval_f, - (ompi_aint_copy_attr_function* type_copy_attr_fn, ompi_aint_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), - (type_copy_attr_fn, type_delete_attr_fn, type_keyval, extra_state, ierr) ) + (ompi_aint_copy_attr_function type_copy_attr_fn, + ompi_aint_delete_attr_function type_delete_attr_fn, + MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), + (type_copy_attr_fn, type_delete_attr_fn, 
type_keyval, extra_state, ierr)) #endif #endif @@ -54,13 +54,12 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_KEYVAL, #pragma weak MPI_Type_create_keyval_f08 = ompi_type_create_keyval_f #else #if ! OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_KEYVAL, - mpi_type_create_keyval, - mpi_type_create_keyval_, - mpi_type_create_keyval__, - ompi_type_create_keyval_f, - (ompi_aint_copy_attr_function* type_copy_attr_fn, ompi_aint_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), - (type_copy_attr_fn, type_delete_attr_fn, type_keyval, extra_state, ierr) ) +OMPI_GENERATE_F77_BINDINGS(MPI_TYPE_CREATE_KEYVAL, mpi_type_create_keyval, mpi_type_create_keyval_, + mpi_type_create_keyval__, ompi_type_create_keyval_f, + (ompi_aint_copy_attr_function type_copy_attr_fn, + ompi_aint_delete_attr_function type_delete_attr_fn, + MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), + (type_copy_attr_fn, type_delete_attr_fn, type_keyval, extra_state, ierr)) #else #define ompi_type_create_keyval_f pompi_type_create_keyval_f #endif @@ -68,8 +67,8 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_KEYVAL, static char FUNC_NAME[] = "MPI_Type_create_keyval_f"; -void ompi_type_create_keyval_f(ompi_aint_copy_attr_function* type_copy_attr_fn, - ompi_aint_delete_attr_function* type_delete_attr_fn, +void ompi_type_create_keyval_f(ompi_aint_copy_attr_function type_copy_attr_fn, + ompi_aint_delete_attr_function type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr) { int ret, c_ierr; diff --git a/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c b/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c index aae4adc3bd7..c46bdcb9bc3 100644 --- a/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c @@ -73,8 +73,9 @@ static const char FUNC_NAME[] = "MPI_WIN_CREATE_ERRHANDLER"; void ompi_win_create_errhandler_f(ompi_errhandler_fortran_handler_fn_t* 
function, MPI_Fint *errhandler, MPI_Fint *ierr) { - MPI_Errhandler c_errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_WIN, + MPI_Errhandler c_errhandler; + + c_errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_WIN, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_FORTRAN); if (MPI_ERRHANDLER_NULL != c_errhandler) { diff --git a/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c b/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c index aefb8eb0641..8ce9842806a 100644 --- a/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c @@ -34,13 +34,12 @@ #pragma weak PMPI_Win_create_keyval_f = ompi_win_create_keyval_f #pragma weak PMPI_Win_create_keyval_f08 = ompi_win_create_keyval_f #else -OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_KEYVAL, - pmpi_win_create_keyval, - pmpi_win_create_keyval_, - pmpi_win_create_keyval__, - pompi_win_create_keyval_f, - (ompi_aint_copy_attr_function* win_copy_attr_fn, ompi_aint_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), - (win_copy_attr_fn, win_delete_attr_fn, win_keyval, extra_state, ierr) ) +OMPI_GENERATE_F77_BINDINGS(PMPI_WIN_CREATE_KEYVAL, pmpi_win_create_keyval, pmpi_win_create_keyval_, + pmpi_win_create_keyval__, pompi_win_create_keyval_f, + (ompi_aint_copy_attr_function win_copy_attr_fn, + ompi_aint_delete_attr_function win_delete_attr_fn, MPI_Fint *win_keyval, + MPI_Aint *extra_state, MPI_Fint *ierr), + (win_copy_attr_fn, win_delete_attr_fn, win_keyval, extra_state, ierr)) #endif #endif @@ -54,13 +53,12 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_KEYVAL, #pragma weak MPI_Win_create_keyval_f08 = ompi_win_create_keyval_f #else #if ! 
OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE_KEYVAL, - mpi_win_create_keyval, - mpi_win_create_keyval_, - mpi_win_create_keyval__, - ompi_win_create_keyval_f, - (ompi_aint_copy_attr_function* win_copy_attr_fn, ompi_aint_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), - (win_copy_attr_fn, win_delete_attr_fn, win_keyval, extra_state, ierr) ) +OMPI_GENERATE_F77_BINDINGS(MPI_WIN_CREATE_KEYVAL, mpi_win_create_keyval, mpi_win_create_keyval_, + mpi_win_create_keyval__, ompi_win_create_keyval_f, + (ompi_aint_copy_attr_function win_copy_attr_fn, + ompi_aint_delete_attr_function win_delete_attr_fn, MPI_Fint *win_keyval, + MPI_Aint *extra_state, MPI_Fint *ierr), + (win_copy_attr_fn, win_delete_attr_fn, win_keyval, extra_state, ierr)) #else #define ompi_win_create_keyval_f pompi_win_create_keyval_f #endif @@ -68,9 +66,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE_KEYVAL, static char FUNC_NAME[] = "MPI_Win_create_keyval"; -void ompi_win_create_keyval_f(ompi_aint_copy_attr_function* win_copy_attr_fn, - ompi_aint_delete_attr_function* win_delete_attr_fn, - MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr) +void ompi_win_create_keyval_f(ompi_aint_copy_attr_function win_copy_attr_fn, + ompi_aint_delete_attr_function win_delete_attr_fn, + MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr) { int ret, c_ierr; OMPI_SINGLE_NAME_DECL(win_keyval); diff --git a/ompi/mpi/fortran/use-mpi-f08/Makefile.am b/ompi/mpi/fortran/use-mpi-f08/Makefile.am index 08e316c86b9..95f449ffc34 100644 --- a/ompi/mpi/fortran/use-mpi-f08/Makefile.am +++ b/ompi/mpi/fortran/use-mpi-f08/Makefile.am @@ -118,14 +118,22 @@ mpi_api_files = \ aint_add_f08.F90 \ aint_diff_f08.F90 \ allgather_f08.F90 \ + allgather_init_f08.F90 \ allgatherv_f08.F90 \ + allgatherv_init_f08.F90 \ alloc_mem_f08.F90 \ allreduce_f08.F90 \ + allreduce_init_f08.F90 \ alltoall_f08.F90 \ + alltoall_init_f08.F90 \ alltoallv_f08.F90 \ + 
alltoallv_init_f08.F90 \ alltoallw_f08.F90 \ + alltoallw_init_f08.F90 \ barrier_f08.F90 \ + barrier_init_f08.F90 \ bcast_f08.F90 \ + bcast_init_f08.F90 \ bsend_f08.F90 \ bsend_init_f08.F90 \ buffer_attach_f08.F90 \ @@ -146,6 +154,7 @@ mpi_api_files = \ comm_connect_f08.F90 \ comm_create_errhandler_f08.F90 \ comm_create_f08.F90 \ + comm_create_from_group_f08.F90 \ comm_create_group_f08.F90 \ comm_create_keyval_f08.F90 \ comm_delete_attr_f08.F90 \ @@ -153,6 +162,7 @@ mpi_api_files = \ comm_dup_f08.F90 \ comm_dup_with_info_f08.F90 \ comm_idup_f08.F90 \ + comm_idup_with_info_f08.F90 \ comm_free_f08.F90 \ comm_free_keyval_f08.F90 \ comm_get_attr_f08.F90 \ @@ -185,6 +195,7 @@ mpi_api_files = \ error_class_f08.F90 \ error_string_f08.F90 \ exscan_f08.F90 \ + exscan_init_f08.F90 \ f_sync_reg_f08.F90 \ fetch_and_op_f08.F90 \ file_call_errhandler_f08.F90 \ @@ -250,7 +261,9 @@ mpi_api_files = \ finalize_f08.F90 \ free_mem_f08.F90 \ gather_f08.F90 \ + gather_init_f08.F90 \ gatherv_f08.F90 \ + gatherv_init_f08.F90 \ get_accumulate_f08.F90 \ get_address_f08.F90 \ get_count_f08.F90 \ @@ -272,6 +285,7 @@ mpi_api_files = \ group_difference_f08.F90 \ group_excl_f08.F90 \ group_free_f08.F90 \ + group_from_session_pset_f08.F90 \ group_incl_f08.F90 \ group_intersection_f08.F90 \ group_range_excl_f08.F90 \ @@ -307,11 +321,13 @@ mpi_api_files = \ info_get_nkeys_f08.F90 \ info_get_nthkey_f08.F90 \ info_get_valuelen_f08.F90 \ + info_get_string_f08.F90 \ info_set_f08.F90 \ init_f08.F90 \ initialized_f08.F90 \ init_thread_f08.F90 \ intercomm_create_f08.F90 \ + intercomm_create_from_groups_f08.F90 \ intercomm_merge_f08.F90 \ iprobe_f08.F90 \ irecv_f08.F90 \ @@ -323,16 +339,23 @@ mpi_api_files = \ iscatter_f08.F90 \ iscatterv_f08.F90 \ isend_f08.F90 \ + isendrecv_f08.F90 \ + isendrecv_replace_f08.F90 \ issend_f08.F90 \ is_thread_main_f08.F90 \ lookup_name_f08.F90 \ mprobe_f08.F90 \ mrecv_f08.F90 \ neighbor_allgather_f08.F90 \ + neighbor_allgather_init_f08.F90 \ neighbor_allgatherv_f08.F90 \ + 
neighbor_allgatherv_init_f08.F90 \ neighbor_alltoall_f08.F90 \ + neighbor_alltoall_init_f08.F90 \ neighbor_alltoallv_f08.F90 \ + neighbor_alltoallv_init_f08.F90 \ neighbor_alltoallw_f08.F90 \ + neighbor_alltoallw_init_f08.F90 \ op_commutative_f08.F90 \ op_create_f08.F90 \ open_port_f08.F90 \ @@ -356,9 +379,12 @@ mpi_api_files = \ recv_f08.F90 \ recv_init_f08.F90 \ reduce_f08.F90 \ + reduce_init_f08.F90 \ reduce_local_f08.F90 \ reduce_scatter_f08.F90 \ + reduce_scatter_init_f08.F90 \ reduce_scatter_block_f08.F90 \ + reduce_scatter_block_init_f08.F90 \ register_datarep_f08.F90 \ request_free_f08.F90 \ request_get_status_f08.F90 \ @@ -368,12 +394,21 @@ mpi_api_files = \ rsend_f08.F90 \ rsend_init_f08.F90 \ scan_f08.F90 \ + scan_init_f08.F90 \ scatter_f08.F90 \ + scatter_init_f08.F90 \ scatterv_f08.F90 \ + scatterv_init_f08.F90 \ send_f08.F90 \ send_init_f08.F90 \ sendrecv_f08.F90 \ sendrecv_replace_f08.F90 \ + session_get_info_f08.F90 \ + session_get_nth_pset_f08.F90 \ + session_get_num_psets_f08.F90 \ + session_get_pset_info_f08.F90 \ + session_init_f08.F90 \ + session_finalize_f08.F90 \ ssend_f08.F90 \ ssend_init_f08.F90 \ startall_f08.F90 \ diff --git a/ompi/mpi/fortran/use-mpi-f08/accumulate_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/accumulate_f08.F90 index 4eacb2b48a8..cf5f9ada671 100644 --- a/ompi/mpi/fortran/use-mpi-f08/accumulate_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/accumulate_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2010-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All Rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -17,7 +17,7 @@ subroutine MPI_Accumulate_f08(origin_addr,origin_count,origin_datatype,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Win, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_accumulate_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/allgather_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/allgather_init_f08.F90 similarity index 61% rename from ompi/mpiext/pcollreq/use-mpi-f08/allgather_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/allgather_init_f08.F90 index f31cd539644..8fe93a449d0 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/allgather_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/allgather_init_f08.F90 @@ -1,21 +1,23 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! 
$COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Allgather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm,info,request,ierror) +#include "mpi-f08-rename.h" + +subroutine MPI_Allgather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_allgather_init_f + use :: ompi_mpifh_bindings, only : ompi_allgather_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype @@ -25,8 +27,8 @@ subroutine MPIX_Allgather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount, INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_allgather_init_f(sendbuf,sendcount,sendtype%MPI_VAL,& + call ompi_allgather_init_f(sendbuf,sendcount,sendtype%MPI_VAL,& recvbuf,recvcount,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Allgather_init_f08 +end subroutine MPI_Allgather_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/allgatherv_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/allgatherv_init_f08.F90 similarity index 60% rename from ompi/mpiext/pcollreq/use-mpi-f08/allgatherv_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/allgatherv_init_f08.F90 index db2f5eeafaf..97f1b223350 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/allgatherv_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/allgatherv_init_f08.F90 @@ -1,24 +1,26 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! 
Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Allgatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& +#include "mpi-f08-rename.h" + +subroutine MPI_Allgatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& displs,recvtype,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_allgatherv_init_f + use :: ompi_mpifh_bindings, only : ompi_allgatherv_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -27,8 +29,8 @@ subroutine MPIX_Allgatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_allgatherv_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcounts,& + call ompi_allgatherv_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcounts,& displs,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Allgatherv_init_f08 +end subroutine MPI_Allgatherv_init_f08 diff --git 
a/ompi/mpiext/pcollreq/use-mpi-f08/allreduce_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/allreduce_init_f08.F90 similarity index 61% rename from ompi/mpiext/pcollreq/use-mpi-f08/allreduce_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/allreduce_init_f08.F90 index d31e21f88d0..85b67cac49c 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/allreduce_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/allreduce_init_f08.F90 @@ -1,21 +1,23 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Allreduce_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) +#include "mpi-f08-rename.h" + +subroutine MPI_Allreduce_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_allreduce_init_f + use :: ompi_mpifh_bindings, only : ompi_allreduce_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op @@ -25,8 +27,8 @@ subroutine MPIX_Allreduce_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,r INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call 
ompix_allreduce_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& + call ompi_allreduce_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& op%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Allreduce_init_f08 +end subroutine MPI_Allreduce_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/alltoall_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/alltoall_init_f08.F90 similarity index 65% rename from ompi/mpiext/pcollreq/use-mpi-f08/alltoall_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/alltoall_init_f08.F90 index 59728df95c7..513ef505d00 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/alltoall_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/alltoall_init_f08.F90 @@ -1,22 +1,24 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! 
$COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Alltoall_init_f08(sendbuf,sendcount,sendtype,recvbuf,& +#include "mpi-f08-rename.h" + +subroutine MPI_Alltoall_init_f08(sendbuf,sendcount,sendtype,recvbuf,& recvcount,recvtype,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_alltoall_init_f + use :: ompi_mpifh_bindings, only : ompi_alltoall_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype @@ -26,8 +28,8 @@ subroutine MPIX_Alltoall_init_f08(sendbuf,sendcount,sendtype,recvbuf,& INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_alltoall_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,& + call ompi_alltoall_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,& recvcount,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Alltoall_init_f08 +end subroutine MPI_Alltoall_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/alltoallv_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/alltoallv_init_f08.F90 similarity index 60% rename from ompi/mpiext/pcollreq/use-mpi-f08/alltoallv_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/alltoallv_init_f08.F90 index 133536c59ca..49bf9e97d01 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/alltoallv_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/alltoallv_init_f08.F90 @@ -1,23 +1,25 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! 
Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Alltoallv_init_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,& +#include "mpi-f08-rename.h" + +subroutine MPI_Alltoallv_init_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,& recvcounts,rdispls,recvtype,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_alltoallv_init_f + use :: ompi_mpifh_bindings, only : ompi_alltoallv_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -26,8 +28,8 @@ subroutine MPIX_Alltoallv_init_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,& INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_alltoallv_init_f(sendbuf,sendcounts,sdispls,sendtype%MPI_VAL,& + call ompi_alltoallv_init_f(sendbuf,sendcounts,sdispls,sendtype%MPI_VAL,& recvbuf,recvcounts,rdispls,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Alltoallv_init_f08 +end subroutine MPI_Alltoallv_init_f08 diff --git 
a/ompi/mpiext/pcollreq/use-mpi-f08/alltoallw_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/alltoallw_init_f08.F90 similarity index 68% rename from ompi/mpiext/pcollreq/use-mpi-f08/alltoallw_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/alltoallw_init_f08.F90 index d1b91a5788c..ce2dbd88532 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/alltoallw_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/alltoallw_init_f08.F90 @@ -1,24 +1,26 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Alltoallw_init_f08(sendbuf,sendcounts,sdispls,sendtypes,& +#include "mpi-f08-rename.h" + +subroutine MPI_Alltoallw_init_f08(sendbuf,sendcounts,sdispls,sendtypes,& recvbuf,recvcounts,rdispls,recvtypes,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_alltoallw_init_f + use :: ompi_mpifh_bindings, only : ompi_alltoallw_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: sendtypes(*), recvtypes(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + TYPE(MPI_Datatype), INTENT(IN) 
OMPI_ASYNCHRONOUS :: sendtypes(*), recvtypes(*) TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Info), INTENT(IN) :: info TYPE(MPI_Request), INTENT(OUT) :: request @@ -35,8 +37,8 @@ subroutine MPIX_Alltoallw_init_f08(sendbuf,sendcounts,sdispls,sendtypes,& ! as passing the address to an array of integers. To be clear: the ! back-end ompi_alltoallw_f is expecting a pointer to an array of ! integers. So it all works out (but is a hack :-\ ). - call ompix_alltoallw_init_f(sendbuf,sendcounts,sdispls,sendtypes(1)%MPI_VAL,& + call ompi_alltoallw_init_f(sendbuf,sendcounts,sdispls,sendtypes(1)%MPI_VAL,& recvbuf,recvcounts,rdispls,recvtypes(1)%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Alltoallw_init_f08 +end subroutine MPI_Alltoallw_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/barrier_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/barrier_init_f08.F90 similarity index 64% rename from ompi/mpiext/pcollreq/use-mpi-f08/barrier_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/barrier_init_f08.F90 index 538e6225125..03b40a41292 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/barrier_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/barrier_init_f08.F90 @@ -3,13 +3,15 @@ ! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! 
$COPYRIGHT$ -subroutine MPIX_Barrier_init_f08(comm,info,request,ierror) +#include "mpi-f08-rename.h" + +subroutine MPI_Barrier_init_f08(comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_barrier_init_f + use :: ompi_mpifh_bindings, only : ompi_barrier_init_f implicit none TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Info), INTENT(IN) :: info @@ -17,7 +19,7 @@ subroutine MPIX_Barrier_init_f08(comm,info,request,ierror) INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_barrier_init_f(comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) + call ompi_barrier_init_f(comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Barrier_init_f08 +end subroutine MPI_Barrier_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/bcast_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/bcast_init_f08.F90 similarity index 59% rename from ompi/mpiext/pcollreq/use-mpi-f08/bcast_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/bcast_init_f08.F90 index 0a2b2bd9360..997d28263e3 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/bcast_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/bcast_init_f08.F90 @@ -1,20 +1,22 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! 
$COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Bcast_init_f08(buffer,count,datatype,root,comm,info,request,ierror) +#include "mpi-f08-rename.h" + +subroutine MPI_Bcast_init_f08(buffer,count,datatype,root,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_bcast_init_f + use :: ompi_mpifh_bindings, only : ompi_bcast_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: buffer + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buffer INTEGER, INTENT(IN) :: count, root TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -23,7 +25,7 @@ subroutine MPIX_Bcast_init_f08(buffer,count,datatype,root,comm,info,request,ierr INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_bcast_init_f(buffer,count,datatype%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) + call ompi_bcast_init_f(buffer,count,datatype%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Bcast_init_f08 +end subroutine MPI_Bcast_init_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/bindings/mpi-f-interfaces-bind.h b/ompi/mpi/fortran/use-mpi-f08/bindings/mpi-f-interfaces-bind.h index 248fda71852..668ec44e9c8 100644 --- a/ompi/mpi/fortran/use-mpi-f08/bindings/mpi-f-interfaces-bind.h +++ b/ompi/mpi/fortran/use-mpi-f08/bindings/mpi-f-interfaces-bind.h @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! Copyright (c) 2012 The University of Tennessee and The University @@ -9,6 +9,11 @@ ! Copyright (c) 2012 Inria. All rights reserved. ! Copyright (c) 2015-2020 Research Organization for Information Science ! and Technology (RIST). 
All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! Copyright (c) 2021 Bull S.A.S. All rights reserved. +! Copyright (c) 2021 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! This file provides the interface specifications for the MPI Fortran @@ -145,6 +150,7 @@ ! MPI_Is_thread_main ! MPI_Op_commutative ! MPI_Op_create +! MPI_Parrived ! MPI_Type_get_attr ! MPI_Win_get_attr ! MPI_Win_test @@ -176,7 +182,7 @@ end subroutine ompi_bsend_f subroutine ompi_bsend_init_f(buf,count,datatype,dest,tag,comm,request,ierror) & BIND(C, name="ompi_bsend_init_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm @@ -187,7 +193,7 @@ end subroutine ompi_bsend_init_f subroutine ompi_buffer_attach_f(buffer,size,ierror) & BIND(C, name="ompi_buffer_attach_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buffer + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buffer INTEGER, INTENT(IN) :: size INTEGER, INTENT(OUT) :: ierror end subroutine ompi_buffer_attach_f @@ -224,7 +230,7 @@ end subroutine ompi_get_count_f subroutine ompi_ibsend_f(buf,count,datatype,dest,tag,comm,request,ierror) & BIND(C, name="ompi_ibsend_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm @@ -235,7 +241,7 @@ end subroutine ompi_ibsend_f subroutine ompi_irecv_f(buf,count,datatype,source,tag,comm,request,ierror) & BIND(C, name="ompi_irecv_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, source, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm @@ -246,7 +252,7 @@ end 
subroutine ompi_irecv_f subroutine ompi_irsend_f(buf,count,datatype,dest,tag,comm,request,ierror) & BIND(C, name="ompi_irsend_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm @@ -257,7 +263,7 @@ end subroutine ompi_irsend_f subroutine ompi_isend_f(buf,count,datatype,dest,tag,comm,request,ierror) & BIND(C, name="ompi_isend_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm @@ -265,10 +271,36 @@ subroutine ompi_isend_f(buf,count,datatype,dest,tag,comm,request,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_isend_f +subroutine ompi_isendrecv_f(sendbuf,sendcount,sendtype,dest,sendtag,recvbuf, & + recvcount,recvtype,source,recvtag,comm,request,ierror) & + BIND(C, name="ompi_isendrecv_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, dest, sendtag, recvcount, source, recvtag + INTEGER, INTENT(IN) :: sendtype + INTEGER, INTENT(IN) :: recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_isendrecv_f + +subroutine ompi_isendrecv_replace_f(buf,count,datatype,dest,sendtag,source, & + recvtag,comm,request,ierror) & + BIND(C, name="ompi_isendrecv_replace_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf + INTEGER, INTENT(IN) :: count, dest, sendtag, source, recvtag + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_isendrecv_replace_f + subroutine ompi_issend_f(buf,count,datatype,dest,tag,comm,request,ierror) & BIND(C,
name="ompi_issend_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm @@ -276,24 +308,26 @@ subroutine ompi_issend_f(buf,count,datatype,dest,tag,comm,request,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_issend_f -subroutine ompi_psend_init_f(buf,partitions,count,datatype,dest,tag,comm,request,ierror) & +subroutine ompi_psend_init_f(buf,partitions,count,datatype,dest,tag,comm,info,request,ierror) & BIND(C, name="ompi_psend_init_f") implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf INTEGER, INTENT(IN) :: partitions, count, dest, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info INTEGER, INTENT(OUT) :: request INTEGER, INTENT(OUT) :: ierror end subroutine ompi_psend_init_f -subroutine ompi_precv_init_f(buf,partitions,count,datatype,dest,tag,comm,request,ierror) & +subroutine ompi_precv_init_f(buf,partitions,count,datatype,dest,tag,comm,info,request,ierror) & BIND(C, name="ompi_precv_init_f") implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf INTEGER, INTENT(IN) :: partitions, count, dest, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info INTEGER, INTENT(OUT) :: request INTEGER, INTENT(OUT) :: ierror end subroutine ompi_precv_init_f @@ -329,7 +363,7 @@ subroutine ompi_probe_f(source,tag,comm,status,ierror) & implicit none INTEGER, INTENT(IN) :: source, tag INTEGER, INTENT(IN) :: comm - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_probe_f @@ -341,14 +375,14 @@ subroutine ompi_recv_f(buf,count,datatype,source,tag,comm,status,ierror) & INTEGER, INTENT(IN) :: count, source, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm - TYPE(MPI_Status), INTENT(OUT) :: status + 
TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_recv_f subroutine ompi_recv_init_f(buf,count,datatype,source,tag,comm,request,ierror) & BIND(C, name="ompi_recv_init_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, source, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm @@ -376,7 +410,7 @@ end subroutine ompi_rsend_f subroutine ompi_rsend_init_f(buf,count,datatype,dest,tag,comm,request,ierror) & BIND(C, name="ompi_rsend_init_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm @@ -405,7 +439,7 @@ subroutine ompi_sendrecv_f(sendbuf,sendcount,sendtype,dest,sendtag,recvbuf, & INTEGER, INTENT(IN) :: sendtype INTEGER, INTENT(IN) :: recvtype INTEGER, INTENT(IN) :: comm - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_sendrecv_f @@ -418,14 +452,14 @@ subroutine ompi_sendrecv_replace_f(buf,count,datatype,dest,sendtag,source, & INTEGER, INTENT(IN) :: count, dest, sendtag, source, recvtag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_sendrecv_replace_f subroutine ompi_send_init_f(buf,count,datatype,dest,tag,comm,request,ierror) & BIND(C, name="ompi_send_init_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm @@ -446,7 +480,7 @@ end subroutine ompi_ssend_f subroutine ompi_ssend_init_f(buf,count,datatype,dest,tag,comm,request,ierror) & BIND(C, name="ompi_ssend_init_f") implicit none - 
OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm @@ -474,7 +508,7 @@ subroutine ompi_wait_f(request,status,ierror) & use :: mpi_f08_types, only : MPI_Status implicit none INTEGER, INTENT(INOUT) :: request - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_wait_f @@ -484,7 +518,7 @@ subroutine ompi_waitall_f(count,array_of_requests,array_of_statuses,ierror) & implicit none INTEGER, INTENT(IN) :: count INTEGER, INTENT(INOUT) :: array_of_requests(count) - TYPE(MPI_Status), INTENT(OUT) :: array_of_statuses(count) + TYPE(MPI_Status) :: array_of_statuses(*) INTEGER, INTENT(OUT) :: ierror end subroutine ompi_waitall_f @@ -495,7 +529,7 @@ subroutine ompi_waitany_f(count,array_of_requests,index,status,ierror) & INTEGER, INTENT(IN) :: count INTEGER, INTENT(INOUT) :: array_of_requests(count) INTEGER, INTENT(OUT) :: index - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_waitany_f @@ -508,7 +542,7 @@ subroutine ompi_waitsome_f(incount,array_of_requests,outcount, & INTEGER, INTENT(INOUT) :: array_of_requests(incount) INTEGER, INTENT(OUT) :: outcount INTEGER, INTENT(OUT) :: array_of_indices(*) - TYPE(MPI_Status), INTENT(OUT) :: array_of_statuses(*) + TYPE(MPI_Status) :: array_of_statuses(*) INTEGER, INTENT(OUT) :: ierror end subroutine ompi_waitsome_f @@ -516,7 +550,7 @@ subroutine ompi_get_address_f(location,address,ierror) & BIND(C, name="ompi_get_address_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: location + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: location INTEGER(MPI_ADDRESS_KIND), INTENT(OUT) :: address INTEGER, INTENT(OUT) :: ierror end subroutine ompi_get_address_f @@ -698,7 
+732,7 @@ subroutine ompi_type_create_subarray_f(ndims,array_of_sizes, & BIND(C, name="ompi_type_create_subarray_f") implicit none INTEGER, INTENT(IN) :: ndims, order - INTEGER, INTENT(IN) :: array_of_sizes(*), array_of_subsizes(*), array_of_starts(*) + INTEGER, INTENT(IN) :: array_of_sizes(ndims), array_of_subsizes(ndims), array_of_starts(ndims) INTEGER, INTENT(IN) :: oldtype INTEGER, INTENT(OUT) :: newtype INTEGER, INTENT(OUT) :: ierror @@ -859,11 +893,11 @@ subroutine ompi_allgather_f(sendbuf,sendcount,sendtype,recvbuf, & end subroutine ompi_allgather_f subroutine ompi_iallgather_f(sendbuf,sendcount,sendtype,recvbuf, & - recvcount,recvtype,comm,request,ierror) & + recvcount,recvtype,comm,request,ierror) & BIND(C, name="ompi_iallgather_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount INTEGER, INTENT(IN) :: sendtype INTEGER, INTENT(IN) :: recvtype @@ -872,6 +906,21 @@ subroutine ompi_iallgather_f(sendbuf,sendcount,sendtype,recvbuf, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_iallgather_f +subroutine ompi_allgather_init_f(sendbuf,sendcount,sendtype,recvbuf, & + recvcount,recvtype,comm,info,request,ierror) & + BIND(C, name="ompi_allgather_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount + INTEGER, INTENT(IN) :: sendtype + INTEGER, INTENT(IN) :: recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_allgather_init_f + subroutine ompi_allgatherv_f(sendbuf,sendcount,sendtype,recvbuf, & recvcounts,displs,recvtype,comm,ierror) & BIND(C, 
name="ompi_allgatherv_f") @@ -890,10 +939,10 @@ subroutine ompi_iallgatherv_f(sendbuf,sendcount,sendtype,recvbuf, & recvcounts,displs,recvtype,comm,request,ierror) & BIND(C, name="ompi_iallgatherv_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) INTEGER, INTENT(IN) :: sendtype INTEGER, INTENT(IN) :: recvtype INTEGER, INTENT(IN) :: comm @@ -901,6 +950,22 @@ subroutine ompi_iallgatherv_f(sendbuf,sendcount,sendtype,recvbuf, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_iallgatherv_f +subroutine ompi_allgatherv_init_f(sendbuf,sendcount,sendtype,recvbuf, & + recvcounts,displs,recvtype,comm,info,request,ierror) & + BIND(C, name="ompi_allgatherv_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) :: sendtype + INTEGER, INTENT(IN) :: recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_allgatherv_init_f + subroutine ompi_allreduce_f(sendbuf,recvbuf,count,datatype,op,comm,ierror) & BIND(C, name="ompi_allreduce_f") implicit none @@ -916,8 +981,8 @@ end subroutine ompi_allreduce_f subroutine ompi_iallreduce_f(sendbuf,recvbuf,count,datatype,op,comm,request,ierror) & BIND(C, name="ompi_iallreduce_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + 
OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: op @@ -926,6 +991,20 @@ subroutine ompi_iallreduce_f(sendbuf,recvbuf,count,datatype,op,comm,request,ierr INTEGER, INTENT(OUT) :: ierror end subroutine ompi_iallreduce_f +subroutine ompi_allreduce_init_f(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) & + BIND(C, name="ompi_allreduce_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: count + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(IN) :: op + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_allreduce_init_f + subroutine ompi_alltoall_f(sendbuf,sendcount,sendtype,recvbuf, & recvcount,recvtype,comm,ierror) & BIND(C, name="ompi_alltoall_f") @@ -943,8 +1022,8 @@ subroutine ompi_ialltoall_f(sendbuf,sendcount,sendtype,recvbuf, & recvcount,recvtype,comm,request,ierror) & BIND(C, name="ompi_ialltoall_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount INTEGER, INTENT(IN) :: sendtype INTEGER, INTENT(IN) :: recvtype @@ -953,6 +1032,21 @@ subroutine ompi_ialltoall_f(sendbuf,sendcount,sendtype,recvbuf, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_ialltoall_f +subroutine ompi_alltoall_init_f(sendbuf,sendcount,sendtype,recvbuf, & + recvcount,recvtype,comm,info,request,ierror) & + BIND(C, name="ompi_alltoall_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + 
OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount + INTEGER, INTENT(IN) :: sendtype + INTEGER, INTENT(IN) :: recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_alltoall_init_f + subroutine ompi_alltoallv_f(sendbuf,sendcounts,sdispls,sendtype, & recvbuf,recvcounts,rdispls,recvtype,comm,ierror) & BIND(C, name="ompi_alltoallv_f") @@ -970,9 +1064,9 @@ subroutine ompi_ialltoallv_f(sendbuf,sendcounts,sdispls,sendtype, & recvbuf,recvcounts,rdispls,recvtype,comm,request,ierror) & BIND(C, name="ompi_ialltoallv_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) INTEGER, INTENT(IN) :: sendtype INTEGER, INTENT(IN) :: recvtype INTEGER, INTENT(IN) :: comm @@ -980,6 +1074,21 @@ subroutine ompi_ialltoallv_f(sendbuf,sendcounts,sdispls,sendtype, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_ialltoallv_f +subroutine ompi_alltoallv_init_f(sendbuf,sendcounts,sdispls,sendtype, & + recvbuf,recvcounts,rdispls,recvtype,comm,info,request,ierror) & + BIND(C, name="ompi_alltoallv_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + INTEGER, INTENT(IN) :: sendtype + INTEGER, INTENT(IN) :: recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end 
subroutine ompi_alltoallv_init_f + subroutine ompi_alltoallw_f(sendbuf,sendcounts,sdispls,sendtypes, & recvbuf,recvcounts,rdispls,recvtypes,comm,ierror) & BIND(C, name="ompi_alltoallw_f") @@ -997,16 +1106,31 @@ subroutine ompi_ialltoallw_f(sendbuf,sendcounts,sdispls,sendtypes, & recvbuf,recvcounts,rdispls,recvtypes,comm,request,ierror) & BIND(C, name="ompi_ialltoallw_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendtypes - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvtypes + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendtypes + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvtypes INTEGER, INTENT(IN) :: comm INTEGER, INTENT(OUT) :: request INTEGER, INTENT(OUT) :: ierror end subroutine ompi_ialltoallw_f +subroutine ompi_alltoallw_init_f(sendbuf,sendcounts,sdispls,sendtypes, & + recvbuf,recvcounts,rdispls,recvtypes,comm,info,request,ierror) & + BIND(C, name="ompi_alltoallw_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendtypes + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvtypes + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_alltoallw_init_f + subroutine ompi_barrier_f(comm,ierror) & BIND(C, name="ompi_barrier_f") implicit none @@ -1022,6 +1146,15 @@ subroutine 
ompi_ibarrier_f(comm,request,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_ibarrier_f +subroutine ompi_barrier_init_f(comm,info,request,ierror) & + BIND(C, name="ompi_barrier_init_f") + implicit none + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_barrier_init_f + subroutine ompi_bcast_f(buffer,count,datatype,root,comm,ierror) & BIND(C, name="ompi_bcast_f") implicit none @@ -1035,7 +1168,7 @@ end subroutine ompi_bcast_f subroutine ompi_ibcast_f(buffer,count,datatype,root,comm,request,ierror) & BIND(C, name="ompi_ibcast_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: buffer + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buffer INTEGER, INTENT(IN) :: count, root INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: comm @@ -1043,6 +1176,18 @@ subroutine ompi_ibcast_f(buffer,count,datatype,root,comm,request,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_ibcast_f +subroutine ompi_bcast_init_f(buffer,count,datatype,root,comm,info,request,ierror) & + BIND(C, name="ompi_bcast_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buffer + INTEGER, INTENT(IN) :: count, root + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_bcast_init_f + subroutine ompi_exscan_f(sendbuf,recvbuf,count,datatype,op,comm,ierror) & BIND(C, name="ompi_exscan_f") implicit none @@ -1058,8 +1203,8 @@ end subroutine ompi_exscan_f subroutine ompi_iexscan_f(sendbuf,recvbuf,count,datatype,op,comm,request,ierror) & BIND(C, name="ompi_iexscan_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: 
recvbuf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: op @@ -1068,6 +1213,20 @@ subroutine ompi_iexscan_f(sendbuf,recvbuf,count,datatype,op,comm,request,ierror) INTEGER, INTENT(OUT) :: ierror end subroutine ompi_iexscan_f +subroutine ompi_exscan_init_f(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) & + BIND(C, name="ompi_exscan_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: count + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(IN) :: op + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_exscan_init_f + subroutine ompi_gather_f(sendbuf,sendcount,sendtype,recvbuf, & recvcount,recvtype,root,comm,ierror) & BIND(C, name="ompi_gather_f") @@ -1085,8 +1244,8 @@ subroutine ompi_igather_f(sendbuf,sendcount,sendtype,recvbuf, & recvcount,recvtype,root,comm,request,ierror) & BIND(C, name="ompi_igather_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount, root INTEGER, INTENT(IN) :: sendtype INTEGER, INTENT(IN) :: recvtype @@ -1095,6 +1254,21 @@ subroutine ompi_igather_f(sendbuf,sendcount,sendtype,recvbuf, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_igather_f +subroutine ompi_gather_init_f(sendbuf,sendcount,sendtype,recvbuf, & + recvcount,recvtype,root,comm,info,request,ierror) & + BIND(C, name="ompi_gather_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount, root + INTEGER, 
INTENT(IN) :: sendtype + INTEGER, INTENT(IN) :: recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_gather_init_f + subroutine ompi_gatherv_f(sendbuf,sendcount,sendtype,recvbuf, & recvcounts,displs,recvtype,root,comm,ierror) & BIND(C, name="ompi_gatherv_f") @@ -1113,10 +1287,10 @@ subroutine ompi_igatherv_f(sendbuf,sendcount,sendtype,recvbuf, & recvcounts,displs,recvtype,root,comm,request,ierror) & BIND(C, name="ompi_igatherv_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, root - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) INTEGER, INTENT(IN) :: sendtype INTEGER, INTENT(IN) :: recvtype INTEGER, INTENT(IN) :: comm @@ -1124,6 +1298,22 @@ subroutine ompi_igatherv_f(sendbuf,sendcount,sendtype,recvbuf, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_igatherv_f +subroutine ompi_gatherv_init_f(sendbuf,sendcount,sendtype,recvbuf, & + recvcounts,displs,recvtype,root,comm,info,request,ierror) & + BIND(C, name="ompi_gatherv_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, root + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) :: sendtype + INTEGER, INTENT(IN) :: recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_gatherv_init_f + subroutine ompi_op_free_f(op,ierror) & BIND(C, name="ompi_op_free_f") implicit none @@ -1146,8 +1336,8 @@ end 
subroutine ompi_reduce_f subroutine ompi_ireduce_f(sendbuf,recvbuf,count,datatype,op,root,comm,request,ierror) & BIND(C, name="ompi_ireduce_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count, root INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: op @@ -1156,6 +1346,20 @@ subroutine ompi_ireduce_f(sendbuf,recvbuf,count,datatype,op,root,comm,request,ie INTEGER, INTENT(OUT) :: ierror end subroutine ompi_ireduce_f +subroutine ompi_reduce_init_f(sendbuf,recvbuf,count,datatype,op,root,comm,info,request,ierror) & + BIND(C, name="ompi_reduce_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: count, root + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(IN) :: op + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_reduce_init_f + subroutine ompi_reduce_local_f(inbuf,inoutbuf,count,datatype,op,ierror) & BIND(C, name="ompi_reduce_local_f") implicit none @@ -1184,9 +1388,9 @@ subroutine ompi_ireduce_scatter_f(sendbuf,recvbuf,recvcounts, & datatype,op,comm,request,ierror) & BIND(C, name="ompi_ireduce_scatter_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*) INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: op INTEGER, INTENT(IN) :: comm @@ -1194,6 +1398,21 @@ 
subroutine ompi_ireduce_scatter_f(sendbuf,recvbuf,recvcounts, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_ireduce_scatter_f +subroutine ompi_reduce_scatter_init_f(sendbuf,recvbuf,recvcounts, & + datatype,op,comm,info,request,ierror) & + BIND(C, name="ompi_reduce_scatter_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*) + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(IN) :: op + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_reduce_scatter_init_f + subroutine ompi_reduce_scatter_block_f(sendbuf,recvbuf,recvcount, & datatype,op,comm,ierror) & BIND(C, name="ompi_reduce_scatter_block_f") @@ -1211,8 +1430,8 @@ subroutine ompi_ireduce_scatter_block_f(sendbuf,recvbuf,recvcount, & datatype,op,comm,request,ierror) & BIND(C, name="ompi_ireduce_scatter_block_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: recvcount INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: op @@ -1221,6 +1440,21 @@ subroutine ompi_ireduce_scatter_block_f(sendbuf,recvbuf,recvcount, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_ireduce_scatter_block_f +subroutine ompi_reduce_scatter_block_init_f(sendbuf,recvbuf,recvcount, & + datatype,op,comm,info,request,ierror) & + BIND(C, name="ompi_reduce_scatter_block_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: recvcount + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(IN) :: op + INTEGER, INTENT(IN) 
:: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_reduce_scatter_block_init_f + subroutine ompi_scan_f(sendbuf,recvbuf,count,datatype,op,comm,ierror) & BIND(C, name="ompi_scan_f") implicit none @@ -1236,8 +1470,8 @@ end subroutine ompi_scan_f subroutine ompi_iscan_f(sendbuf,recvbuf,count,datatype,op,comm,request,ierror) & BIND(C, name="ompi_iscan_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: op @@ -1246,6 +1480,20 @@ subroutine ompi_iscan_f(sendbuf,recvbuf,count,datatype,op,comm,request,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_iscan_f +subroutine ompi_scan_init_f(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) & + BIND(C, name="ompi_scan_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: count + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(IN) :: op + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_scan_init_f + subroutine ompi_scatter_f(sendbuf,sendcount,sendtype,recvbuf, & recvcount,recvtype,root,comm,ierror) & BIND(C, name="ompi_scatter_f") @@ -1263,8 +1511,8 @@ subroutine ompi_iscatter_f(sendbuf,sendcount,sendtype,recvbuf, & recvcount,recvtype,root,comm,request,ierror) & BIND(C, name="ompi_iscatter_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + 
OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount, root INTEGER, INTENT(IN) :: sendtype INTEGER, INTENT(IN) :: recvtype @@ -1273,6 +1521,21 @@ subroutine ompi_iscatter_f(sendbuf,sendcount,sendtype,recvbuf, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_iscatter_f +subroutine ompi_scatter_init_f(sendbuf,sendcount,sendtype,recvbuf, & + recvcount,recvtype,root,comm,info,request,ierror) & + BIND(C, name="ompi_scatter_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount, root + INTEGER, INTENT(IN) :: sendtype + INTEGER, INTENT(IN) :: recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_scatter_init_f + subroutine ompi_scatterv_f(sendbuf,sendcounts,displs,sendtype, & recvbuf,recvcount,recvtype,root,comm,ierror) & BIND(C, name="ompi_scatterv_f") @@ -1291,10 +1554,11 @@ subroutine ompi_iscatterv_f(sendbuf,sendcounts,displs,sendtype, & recvbuf,recvcount,recvtype,root,comm,request,ierror) & BIND(C, name="ompi_iscatterv_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: recvcount, root - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), displs(*) + INTEGER, INTENT(IN) :: sendtype INTEGER, INTENT(IN) :: recvtype INTEGER, INTENT(IN) :: comm @@ -1302,6 +1566,22 @@ subroutine ompi_iscatterv_f(sendbuf,sendcounts,displs,sendtype, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_iscatterv_f +subroutine ompi_scatterv_init_f(sendbuf,sendcounts,displs,sendtype, & + 
recvbuf,recvcount,recvtype,root,comm,info,request,ierror) & + BIND(C, name="ompi_scatterv_init_f") + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: recvcount, root + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), displs(*) + INTEGER, INTENT(IN) :: sendtype + INTEGER, INTENT(IN) :: recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_scatterv_init_f + subroutine ompi_comm_compare_f(comm1,comm2,result,ierror) & BIND(C, name="ompi_comm_compare_f") implicit none @@ -1320,6 +1600,19 @@ subroutine ompi_comm_create_f(comm,group,newcomm,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_comm_create_f +subroutine ompi_comm_create_from_group_f(group, stringtag, info, errhandler, newcomm, ierror, name_len) & + BIND(C, name="ompi_comm_create_from_group_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + integer, intent(in) :: group + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(IN) :: stringtag + integer, intent(in) :: info + integer, intent(in) :: errhandler + integer, intent(out) :: newcomm + integer, intent(out) :: ierror + INTEGER, VALUE, INTENT(IN) :: name_len +end subroutine ompi_comm_create_from_group_f + subroutine ompi_comm_create_group_f(comm, group, tag, newcomm, ierror) & BIND(C, name="ompi_comm_create_group_f") implicit none @@ -1368,6 +1661,16 @@ subroutine ompi_comm_dup_with_info_f(comm, info, newcomm, ierror) & integer, intent(out) :: ierror end subroutine ompi_comm_dup_with_info_f +subroutine ompi_comm_idup_with_info_f(comm, info, newcomm, request, ierror) & + BIND(C, name="ompi_comm_idup_with_info_f") + implicit none + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: newcomm + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine 
ompi_comm_idup_with_info_f + subroutine ompi_comm_free_f(comm,ierror) & BIND(C, name="ompi_comm_free_f") implicit none @@ -1401,6 +1704,19 @@ subroutine ompi_comm_get_name_f(comm,comm_name,resultlen,ierror,comm_name_len) & INTEGER, VALUE, INTENT(IN) :: comm_name_len end subroutine ompi_comm_get_name_f +subroutine ompi_comm_from_group_f(group, stringtag, info, errhandler, newcomm, ierror, name_len) & + BIND(C, name="ompi_comm_from_group_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + INTEGER, INTENT(IN) :: group + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(IN) :: stringtag + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(IN) :: errhandler + INTEGER, INTENT(OUT) :: newcomm + INTEGER, INTENT(OUT) :: ierror + INTEGER, VALUE, INTENT(IN) :: name_len +end subroutine ompi_comm_from_group_f + subroutine ompi_comm_group_f(comm,group,ierror) & BIND(C, name="ompi_comm_group_f") implicit none @@ -1413,7 +1729,7 @@ subroutine ompi_comm_idup_f(comm, newcomm, request, ierror) & BIND(C, name="ompi_comm_idup_f") implicit none integer, intent(in) :: comm - integer, intent(out) :: newcomm + integer, intent(out) OMPI_ASYNCHRONOUS :: newcomm integer, intent(out) :: request integer, intent(out) :: ierror end subroutine ompi_comm_idup_f @@ -1510,7 +1826,7 @@ subroutine ompi_group_excl_f(group,n,ranks,newgroup,ierror) & implicit none INTEGER, INTENT(IN) :: group INTEGER, INTENT(IN) :: n - INTEGER, INTENT(IN) :: ranks(*) + INTEGER, INTENT(IN) :: ranks(n) INTEGER, INTENT(OUT) :: newgroup INTEGER, INTENT(OUT) :: ierror end subroutine ompi_group_excl_f @@ -1522,11 +1838,22 @@ subroutine ompi_group_free_f(group,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_group_free_f +subroutine ompi_group_from_session_pset_f(session, pset_name, newgroup, ierror, name_len) & + BIND(C, name="ompi_group_from_session_pset_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + INTEGER, INTENT(IN) :: session + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(IN) 
:: pset_name + INTEGER, INTENT(OUT) :: newgroup + integer, intent(out) :: ierror + INTEGER, VALUE, INTENT(IN) :: name_len +end subroutine ompi_group_from_session_pset_f + subroutine ompi_group_incl_f(group,n,ranks,newgroup,ierror) & BIND(C, name="ompi_group_incl_f") implicit none INTEGER, INTENT(IN) :: n - INTEGER, INTENT(IN) :: ranks(*) + INTEGER, INTENT(IN) :: ranks(n) INTEGER, INTENT(IN) :: group INTEGER, INTENT(OUT) :: newgroup INTEGER, INTENT(OUT) :: ierror @@ -1546,7 +1873,7 @@ subroutine ompi_group_range_excl_f(group,n,ranges,newgroup,ierror) & implicit none INTEGER, INTENT(IN) :: group INTEGER, INTENT(IN) :: n - INTEGER, INTENT(IN) :: ranges(*) + INTEGER, INTENT(IN) :: ranges(3, n) INTEGER, INTENT(OUT) :: newgroup INTEGER, INTENT(OUT) :: ierror end subroutine ompi_group_range_excl_f @@ -1556,7 +1883,7 @@ subroutine ompi_group_range_incl_f(group,n,ranges,newgroup,ierror) & implicit none INTEGER, INTENT(IN) :: group INTEGER, INTENT(IN) :: n - INTEGER, INTENT(IN) :: ranges(*) + INTEGER, INTENT(IN) :: ranges(3, n) INTEGER, INTENT(OUT) :: newgroup INTEGER, INTENT(OUT) :: ierror end subroutine ompi_group_range_incl_f @@ -1582,8 +1909,8 @@ subroutine ompi_group_translate_ranks_f(group1,n,ranks1,group2,ranks2,ierror) & implicit none INTEGER, INTENT(IN) :: group1, group2 INTEGER, INTENT(IN) :: n - INTEGER, INTENT(IN) :: ranks1(*) - INTEGER, INTENT(OUT) :: ranks2(*) + INTEGER, INTENT(IN) :: ranks1(n) + INTEGER, INTENT(OUT) :: ranks2(n) INTEGER, INTENT(OUT) :: ierror end subroutine ompi_group_translate_ranks_f @@ -1605,6 +1932,21 @@ subroutine ompi_intercomm_create_f(local_comm,local_leader,peer_comm, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_intercomm_create_f +subroutine ompi_intercomm_create_from_groups_f(local_group, local_leader, remote_group, & + remote_leader, stringtag, info, errhandler, & + newintercomm, ierror, name_len) & + BIND(C, name="ompi_intercomm_create_from_groups_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + 
INTEGER, INTENT(IN) :: local_group, remote_group + INTEGER, INTENT(IN) :: local_leader, remote_leader + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(IN) :: stringtag + INTEGER, INTENT(IN) :: info, errhandler + INTEGER, INTENT(OUT) :: newintercomm + INTEGER, INTENT(OUT) :: ierror + INTEGER, VALUE, INTENT(IN) :: name_len +end subroutine ompi_intercomm_create_from_groups_f + subroutine ompi_type_create_keyval_f(type_copy_attr_fn,type_delete_attr_fn, & type_keyval,extra_state,ierror) & BIND(C, name="ompi_type_create_keyval_f") @@ -1788,7 +2130,7 @@ subroutine ompi_dims_create_f(nnodes,ndims,dims,ierror) & BIND(C, name="ompi_dims_create_f") implicit none INTEGER, INTENT(IN) :: nnodes, ndims - INTEGER, INTENT(INOUT) :: dims(*) + INTEGER, INTENT(INOUT) :: dims(ndims) INTEGER, INTENT(OUT) :: ierror end subroutine ompi_dims_create_f @@ -1816,7 +2158,7 @@ subroutine ompi_graph_get_f(comm,maxindex,maxedges,index,edges,ierror) & implicit none INTEGER, INTENT(IN) :: comm INTEGER, INTENT(IN) :: maxindex, maxedges - INTEGER, INTENT(OUT) :: index(*), edges(*) + INTEGER, INTENT(OUT) :: index(maxindex), edges(maxedges) INTEGER, INTENT(OUT) :: ierror end subroutine ompi_graph_get_f @@ -1825,7 +2167,7 @@ subroutine ompi_graph_map_f(comm,nnodes,index,edges,newrank,ierror) & implicit none INTEGER, INTENT(IN) :: comm INTEGER, INTENT(IN) :: nnodes - INTEGER, INTENT(IN) :: index(*), edges(*) + INTEGER, INTENT(IN) :: index(nnodes), edges(*) INTEGER, INTENT(OUT) :: newrank INTEGER, INTENT(OUT) :: ierror end subroutine ompi_graph_map_f @@ -1835,7 +2177,7 @@ subroutine ompi_graph_neighbors_f(comm,rank,maxneighbors,neighbors,ierror) & implicit none INTEGER, INTENT(IN) :: comm INTEGER, INTENT(IN) :: rank, maxneighbors - INTEGER, INTENT(OUT) :: neighbors(*) + INTEGER, INTENT(OUT) :: neighbors(maxneighbors) INTEGER, INTENT(OUT) :: ierror end subroutine ompi_graph_neighbors_f @@ -2295,7 +2637,7 @@ subroutine ompi_accumulate_f(origin_addr,origin_count,origin_datatype, & BIND(C, 
name="ompi_accumulate_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count INTEGER, INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2312,7 +2654,7 @@ subroutine ompi_raccumulate_f(origin_addr,origin_count,origin_datatype, & BIND(C, name="ompi_raccumulate_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count INTEGER, INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2328,7 +2670,7 @@ subroutine ompi_get_f(origin_addr,origin_count,origin_datatype,target_rank, & BIND(C, name="ompi_get_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count INTEGER, INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2343,7 +2685,7 @@ subroutine ompi_rget_f(origin_addr,origin_count,origin_datatype,target_rank, & BIND(C, name="ompi_rget_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count INTEGER, INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2361,10 +2703,10 @@ subroutine ompi_get_accumulate_f(origin_addr,origin_count,origin_datatype, & BIND(C, name="ompi_get_accumulate_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - 
OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, result_count, target_rank, target_count INTEGER, INTENT(IN) :: origin_datatype - OMPI_FORTRAN_IGNORE_TKR_TYPE :: result_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: result_addr INTEGER, INTENT(IN) :: result_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp INTEGER, INTENT(IN) :: target_datatype @@ -2381,10 +2723,10 @@ subroutine ompi_rget_accumulate_f(origin_addr,origin_count,origin_datatype, & BIND(C, name="ompi_rget_accumulate_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, result_count, target_rank, target_count INTEGER, INTENT(IN) :: origin_datatype - OMPI_FORTRAN_IGNORE_TKR_TYPE :: result_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: result_addr INTEGER, INTENT(IN) :: result_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp INTEGER, INTENT(IN) :: target_datatype @@ -2399,7 +2741,7 @@ subroutine ompi_put_f(origin_addr,origin_count,origin_datatype,target_rank, & BIND(C, name="ompi_put_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count INTEGER, INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2414,7 +2756,7 @@ subroutine ompi_rput_f(origin_addr,origin_count,origin_datatype,target_rank, & BIND(C, name="ompi_rput_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) 
:: origin_count, target_rank, target_count INTEGER, INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2437,8 +2779,8 @@ subroutine ompi_compare_and_swap_f(origin_addr,compare_addr,result_addr, & BIND(C, name="ompi_compare_and_swap_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: origin_addr, compare_addr - OMPI_FORTRAN_IGNORE_TKR_TYPE :: result_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr, compare_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: result_addr INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: target_rank INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2451,8 +2793,8 @@ subroutine ompi_fetch_and_op_f(origin_addr,result_addr,datatype,target_rank, & BIND(C, name="ompi_fetch_and_op_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: origin_addr - OMPI_FORTRAN_IGNORE_TKR_TYPE :: result_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: result_addr INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(IN) :: target_rank INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2465,7 +2807,7 @@ subroutine ompi_win_create_f(base,size,disp_unit,info,comm,win,ierror) & BIND(C, name="ompi_win_create_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: base INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: size INTEGER, INTENT(IN) :: disp_unit INTEGER, INTENT(IN) :: info @@ -2487,7 +2829,7 @@ subroutine ompi_win_attach_f(win,base,size,ierror) & BIND(C, name="ompi_win_attach_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: base INTEGER(MPI_ADDRESS_KIND), 
INTENT(IN) :: size INTEGER, INTENT(IN) :: win INTEGER, INTENT(OUT) :: ierror @@ -2497,7 +2839,7 @@ subroutine ompi_win_detach_f(win,base,ierror) & BIND(C, name="ompi_win_detach_f") use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: base INTEGER, INTENT(IN) :: win INTEGER, INTENT(OUT) :: ierror end subroutine ompi_win_detach_f @@ -2555,11 +2897,11 @@ subroutine ompi_win_get_group_f(win,group,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_win_get_group_f -subroutine ompi_win_get_info_f(comm,info,ierror) & +subroutine ompi_win_get_info_f(win,info_used,ierror) & BIND(C, name="ompi_win_get_info_f") implicit none - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(OUT) :: info + INTEGER, INTENT(IN) :: win + INTEGER, INTENT(OUT) :: info_used INTEGER, INTENT(OUT) :: ierror end subroutine ompi_win_get_info_f @@ -2588,10 +2930,10 @@ subroutine ompi_win_post_f(group,assert,win,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_win_post_f -subroutine ompi_win_set_info_f(comm,info,ierror) & +subroutine ompi_win_set_info_f(win,info,ierror) & BIND(C, name="ompi_win_set_info_f") implicit none - INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: win INTEGER, INTENT(IN) :: info INTEGER, INTENT(OUT) :: ierror end subroutine ompi_win_set_info_f @@ -2683,7 +3025,7 @@ subroutine ompi_query_thread_f(provided,ierror) & end subroutine ompi_query_thread_f subroutine ompi_status_f082f_f(f08_status,f_status,ierror) & - BIND(C, name="ompi_status_f2f08_f") + BIND(C, name="ompi_status_f082f_f") use :: mpi_f08_types, only : MPI_Status, MPI_STATUS_SIZE implicit none TYPE(MPI_Status), INTENT(IN) :: f08_status @@ -2692,7 +3034,7 @@ subroutine ompi_status_f082f_f(f08_status,f_status,ierror) & end subroutine ompi_status_f082f_f subroutine ompi_status_f2f08_f(f_status,f08_status,ierror) & - BIND(C, name="ompi_status_f082f_f") + BIND(C, name="ompi_status_f2f08_f") use :: 
mpi_f08_types, only : MPI_Status, MPI_STATUS_SIZE implicit none INTEGER, INTENT(IN) :: f_status(MPI_STATUS_SIZE) @@ -2826,7 +3168,7 @@ subroutine ompi_file_iread_f(fh,buf,count,datatype,request,ierror) & BIND(C, name="ompi_file_iread_f") implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: request @@ -2839,7 +3181,7 @@ subroutine ompi_file_iread_at_f(fh,offset,buf,count,datatype,request,ierror) & implicit none INTEGER, INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: request @@ -2850,7 +3192,7 @@ subroutine ompi_file_iread_all_f(fh,buf,count,datatype,request,ierror) & BIND(C, name="ompi_file_iread_all_f") implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: request @@ -2863,7 +3205,7 @@ subroutine ompi_file_iread_at_all_f(fh,offset,buf,count,datatype,request,ierror) implicit none INTEGER, INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: request @@ -2874,7 +3216,7 @@ subroutine ompi_file_iread_shared_f(fh,buf,count,datatype,request,ierror) & BIND(C, name="ompi_file_iread_shared_f") implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: 
request @@ -2885,7 +3227,7 @@ subroutine ompi_file_iwrite_f(fh,buf,count,datatype,request,ierror) & BIND(C, name="ompi_file_iwrite_f") implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: request @@ -2898,7 +3240,7 @@ subroutine ompi_file_iwrite_at_f(fh,offset,buf,count,datatype,request,ierror) & implicit none INTEGER, INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: request @@ -2909,7 +3251,7 @@ subroutine ompi_file_iwrite_all_f(fh,buf,count,datatype,request,ierror) & BIND(C, name="ompi_file_iwrite_all_f") implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: request @@ -2922,7 +3264,7 @@ subroutine ompi_file_iwrite_at_all_f(fh,offset,buf,count,datatype,request,ierror implicit none INTEGER, INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: request @@ -2932,7 +3274,7 @@ end subroutine ompi_file_iwrite_at_all_f subroutine ompi_file_iwrite_shared_f(fh,buf,count,datatype,request,ierror) & BIND(C, name="ompi_file_iwrite_shared_f") implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: fh INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype @@ -2967,10 +3309,10 @@ 
subroutine ompi_file_read_f(fh,buf,count,datatype,status,ierror) & use :: mpi_f08_types, only : MPI_Status implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_read_f @@ -2979,10 +3321,10 @@ subroutine ompi_file_read_all_f(fh,buf,count,datatype,status,ierror) & use :: mpi_f08_types, only : MPI_Status implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_read_all_f @@ -2990,7 +3332,7 @@ subroutine ompi_file_read_all_begin_f(fh,buf,count,datatype,ierror) & BIND(C, name="ompi_file_read_all_begin_f") implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: ierror @@ -3001,8 +3343,8 @@ subroutine ompi_file_read_all_end_f(fh,buf,status,ierror) & use :: mpi_f08_types, only : MPI_Status implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf - TYPE(MPI_Status), INTENT(OUT) :: status + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_read_all_end_f @@ -3012,10 +3354,10 @@ subroutine ompi_file_read_at_f(fh,offset,buf,count,datatype,status,ierror) & implicit none INTEGER, INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) 
:: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_read_at_f @@ -3025,10 +3367,10 @@ subroutine ompi_file_read_at_all_f(fh,offset,buf,count,datatype,status,ierror) & implicit none INTEGER, INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_read_at_all_f @@ -3038,7 +3380,7 @@ subroutine ompi_file_read_at_all_begin_f(fh,offset,buf,count,datatype,ierror) & implicit none INTEGER, INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: ierror @@ -3049,8 +3391,8 @@ subroutine ompi_file_read_at_all_end_f(fh,buf,status,ierror) & use :: mpi_f08_types, only : MPI_Status implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf - TYPE(MPI_Status), INTENT(OUT) :: status + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_read_at_all_end_f @@ -3059,10 +3401,10 @@ subroutine ompi_file_read_ordered_f(fh,buf,count,datatype,status,ierror) & use :: mpi_f08_types, only : MPI_Status implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_read_ordered_f @@ -3070,7 +3412,7 @@ subroutine ompi_file_read_ordered_begin_f(fh,buf,count,datatype,ierror) & 
BIND(C, name="ompi_file_read_ordered_begin_f") implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: ierror @@ -3081,8 +3423,8 @@ subroutine ompi_file_read_ordered_end_f(fh,buf,status,ierror) & use :: mpi_f08_types, only : MPI_Status implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf - TYPE(MPI_Status), INTENT(OUT) :: status + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_read_ordered_end_f @@ -3091,10 +3433,10 @@ subroutine ompi_file_read_shared_f(fh,buf,count,datatype,status,ierror) & use :: mpi_f08_types, only : MPI_Status implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_read_shared_f @@ -3165,7 +3507,7 @@ subroutine ompi_file_write_f(fh,buf,count,datatype,status,ierror) & OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_write_f @@ -3177,7 +3519,7 @@ subroutine ompi_file_write_all_f(fh,buf,count,datatype,status,ierror) & OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_write_all_f @@ -3185,7 +3527,7 @@ subroutine ompi_file_write_all_begin_f(fh,buf,count,datatype,ierror) & BIND(C, name="ompi_file_write_all_begin_f") implicit none 
INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: ierror @@ -3196,8 +3538,8 @@ subroutine ompi_file_write_all_end_f(fh,buf,status,ierror) & use :: mpi_f08_types, only : MPI_Status implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf - TYPE(MPI_Status), INTENT(OUT) :: status + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_write_all_end_f @@ -3210,7 +3552,7 @@ subroutine ompi_file_write_at_f(fh,offset,buf,count,datatype,status,ierror) & OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_write_at_f @@ -3223,7 +3565,7 @@ subroutine ompi_file_write_at_all_f(fh,offset,buf,count,datatype,status,ierror) OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_write_at_all_f @@ -3233,7 +3575,7 @@ subroutine ompi_file_write_at_all_begin_f(fh,offset,buf,count,datatype,ierror) & implicit none INTEGER, INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: ierror @@ -3244,8 +3586,8 @@ subroutine ompi_file_write_at_all_end_f(fh,buf,status,ierror) & use :: mpi_f08_types, only : MPI_Status implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf - TYPE(MPI_Status), 
INTENT(OUT) :: status + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_write_at_all_end_f @@ -3257,7 +3599,7 @@ subroutine ompi_file_write_ordered_f(fh,buf,count,datatype,status,ierror) & OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_write_ordered_f @@ -3265,7 +3607,7 @@ subroutine ompi_file_write_ordered_begin_f(fh,buf,count,datatype,ierror) & BIND(C, name="ompi_file_write_ordered_begin_f") implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype INTEGER, INTENT(OUT) :: ierror @@ -3276,8 +3618,8 @@ subroutine ompi_file_write_ordered_end_f(fh,buf,status,ierror) & use :: mpi_f08_types, only : MPI_Status implicit none INTEGER, INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf - TYPE(MPI_Status), INTENT(OUT) :: status + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_write_ordered_end_f @@ -3289,7 +3631,7 @@ subroutine ompi_file_write_shared_f(fh,buf,count,datatype,status,ierror) & OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count INTEGER, INTENT(IN) :: datatype - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_write_shared_f @@ -3399,7 +3741,7 @@ subroutine ompi_mprobe_f(source,tag,comm,message,status,ierror) & INTEGER, INTENT(IN) :: source, tag INTEGER, INTENT(IN) :: comm INTEGER, INTENT(OUT) :: message - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, INTENT(OUT) :: 
ierror end subroutine ompi_mprobe_f @@ -3444,14 +3786,29 @@ subroutine ompi_ineighbor_allgather_f(sendbuf,sendcount,sendtype,recvbuf,recvcou BIND(C, name="ompi_ineighbor_allgather_f") use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount + INTEGER, INTENT(IN) :: sendtype, recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_ineighbor_allgather_f + +subroutine ompi_neighbor_allgather_init_f(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & + comm,info,request,ierror) & + BIND(C, name="ompi_neighbor_allgather_init_f") + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request + implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount INTEGER, INTENT(IN) :: sendtype, recvtype INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info INTEGER, INTENT(OUT) :: request INTEGER, INTENT(OUT) :: ierror -end subroutine ompi_ineighbor_allgather_f +end subroutine ompi_neighbor_allgather_init_f subroutine ompi_neighbor_allgatherv_f(sendbuf,sendcount,sendtype,recvbuf,recvcounts,displs, & recvtype,comm,ierror) & @@ -3472,15 +3829,31 @@ subroutine ompi_ineighbor_allgatherv_f(sendbuf,sendcount,sendtype,recvbuf,recvco BIND(C, name="ompi_ineighbor_allgatherv_f") use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) :: sendtype, recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror 
+end subroutine ompi_ineighbor_allgatherv_f + +subroutine ompi_neighbor_allgatherv_init_f(sendbuf,sendcount,sendtype,recvbuf,recvcounts,displs, & + recvtype,comm,info,request,ierror) & + BIND(C, name="ompi_neighbor_allgatherv_init_f") + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request + implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf INTEGER, INTENT(IN) :: sendcount INTEGER, INTENT(IN) :: recvcounts(*), displs(*) INTEGER, INTENT(IN) :: sendtype, recvtype INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info INTEGER, INTENT(OUT) :: request INTEGER, INTENT(OUT) :: ierror -end subroutine ompi_ineighbor_allgatherv_f +end subroutine ompi_neighbor_allgatherv_init_f subroutine ompi_neighbor_alltoall_f(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & comm,ierror) & @@ -3500,14 +3873,29 @@ subroutine ompi_ineighbor_alltoall_f(sendbuf,sendcount,sendtype,recvbuf,recvcoun BIND(C, name="ompi_ineighbor_alltoall_f") use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount + INTEGER, INTENT(IN) :: sendtype, recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_ineighbor_alltoall_f + +subroutine ompi_neighbor_alltoall_init_f(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & + comm,info,request,ierror) & + BIND(C, name="ompi_neighbor_alltoall_init_f") + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request + implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount INTEGER, INTENT(IN) :: sendtype, recvtype INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info INTEGER, INTENT(OUT) :: request INTEGER, INTENT(OUT) :: 
ierror -end subroutine ompi_ineighbor_alltoall_f +end subroutine ompi_neighbor_alltoall_init_f subroutine ompi_neighbor_alltoallv_f(sendbuf,sendcounts,sdispls,sendtype,recvbuf,recvcounts, & rdispls,recvtype,comm,ierror) & @@ -3527,14 +3915,29 @@ subroutine ompi_ineighbor_alltoallv_f(sendbuf,sendcounts,sdispls,sendtype,recvbu BIND(C, name="ompi_ineighbor_alltoallv_f") use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + INTEGER, INTENT(IN) :: sendtype, recvtype + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_ineighbor_alltoallv_f + +subroutine ompi_neighbor_alltoallv_init_f(sendbuf,sendcounts,sdispls,sendtype,recvbuf,recvcounts, & + rdispls,recvtype,comm,info,request,ierror) & + BIND(C, name="ompi_neighbor_alltoallv_init_f") + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request + implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf INTEGER, INTENT(IN) :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) INTEGER, INTENT(IN) :: sendtype, recvtype INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info INTEGER, INTENT(OUT) :: request INTEGER, INTENT(OUT) :: ierror -end subroutine ompi_ineighbor_alltoallv_f +end subroutine ompi_neighbor_alltoallv_init_f subroutine ompi_neighbor_alltoallw_f(sendbuf,sendcounts,sdispls,sendtypes,recvbuf,recvcounts, & rdispls,recvtypes,comm,ierror) & @@ -3555,14 +3958,86 @@ subroutine ompi_ineighbor_alltoallw_f(sendbuf,sendcounts,sdispls,sendtypes,recvb BIND(C, name="ompi_ineighbor_alltoallw_f") use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request, MPI_ADDRESS_KIND implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS 
:: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), recvcounts(*) + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) OMPI_ASYNCHRONOUS :: sdispls(*), rdispls(*) + INTEGER, INTENT(IN) :: sendtypes, recvtypes + INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_ineighbor_alltoallw_f + +subroutine ompi_neighbor_alltoallw_init_f(sendbuf,sendcounts,sdispls,sendtypes,recvbuf,recvcounts, & + rdispls,recvtypes,comm,info,request,ierror) & + BIND(C, name="ompi_neighbor_alltoallw_init_f") + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request, MPI_ADDRESS_KIND + implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf INTEGER, INTENT(IN) :: sendcounts(*), recvcounts(*) INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: sdispls(*), rdispls(*) INTEGER, INTENT(IN) :: sendtypes, recvtypes INTEGER, INTENT(IN) :: comm + INTEGER, INTENT(IN) :: info INTEGER, INTENT(OUT) :: request INTEGER, INTENT(OUT) :: ierror -end subroutine ompi_ineighbor_alltoallw_f +end subroutine ompi_neighbor_alltoallw_init_f + +subroutine ompi_session_get_info_f(session, info, ierror) & + BIND(C, name="ompi_session_get_info_f") + implicit none + integer, intent(in) :: session + integer, intent(out) :: info + integer, intent(out) :: ierror +end subroutine ompi_session_get_info_f + +subroutine ompi_session_get_nth_pset_f(session, info, n, pset_len, pset_name, ierror) & + BIND(C, name="ompi_session_get_nth_pset_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + integer, intent(in) :: session + integer, intent(in) :: info + integer, intent(in) :: n + integer, intent(inout) :: pset_len + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(OUT) :: pset_name + integer, intent(out) :: ierror +end subroutine ompi_session_get_nth_pset_f + +subroutine ompi_session_get_num_psets_f(session, info, npset_names, ierror) & + BIND(C, 
name="ompi_session_get_num_psets_f") + implicit none + integer, intent(in) :: session + integer, intent(in) :: info + integer, intent(out) :: npset_names + integer, intent(out) :: ierror +end subroutine ompi_session_get_num_psets_f + +subroutine ompi_session_get_pset_info_f(session, pset_name, info, ierror, name_len) & + BIND(C, name="ompi_session_get_pset_info_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + integer, intent(in) :: session + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(IN) :: pset_name + INTEGER, VALUE, INTENT(IN) :: name_len + integer, intent(out) :: info + integer, intent(out) :: ierror +end subroutine ompi_session_get_pset_info_f + +subroutine ompi_session_init_f(info, errhandler, session, ierror) & + BIND(C, name="ompi_session_init_f") + implicit none + integer, intent(in) :: info + integer, intent(in) :: errhandler + integer, intent(out) :: session + integer, intent(out) :: ierror +end subroutine ompi_session_init_f + +subroutine ompi_session_finalize_f(session, ierror) & + BIND(C, name="ompi_session_finalize_f") + implicit none + integer, intent(out) :: session + integer, intent(out) :: ierror +end subroutine ompi_session_finalize_f end interface diff --git a/ompi/mpi/fortran/use-mpi-f08/bsend_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/bsend_init_f08.F90 index dfaafdc8dcb..78e48fc1240 100644 --- a/ompi/mpi/fortran/use-mpi-f08/bsend_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/bsend_init_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -11,11 +11,13 @@ #include "mpi-f08-rename.h" +#include "mpi-f08-rename.h" + subroutine MPI_Bsend_init_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_bsend_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/buffer_attach_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/buffer_attach_f08.F90 index 5e3e907bd58..d5491bc753e 100644 --- a/ompi/mpi/fortran/use-mpi-f08/buffer_attach_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/buffer_attach_f08.F90 @@ -15,7 +15,7 @@ subroutine MPI_Buffer_attach_f08(buffer,size,ierror) use :: ompi_mpifh_bindings, only : ompi_buffer_attach_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE :: buffer + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS:: buffer INTEGER, INTENT(IN) :: size INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/comm_create_from_group_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/comm_create_from_group_f08.F90 new file mode 100644 index 00000000000..8f1befe0d8c --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/comm_create_from_group_f08.F90 @@ -0,0 +1,29 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +subroutine MPI_Comm_create_from_group_f08(group, stringtag, info, errhandler, newcomm, ierror) + use :: mpi_f08_types, only : MPI_Comm, MPI_Group, MPI_Errhandler, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_comm_create_from_group_f + implicit none + TYPE(MPI_Group), INTENT(IN) :: group + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newcomm + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_comm_create_from_group_f(group%MPI_VAL, stringtag, info%MPI_VAL, errhandler%MPI_VAL, & + newcomm%MPI_VAL, c_ierror, len(stringtag)) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Comm_create_from_group_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/comm_get_name_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/comm_get_name_f08.F90 index 3b7dad61b75..397e1c9aed9 100644 --- a/ompi/mpi/fortran/use-mpi-f08/comm_get_name_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/comm_get_name_f08.F90 @@ -19,7 +19,8 @@ subroutine MPI_Comm_get_name_f08(comm,comm_name,resultlen,ierror) INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompi_comm_get_name_f(comm%MPI_VAL,comm_name,resultlen,c_ierror,len(comm_name)) + call ompi_comm_get_name_f(comm%MPI_VAL,comm_name,resultlen,c_ierror, & + len(comm_name)) if (present(ierror)) ierror = c_ierror end subroutine MPI_Comm_get_name_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/comm_idup_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/comm_idup_f08.F90 index a0eb02bd672..5f48967dd4a 100644 --- a/ompi/mpi/fortran/use-mpi-f08/comm_idup_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/comm_idup_f08.F90 @@ -7,6 +7,8 @@ ! and Technology (RIST). All rights reserved. ! 
$COPYRIGHT$ +#include "ompi/mpi/fortran/configure-fortran-output.h" + #include "mpi-f08-rename.h" subroutine MPI_Comm_idup_f08(comm,newcomm,request,ierror) @@ -14,7 +16,7 @@ subroutine MPI_Comm_idup_f08(comm,newcomm,request,ierror) use :: ompi_mpifh_bindings, only : ompi_comm_idup_f implicit none TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Comm), INTENT(OUT) :: newcomm + TYPE(MPI_Comm), INTENT(OUT) OMPI_ASYNCHRONOUS :: newcomm TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/comm_idup_with_info_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/comm_idup_with_info_f08.F90 new file mode 100644 index 00000000000..c402a16709f --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/comm_idup_with_info_f08.F90 @@ -0,0 +1,30 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018-2020 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2021 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +#include "mpi-f08-rename.h" + +subroutine MPI_Comm_idup_with_info_f08(comm,info,newcomm,request,ierror) + use :: mpi_f08_types, only : MPI_Comm, MPI_Info, MPI_Request + use :: ompi_mpifh_bindings, only : ompi_comm_idup_with_info_f + implicit none + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Comm), INTENT(OUT) :: newcomm + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_comm_idup_with_info_f(comm%MPI_VAL,info%MPI_VAL,newcomm%MPI_VAL,request%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Comm_idup_with_info_f08 + + diff --git a/ompi/mpi/fortran/use-mpi-f08/compare_and_swap_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/compare_and_swap_f08.F90 index d62dc914724..07f9080087b 100644 --- a/ompi/mpi/fortran/use-mpi-f08/compare_and_swap_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/compare_and_swap_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2010-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2014 Los Alamos National Security, LLC. ! All Rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -17,8 +17,8 @@ subroutine MPI_Compare_and_swap_f08(origin_addr,compare_addr,result_addr,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Win, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_compare_and_swap_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: origin_addr, compare_addr - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: result_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr, compare_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: result_addr TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, INTENT(IN) :: target_rank INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/exscan_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/exscan_init_f08.F90 similarity index 61% rename from ompi/mpiext/pcollreq/use-mpi-f08/exscan_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/exscan_init_f08.F90 index ceefd482f52..3669eb7d454 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/exscan_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/exscan_init_f08.F90 @@ -1,21 +1,23 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! 
$COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Exscan_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) +#include "mpi-f08-rename.h" + +subroutine MPI_Exscan_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_exscan_init_f + use :: ompi_mpifh_bindings, only : ompi_exscan_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op @@ -25,8 +27,8 @@ subroutine MPIX_Exscan_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,requ INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_exscan_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& + call ompi_exscan_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& op%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Exscan_init_f08 +end subroutine MPI_Exscan_init_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/f_sync_reg_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/f_sync_reg_f08.F90 index 6b100045f82..ec8d86c5cb6 100644 --- a/ompi/mpi/fortran/use-mpi-f08/f_sync_reg_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/f_sync_reg_f08.F90 @@ -15,7 +15,7 @@ subroutine MPI_F_sync_reg_f08(buf) use :: ompi_mpifh_bindings, only : ompi_f_sync_reg_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS:: buf call ompi_f_sync_reg_f(buf) diff --git a/ompi/mpi/fortran/use-mpi-f08/fetch_and_op_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/fetch_and_op_f08.F90 index e618a861bf0..4958a4019ac 100644 --- 
a/ompi/mpi/fortran/use-mpi-f08/fetch_and_op_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/fetch_and_op_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2010-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2014 Los Alamos National Security, LLC. ! All Rights reserved. ! Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,8 +16,8 @@ subroutine MPI_Fetch_and_op_f08(origin_addr,result_addr,datatype,target_rank, & use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Win, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_fetch_and_op_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: origin_addr - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: result_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: result_addr TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, INTENT(IN) :: target_rank INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iread_all_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iread_all_f08.F90 index 6f28c453492..88a2f714bef 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_iread_all_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_iread_all_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_iread_all_f08(fh,buf,count,datatype,request,ierror) use :: ompi_mpifh_bindings, only : ompi_file_iread_all_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iread_at_all_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iread_at_all_f08.F90 index ce12f53ba16..b62a3de3ce4 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_iread_at_all_f08.F90 +++ 
b/ompi/mpi/fortran/use-mpi-f08/file_iread_at_all_f08.F90 @@ -17,7 +17,7 @@ subroutine MPI_File_iread_at_all_f08(fh,offset,buf,count,datatype,request,ierror implicit none TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iread_at_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iread_at_f08.F90 index 945fafd844b..177a855ba35 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_iread_at_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_iread_at_f08.F90 @@ -17,7 +17,7 @@ subroutine MPI_File_iread_at_f08(fh,offset,buf,count,datatype,request,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iread_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iread_f08.F90 index a370b062ad5..4b403188c9d 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_iread_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_iread_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_iread_f08(fh,buf,count,datatype,request,ierror) use :: ompi_mpifh_bindings, only : ompi_file_iread_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iread_shared_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iread_shared_f08.F90 index bd9bb3aa093..91e40cc4e52 100644 --- 
a/ompi/mpi/fortran/use-mpi-f08/file_iread_shared_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_iread_shared_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_iread_shared_f08(fh,buf,count,datatype,request,ierror) use :: ompi_mpifh_bindings, only : ompi_file_iread_shared_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_all_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_all_f08.F90 index 49eda45013c..e42f494ab10 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_all_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_all_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_iwrite_all_f08(fh,buf,count,datatype,request,ierror) use :: ompi_mpifh_bindings, only : ompi_file_iwrite_all_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_at_all_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_at_all_f08.F90 index 1d2d98fe5da..149c7ba6d5b 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_at_all_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_at_all_f08.F90 @@ -17,7 +17,7 @@ subroutine MPI_File_iwrite_at_all_f08(fh,offset,buf,count,datatype,request,ierro implicit none TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request diff --git 
a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_at_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_at_f08.F90 index 8f7a3f55697..08135a0bd2b 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_at_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_at_f08.F90 @@ -17,7 +17,7 @@ subroutine MPI_File_iwrite_at_f08(fh,offset,buf,count,datatype,request,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_f08.F90 index 72d7b499730..e6e17ad77f3 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_iwrite_f08(fh,buf,count,datatype,request,ierror) use :: ompi_mpifh_bindings, only : ompi_file_iwrite_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_shared_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_shared_f08.F90 index 8f5500c664f..de15107a306 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_shared_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_shared_f08.F90 @@ -15,7 +15,7 @@ subroutine MPI_File_iwrite_shared_f08(fh,buf,count,datatype,request,ierror) use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request use :: ompi_mpifh_bindings, only : ompi_file_iwrite_shared_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) 
:: buf TYPE(MPI_File), INTENT(IN) :: fh INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype diff --git a/ompi/mpi/fortran/use-mpi-f08/file_read_all_begin_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_read_all_begin_f08.F90 index f4a8cb36842..a2b2060e246 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_read_all_begin_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_read_all_begin_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_read_all_begin_f08(fh,buf,count,datatype,ierror) use :: ompi_mpifh_bindings, only : ompi_file_read_all_begin_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/file_read_all_end_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_read_all_end_f08.F90 index 051ee558012..0a5d251a128 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_read_all_end_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_read_all_end_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_read_all_end_f08(fh,buf,status,ierror) use :: ompi_mpifh_bindings, only : ompi_file_read_all_end_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf TYPE(MPI_Status), INTENT(OUT) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/file_read_at_all_begin_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_read_at_all_begin_f08.F90 index 8e79583995f..8d4527e19bd 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_read_at_all_begin_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_read_at_all_begin_f08.F90 @@ -17,7 +17,7 @@ subroutine MPI_File_read_at_all_begin_f08(fh,offset,buf,count,datatype,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), 
INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/file_read_at_all_end_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_read_at_all_end_f08.F90 index 3fc8c90d258..0cf1a58bda5 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_read_at_all_end_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_read_at_all_end_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_read_at_all_end_f08(fh,buf,status,ierror) use :: ompi_mpifh_bindings, only : ompi_file_read_at_all_end_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf TYPE(MPI_Status), INTENT(OUT) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/file_read_ordered_begin_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_read_ordered_begin_f08.F90 index 816ea90b39c..3f67832e930 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_read_ordered_begin_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_read_ordered_begin_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_read_ordered_begin_f08(fh,buf,count,datatype,ierror) use :: ompi_mpifh_bindings, only : ompi_file_read_ordered_begin_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/file_read_ordered_end_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_read_ordered_end_f08.F90 index 9dac5eaa06b..8ddde76a44d 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_read_ordered_end_f08.F90 +++ 
b/ompi/mpi/fortran/use-mpi-f08/file_read_ordered_end_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_read_ordered_end_f08(fh,buf,status,ierror) use :: ompi_mpifh_bindings, only : ompi_file_read_ordered_end_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf TYPE(MPI_Status), INTENT(OUT) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/file_write_all_begin_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_write_all_begin_f08.F90 index 7e3f2567dde..6bdbc0ec7ca 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_write_all_begin_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_write_all_begin_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_write_all_begin_f08(fh,buf,count,datatype,ierror) use :: ompi_mpifh_bindings, only : ompi_file_write_all_begin_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/file_write_all_end_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_write_all_end_f08.F90 index 530422099f8..8a42355052a 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_write_all_end_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_write_all_end_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_write_all_end_f08(fh,buf,status,ierror) use :: ompi_mpifh_bindings, only : ompi_file_write_all_end_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf TYPE(MPI_Status), INTENT(OUT) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/file_write_at_all_begin_f08.F90 
b/ompi/mpi/fortran/use-mpi-f08/file_write_at_all_begin_f08.F90 index 13a2677d8d3..22a80139d15 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_write_at_all_begin_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_write_at_all_begin_f08.F90 @@ -17,7 +17,7 @@ subroutine MPI_File_write_at_all_begin_f08(fh,offset,buf,count,datatype,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/file_write_at_all_end_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_write_at_all_end_f08.F90 index 087a03ea5fd..5a180b34252 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_write_at_all_end_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_write_at_all_end_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_write_at_all_end_f08(fh,buf,status,ierror) use :: ompi_mpifh_bindings, only : ompi_file_write_at_all_end_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf TYPE(MPI_Status), INTENT(OUT) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/file_write_ordered_begin_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_write_ordered_begin_f08.F90 index 0e1aab6f447..cec70707280 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_write_ordered_begin_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_write_ordered_begin_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_write_ordered_begin_f08(fh,buf,count,datatype,ierror) use :: ompi_mpifh_bindings, only : ompi_file_write_ordered_begin_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) 
:: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/file_write_ordered_end_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_write_ordered_end_f08.F90 index 7a531313270..2226ec5b5d8 100644 --- a/ompi/mpi/fortran/use-mpi-f08/file_write_ordered_end_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/file_write_ordered_end_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_File_write_ordered_end_f08(fh,buf,status,ierror) use :: ompi_mpifh_bindings, only : ompi_file_write_ordered_end_f implicit none TYPE(MPI_File), INTENT(IN) :: fh - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf TYPE(MPI_Status), INTENT(OUT) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/gather_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/gather_init_f08.F90 similarity index 64% rename from ompi/mpiext/pcollreq/use-mpi-f08/gather_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/gather_init_f08.F90 index a5e93e9f1cc..d18bfd1bc19 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/gather_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/gather_init_f08.F90 @@ -1,22 +1,24 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! 
$COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Gather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,& +#include "mpi-f08-rename.h" + +subroutine MPI_Gather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,& recvtype,root,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_gather_init_f + use :: ompi_mpifh_bindings, only : ompi_gather_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount, root TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype @@ -26,8 +28,8 @@ subroutine MPIX_Gather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,& INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_gather_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcount,& + call ompi_gather_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcount,& recvtype%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Gather_init_f08 +end subroutine MPI_Gather_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/gatherv_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/gatherv_init_f08.F90 similarity index 61% rename from ompi/mpiext/pcollreq/use-mpi-f08/gatherv_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/gatherv_init_f08.F90 index 411b557a231..5ab996416ed 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/gatherv_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/gatherv_init_f08.F90 @@ -1,24 +1,26 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! 
Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Gatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& +#include "mpi-f08-rename.h" + +subroutine MPI_Gatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& displs,recvtype,root,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_gatherv_init_f + use :: ompi_mpifh_bindings, only : ompi_gatherv_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, root - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -27,8 +29,8 @@ subroutine MPIX_Gatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_gatherv_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcounts,& + call ompi_gatherv_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcounts,& displs,recvtype%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Gatherv_init_f08 +end subroutine MPI_Gatherv_init_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/get_accumulate_f08.F90 
b/ompi/mpi/fortran/use-mpi-f08/get_accumulate_f08.F90 index 583e4e00008..9fd4f2ead5b 100644 --- a/ompi/mpi/fortran/use-mpi-f08/get_accumulate_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/get_accumulate_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2010-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2014 Los Alamos National Security, LLC. ! All Rights reserved. ! Copyright (c) 2018-2020 Research Organization for Information Science @@ -18,10 +18,10 @@ subroutine MPI_Get_accumulate_f08(origin_addr,origin_count,origin_datatype,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Win, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_get_accumulate_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, result_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: result_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: result_addr TYPE(MPI_Datatype), INTENT(IN) :: result_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp TYPE(MPI_Datatype), INTENT(IN) :: target_datatype diff --git a/ompi/mpi/fortran/use-mpi-f08/get_address_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/get_address_f08.F90 index e17d200c1ed..187086d0fc5 100644 --- a/ompi/mpi/fortran/use-mpi-f08/get_address_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/get_address_f08.F90 @@ -15,7 +15,7 @@ subroutine MPI_Get_address_f08(location,address,ierror) use :: mpi_f08_types, only : MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_get_address_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: location + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: location INTEGER(MPI_ADDRESS_KIND), INTENT(OUT) :: address INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: 
c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/get_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/get_f08.F90 index 38fa5337c35..a51ca425ccd 100644 --- a/ompi/mpi/fortran/use-mpi-f08/get_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/get_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2010-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2018 Los Alamos National Security, LLC. ! All Rights reserved. ! Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,7 +16,7 @@ subroutine MPI_Get_f08(origin_addr,origin_count,origin_datatype,target_rank,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Win, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_get_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp diff --git a/ompi/mpi/fortran/use-mpi-f08/group_from_session_pset_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/group_from_session_pset_f08.F90 new file mode 100644 index 00000000000..77cfed44a23 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/group_from_session_pset_f08.F90 @@ -0,0 +1,29 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019-2021 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" +#include "mpi-f08-rename.h" + +subroutine MPI_Group_from_session_pset_f08(session, pset_name, newgroup, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Group + use :: ompi_mpifh_bindings, only : ompi_group_from_session_pset_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Group), INTENT(OUT) :: newgroup + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_group_from_session_pset_f(session%MPI_VAL, pset_name, newgroup%MPI_VAL, c_ierror, len(pset_name)) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Group_from_session_pset_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/iallgather_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/iallgather_f08.F90 index e6eb5d07ecc..f178b948529 100644 --- a/ompi/mpi/fortran/use-mpi-f08/iallgather_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/iallgather_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -15,8 +15,8 @@ subroutine MPI_Iallgather_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvt use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_iallgather_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype diff --git a/ompi/mpi/fortran/use-mpi-f08/iallgatherv_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/iallgatherv_f08.F90 index e9b4b2af7de..3d44e27b0c7 100644 --- a/ompi/mpi/fortran/use-mpi-f08/iallgatherv_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/iallgatherv_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,10 +16,10 @@ subroutine MPI_Iallgatherv_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_iallgatherv_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/iallreduce_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/iallreduce_f08.F90 index e1319a59aef..e0bbffec347 100644 --- a/ompi/mpi/fortran/use-mpi-f08/iallreduce_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/iallreduce_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -15,8 +15,8 @@ subroutine MPI_Iallreduce_f08(sendbuf,recvbuf,count,datatype,op,comm,request,ier use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_iallreduce_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op diff --git a/ompi/mpi/fortran/use-mpi-f08/ialltoall_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ialltoall_f08.F90 index 7a4963ae108..3df84b0352d 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ialltoall_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ialltoall_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,8 +16,8 @@ subroutine MPI_Ialltoall_f08(sendbuf,sendcount,sendtype,recvbuf,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ialltoall_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype diff --git a/ompi/mpi/fortran/use-mpi-f08/ialltoallv_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ialltoallv_f08.F90 index c5b91bc499b..65bc9858931 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ialltoallv_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ialltoallv_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,9 +16,9 @@ subroutine MPI_Ialltoallv_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ialltoallv_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/ialltoallw_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ialltoallw_f08.F90 index 696a828e24e..c5432df5815 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ialltoallw_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ialltoallw_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -17,10 +17,10 @@ subroutine MPI_Ialltoallw_f08(sendbuf,sendcounts,sdispls,sendtypes,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ialltoallw_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: sendtypes(*), recvtypes(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + TYPE(MPI_Datatype), INTENT(IN) OMPI_ASYNCHRONOUS :: sendtypes(*), recvtypes(*) TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/ibcast_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ibcast_f08.F90 index 9d8492e69a7..c6778c3173b 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ibcast_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ibcast_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,7 +16,7 @@ subroutine MPI_Ibcast_f08(buffer,count,datatype,root,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ibcast_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: buffer + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buffer INTEGER, INTENT(IN) :: count, root TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/ibsend_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ibsend_f08.F90 index f89926bed6e..9e51ddeb526 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ibsend_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ibsend_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! Copyright (c) 2018-2020 Research Organization for Information Science @@ -15,7 +15,7 @@ subroutine MPI_Ibsend_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ibsend_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/iexscan_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/iexscan_f08.F90 index aee9b90b8cf..072b3d15e72 100644 --- a/ompi/mpi/fortran/use-mpi-f08/iexscan_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/iexscan_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! 
Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! Copyright (c) 2018-2020 Research Organization for Information Science @@ -15,8 +15,8 @@ subroutine MPI_Iexscan_f08(sendbuf,recvbuf,count,datatype,op,comm,request,ierror use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_iexscan_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op diff --git a/ompi/mpi/fortran/use-mpi-f08/igather_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/igather_f08.F90 index e86dbe4e699..f69ddca8ef2 100644 --- a/ompi/mpi/fortran/use-mpi-f08/igather_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/igather_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,8 +16,8 @@ subroutine MPI_Igather_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_igather_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount, root TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype diff --git a/ompi/mpi/fortran/use-mpi-f08/igatherv_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/igatherv_f08.F90 index 65577d4559c..5a8d817deb7 100644 --- a/ompi/mpi/fortran/use-mpi-f08/igatherv_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/igatherv_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,10 +16,10 @@ subroutine MPI_Igatherv_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_igatherv_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, root - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/improbe_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/improbe_f08.F90 index 4a5b870464f..e1039819975 100644 --- a/ompi/mpi/fortran/use-mpi-f08/improbe_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/improbe_f08.F90 @@ -17,7 +17,7 @@ subroutine MPI_Improbe_f08(source,tag,comm,flag,message,status,ierror) TYPE(MPI_Comm), INTENT(IN) :: comm LOGICAL, INTENT(OUT) :: flag TYPE(MPI_Message), INTENT(OUT) :: message - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/ineighbor_allgather_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ineighbor_allgather_f08.F90 index 2db49aff9f4..d487136a03b 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ineighbor_allgather_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ineighbor_allgather_f08.F90 @@ -16,8 +16,8 @@ subroutine MPI_Ineighbor_allgather_f08(sendbuf,sendcount,sendtype,recvbuf,recvco use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ineighbor_allgather_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: 
sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype diff --git a/ompi/mpi/fortran/use-mpi-f08/ineighbor_allgatherv_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ineighbor_allgatherv_f08.F90 index f87252202bb..74b606c4ae3 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ineighbor_allgatherv_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ineighbor_allgatherv_f08.F90 @@ -17,10 +17,10 @@ subroutine MPI_Ineighbor_allgatherv_f08(sendbuf,sendcount,sendtype,recvbuf,recvc use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ineighbor_allgatherv_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount - INTEGER, INTENT(IN) :: recvcounts(*), displs(*) + INTEGER OMPI_ASYNCHRONOUS, INTENT(IN) :: recvcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoall_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoall_f08.F90 index 71e59019e90..e87e9d567d2 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoall_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoall_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2013 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -17,8 +17,8 @@ subroutine MPI_Ineighbor_alltoall_f08(sendbuf,sendcount,sendtype,recvbuf,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ineighbor_alltoall_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype diff --git a/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoallv_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoallv_f08.F90 index fff34a9a416..ed73dae8593 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoallv_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoallv_f08.F90 @@ -17,9 +17,9 @@ subroutine MPI_Ineighbor_alltoallv_f08(sendbuf,sendcounts,sdispls,sendtype,recvb use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ineighbor_alltoallv_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - INTEGER, INTENT(IN) :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoallw_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoallw_f08.F90 index a017ae3d255..6a3970b4f8e 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoallw_f08.F90 +++ 
b/ompi/mpi/fortran/use-mpi-f08/ineighbor_alltoallw_f08.F90 @@ -16,12 +16,12 @@ subroutine MPI_Ineighbor_alltoallw_f08(sendbuf,sendcounts,sdispls,sendtypes,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_ineighbor_alltoallw_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - INTEGER, INTENT(IN) :: sendcounts(*), recvcounts(*) - INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: sdispls(*), rdispls(*) - TYPE(MPI_Datatype), INTENT(IN) :: sendtypes(*) - TYPE(MPI_Datatype), INTENT(IN) :: recvtypes(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER OMPI_ASYNCHRONOUS, INTENT(IN) :: sendcounts(*), recvcounts(*) + INTEGER(MPI_ADDRESS_KIND) OMPI_ASYNCHRONOUS, INTENT(IN) :: sdispls(*), rdispls(*) + TYPE(MPI_Datatype) OMPI_ASYNCHRONOUS, INTENT(IN) :: sendtypes(*) + TYPE(MPI_Datatype) OMPI_ASYNCHRONOUS, INTENT(IN) :: recvtypes(*) TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/info_get_string_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/info_get_string_f08.F90 new file mode 100644 index 00000000000..19e97056b05 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/info_get_string_f08.F90 @@ -0,0 +1,28 @@ +! -*- f90 -*- +! +! Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2012 Los Alamos National Security, LLC. +! All Rights reserved. +! Copyright (c) 2019-2020 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! $COPYRIGHT$ + +#include "mpi-f08-rename.h" + +subroutine MPI_Info_get_string_f08(info,key,buflen,value,flag,ierror) + use :: mpi_f08_types, only : MPI_Info + ! See note in mpi-f-interfaces-bind.h for why we "use mpi" here and + ! call a PMPI_* subroutine below. 
+ use :: mpi, only : PMPI_Info_get_string + implicit none + TYPE(MPI_Info), INTENT(IN) :: info + CHARACTER(LEN=*), INTENT(IN) :: key + INTEGER, INTENT(INOUT) :: buflen + CHARACTER(LEN=*), INTENT(OUT) :: value + LOGICAL, INTENT(OUT) :: flag + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call PMPI_Info_get_string(info%MPI_VAL,key,buflen,value,flag,c_ierror) + if (present(ierror)) ierror = c_ierror +end subroutine MPI_Info_get_string_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/intercomm_create_from_groups_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/intercomm_create_from_groups_f08.F90 new file mode 100644 index 00000000000..9b92a9db9ab --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/intercomm_create_from_groups_f08.F90 @@ -0,0 +1,35 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +subroutine MPI_Intercomm_create_from_groups_f08(local_group, local_leader, remote_group, & + remote_leader, stringtag, info, errhandler, & + newintercomm, ierror) + use :: mpi_f08_types, only : MPI_Comm, MPI_Group, MPI_Errhandler, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_intercomm_create_from_groups_f + implicit none + TYPE(MPI_Group), INTENT(IN) :: local_group, remote_group + INTEGER, INTENT(IN):: local_leader, remote_leader + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newintercomm + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_intercomm_create_from_groups_f(local_group%MPI_VAL, local_leader, & + remote_group%MPI_VAL, & + remote_leader, stringtag, info%MPI_VAL, & + errhandler%MPI_VAL, & + newintercomm%MPI_VAL, c_ierror, len(stringtag)) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Intercomm_create_from_groups_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/iprobe_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/iprobe_f08.F90 index 9d44c3c0673..b53af9a8efd 100644 --- a/ompi/mpi/fortran/use-mpi-f08/iprobe_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/iprobe_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_Iprobe_f08(source,tag,comm,flag,status,ierror) INTEGER, INTENT(IN) :: source, tag TYPE(MPI_Comm), INTENT(IN) :: comm LOGICAL, INTENT(OUT) :: flag - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror @@ -29,7 +29,7 @@ subroutine PMPI_Iprobe(source, tag, comm, flag, status, ierror) integer, intent(in) :: tag integer, intent(in) :: comm logical, intent(out) :: flag - TYPE(MPI_Status), intent(out) :: status + TYPE(MPI_Status) :: status integer, intent(out) :: ierror end subroutine PMPI_Iprobe end interface diff --git a/ompi/mpi/fortran/use-mpi-f08/ireduce_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ireduce_f08.F90 index 
818efd186a0..f69dd777fb9 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ireduce_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ireduce_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! Copyright (c) 2018-2020 Research Organization for Information Science @@ -15,8 +15,8 @@ subroutine MPI_Ireduce_f08(sendbuf,recvbuf,count,datatype,op,root,comm,request,i use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ireduce_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count, root TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op diff --git a/ompi/mpi/fortran/use-mpi-f08/ireduce_scatter_block_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ireduce_scatter_block_f08.F90 index a44d2cf54ba..0e2a3906131 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ireduce_scatter_block_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ireduce_scatter_block_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -15,8 +15,8 @@ subroutine MPI_Ireduce_scatter_block_f08(sendbuf,recvbuf,recvcount,datatype,op,c use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ireduce_scatter_block_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: recvcount TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op diff --git a/ompi/mpi/fortran/use-mpi-f08/ireduce_scatter_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ireduce_scatter_f08.F90 index 87bb26eeb25..8f1c4db8d46 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ireduce_scatter_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ireduce_scatter_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -15,9 +15,9 @@ subroutine MPI_Ireduce_scatter_f08(sendbuf,recvbuf,recvcounts,datatype,op,comm,r use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ireduce_scatter_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*) TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/iscan_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/iscan_f08.F90 index bf8a8524180..5504b6f16c0 100644 --- a/ompi/mpi/fortran/use-mpi-f08/iscan_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/iscan_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -15,8 +15,8 @@ subroutine MPI_Iscan_f08(sendbuf,recvbuf,count,datatype,op,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_iscan_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op diff --git a/ompi/mpi/fortran/use-mpi-f08/iscatter_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/iscatter_f08.F90 index 63c6ded5a47..3849525959e 100644 --- a/ompi/mpi/fortran/use-mpi-f08/iscatter_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/iscatter_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,8 +16,8 @@ subroutine MPI_Iscatter_f08(sendbuf,sendcount,sendtype,recvbuf,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_iscatter_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount, root TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype diff --git a/ompi/mpi/fortran/use-mpi-f08/iscatterv_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/iscatterv_f08.F90 index 1d7e81447e9..ddc1b429a69 100644 --- a/ompi/mpi/fortran/use-mpi-f08/iscatterv_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/iscatterv_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,10 +16,10 @@ subroutine MPI_Iscatterv_f08(sendbuf,sendcounts,displs,sendtype,recvbuf,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_iscatterv_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: recvcount, root - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/isendrecv_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/isendrecv_f08.F90 new file mode 100644 index 00000000000..0975c50f4ab --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/isendrecv_f08.F90 @@ -0,0 +1,36 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2012 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018-2020 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +#include "mpi-f08-rename.h" + +subroutine MPI_Isendrecv_f08(sendbuf,sendcount,sendtype,dest,sendtag,recvbuf, & + recvcount,recvtype,source,recvtag,comm,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request + use :: ompi_mpifh_bindings, only : ompi_isendrecv_f + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf + INTEGER, INTENT(IN) :: sendcount, dest, sendtag, recvcount, source, recvtag + TYPE(MPI_Datatype), INTENT(IN) :: sendtype + TYPE(MPI_Datatype), INTENT(IN) :: recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_isendrecv_f(sendbuf,sendcount,sendtype%MPI_VAL,dest,sendtag,recvbuf, & + recvcount,recvtype%MPI_VAL,source,recvtag,comm%MPI_VAL,request%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Isendrecv_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/isendrecv_replace_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/isendrecv_replace_f08.F90 new file mode 100644 index 00000000000..91a18ca4ac6 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/isendrecv_replace_f08.F90 @@ -0,0 +1,34 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2012 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018-2020 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +#include "mpi-f08-rename.h" + +subroutine MPI_Isendrecv_replace_f08(buf,count,datatype,dest,sendtag,source, & + recvtag,comm,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request + use :: ompi_mpifh_bindings, only : ompi_isendrecv_replace_f + implicit none + OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf + INTEGER, INTENT(IN) :: count, dest, sendtag, source, recvtag + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_isendrecv_replace_f(buf,count,datatype%MPI_VAL,dest,sendtag,source, & + recvtag,comm%MPI_VAL,request%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Isendrecv_replace_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/issend_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/issend_f08.F90 index f162a4e5839..d9ba894d04d 100644 --- a/ompi/mpi/fortran/use-mpi-f08/issend_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/issend_f08.F90 @@ -15,7 +15,7 @@ subroutine MPI_Issend_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_issend_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.F90 b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.F90 index 2bbd07eb5f9..71cefb1f128 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.F90 @@ -10,6 +10,8 @@ ! Copyright (c) 2015-2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! 
Copyright (c) 2017-2018 FUJITSU LIMITED. All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! This file provides the interface specifications for the MPI Fortran diff --git a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.h.in b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.h.in index db1e96f22bd..f67a295eacc 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.h.in +++ b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.h.in @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2015 Los Alamos National Security, LLC. ! All rights reserved. ! Copyright (c) 2012 The University of Tennessee and The University @@ -10,6 +10,8 @@ ! Copyright (c) 2015-2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2017-2018 FUJITSU LIMITED. All rights reserved. +! Copyright (c) 2021 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! 
This file provides the interface specifications for the MPI Fortran @@ -35,7 +37,7 @@ subroutine MPI_Bsend_init_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -48,7 +50,7 @@ interface MPI_Buffer_attach subroutine MPI_Buffer_attach_f08(buffer,size,ierror) implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buffer - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buffer + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buffer INTEGER, INTENT(IN) :: size INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Buffer_attach_f08 @@ -89,7 +91,7 @@ subroutine MPI_Ibsend_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -105,7 +107,7 @@ subroutine MPI_Iprobe_f08(source,tag,comm,flag,status,ierror) INTEGER, INTENT(IN) :: source, tag TYPE(MPI_Comm), INTENT(IN) :: comm LOGICAL, INTENT(OUT) :: flag - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Iprobe_f08 end interface MPI_Iprobe @@ -115,7 +117,7 @@ subroutine MPI_Irecv_f08(buf,count,datatype,source,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf INTEGER, 
INTENT(IN) :: count, source, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -129,7 +131,7 @@ subroutine MPI_Irsend_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -143,7 +145,7 @@ subroutine MPI_Isend_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -152,12 +154,43 @@ subroutine MPI_Isend_f08(buf,count,datatype,dest,tag,comm,request,ierror) end subroutine MPI_Isend_f08 end interface MPI_Isend +interface MPI_Isendrecv +subroutine MPI_Isendrecv_f08(sendbuf,sendcount,sendtype,dest,sendtag,recvbuf, & + recvcount,recvtype,source,recvtag,comm,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + INTEGER, INTENT(IN) :: sendcount, dest, sendtag, recvcount, source, recvtag + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Isendrecv_f08 +end interface MPI_Isendrecv + +interface MPI_Isendrecv_replace +subroutine 
MPI_Isendrecv_replace_f08(buf,count,datatype,dest,sendtag,source,recvtag, & + comm,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + INTEGER, INTENT(IN) :: count, dest, sendtag, source, recvtag + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Isendrecv_replace_f08 +end interface MPI_Isendrecv_replace + interface MPI_Issend subroutine MPI_Issend_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -250,7 +283,7 @@ subroutine MPI_Probe_f08(source,tag,comm,status,ierror) implicit none INTEGER, INTENT(IN) :: source, tag TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Probe_f08 end interface MPI_Probe @@ -274,7 +307,7 @@ subroutine MPI_Recv_init_f08(buf,count,datatype,source,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, source, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -321,7 +354,7 @@ subroutine MPI_Rsend_init_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ 
buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -379,7 +412,7 @@ subroutine MPI_Send_init_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -388,6 +421,71 @@ subroutine MPI_Send_init_f08(buf,count,datatype,dest,tag,comm,request,ierror) end subroutine MPI_Send_init_f08 end interface MPI_Send_init +interface MPI_Session_get_info +subroutine MPI_Session_get_info_f08(session, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_info_f08 +end interface MPI_Session_get_info + +interface MPI_Session_get_nth_pset +subroutine MPI_Session_get_nth_pset_f08(session, info, n, pset_len, pset_name, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, INTENT(IN) :: n + INTEGER, INTENT(INOUT) :: pset_len + CHARACTER(LEN=*), INTENT(OUT) :: pset_name + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_nth_pset_f08 +end interface MPI_Session_get_nth_pset + +interface MPI_Session_get_num_psets +subroutine MPI_Session_get_num_psets_f08(session, info, npset_names, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, INTENT(OUT) 
:: npset_names + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_num_psets_f08 +end interface MPI_Session_get_num_psets + +interface MPI_Session_get_pset_info +subroutine MPI_Session_get_pset_info_f08(session, pset_name, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_pset_info_f08 +end interface MPI_Session_get_pset_info + +interface MPI_Session_init +subroutine MPI_Session_init_f08(info,errhandler,session,ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info, MPI_Errhandler + implicit none + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Session), INTENT(OUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_init_f08 +end interface MPI_Session_init + +interface MPI_Session_finalize +subroutine MPI_Session_finalize_f08(session,ierror) + use :: mpi_f08_types, only : MPI_Session + implicit none + TYPE(MPI_Session), INTENT(INOUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_finalize_f08 +end interface MPI_Session_finalize + interface MPI_Ssend subroutine MPI_Ssend_f08(buf,count,datatype,dest,tag,comm,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm @@ -406,7 +504,7 @@ subroutine MPI_Ssend_init_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -544,7 +642,7 @@ subroutine MPI_Get_address_f08(location,address,ierror) use :: mpi_f08_types, 
only : MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ location - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: location + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: location INTEGER(MPI_ADDRESS_KIND), INTENT(OUT) :: address INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Get_address_f08 @@ -939,8 +1037,8 @@ subroutine MPI_Iallgather_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvt use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -949,6 +1047,23 @@ subroutine MPI_Iallgather_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvt end subroutine MPI_Iallgather_f08 end interface MPI_Iallgather +interface MPI_Allgather_init +subroutine MPI_Allgather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & + comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Allgather_init_f08 +end interface MPI_Allgather_init + interface MPI_Allgatherv subroutine MPI_Allgatherv_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,displs, & 
recvtype,comm,ierror) @@ -971,10 +1086,10 @@ subroutine MPI_Iallgatherv_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,dis use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Request), INTENT(OUT) :: request @@ -982,6 +1097,24 @@ subroutine MPI_Iallgatherv_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,dis end subroutine MPI_Iallgatherv_f08 end interface MPI_Iallgatherv +interface MPI_Allgatherv_init +subroutine MPI_Allgatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,displs, & + recvtype,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Allgatherv_init_f08 +end interface MPI_Allgatherv_init + interface MPI_Allreduce subroutine MPI_Allreduce_f08(sendbuf,recvbuf,count,datatype,op,comm,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm @@ -1002,8 +1135,8 
@@ subroutine MPI_Iallreduce_f08(sendbuf,recvbuf,count,datatype,op,comm,request,ier use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op @@ -1013,6 +1146,23 @@ subroutine MPI_Iallreduce_f08(sendbuf,recvbuf,count,datatype,op,comm,request,ier end subroutine MPI_Iallreduce_f08 end interface MPI_Iallreduce +interface MPI_Allreduce_init +subroutine MPI_Allreduce_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Op), INTENT(IN) :: op + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Allreduce_init_f08 +end interface MPI_Allreduce_init + interface MPI_Alltoall subroutine MPI_Alltoall_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & comm,ierror) @@ -1034,9 +1184,9 @@ subroutine MPI_Ialltoall_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvty use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf - 
INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcount, recvcount + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Request), INTENT(OUT) :: request @@ -1044,6 +1194,23 @@ subroutine MPI_Ialltoall_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvty end subroutine MPI_Ialltoall_f08 end interface MPI_Ialltoall +interface MPI_Alltoall_init +subroutine MPI_Alltoall_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & + comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcount, recvcount + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Alltoall_init_f08 +end interface MPI_Alltoall_init + interface MPI_Alltoallv subroutine MPI_Alltoallv_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,recvcounts, & rdispls,recvtype,comm,ierror) @@ -1065,9 +1232,9 @@ subroutine MPI_Ialltoallv_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,recvco use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + 
@OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Request), INTENT(OUT) :: request @@ -1075,6 +1242,23 @@ subroutine MPI_Ialltoallv_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,recvco end subroutine MPI_Ialltoallv_f08 end interface MPI_Ialltoallv +interface MPI_Alltoallv_init +subroutine MPI_Alltoallv_init_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,recvcounts, & + rdispls,recvtype,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Alltoallv_init_f08 +end interface MPI_Alltoallv_init + interface MPI_Alltoallw subroutine MPI_Alltoallw_f08(sendbuf,sendcounts,sdispls,sendtypes,recvbuf,recvcounts, & rdispls,recvtypes,comm,ierror) @@ -1096,16 +1280,33 @@ subroutine MPI_Ialltoallw_f08(sendbuf,sendcounts,sdispls,sendtypes,recvbuf,recvc use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: sendtypes(*), recvtypes(*) + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, 
INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + TYPE(MPI_Datatype), INTENT(IN) OMPI_ASYNCHRONOUS :: sendtypes(*), recvtypes(*) TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Ialltoallw_f08 end interface MPI_Ialltoallw +interface MPI_Alltoallw_init +subroutine MPI_Alltoallw_init_f08(sendbuf,sendcounts,sdispls,sendtypes,recvbuf,recvcounts, & + rdispls,recvtypes,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + TYPE(MPI_Datatype), INTENT(IN) OMPI_ASYNCHRONOUS :: sendtypes(*), recvtypes(*) + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Alltoallw_init_f08 +end interface MPI_Alltoallw_init + interface MPI_Barrier subroutine MPI_Barrier_f08(comm,ierror) use :: mpi_f08_types, only : MPI_Comm @@ -1125,6 +1326,17 @@ subroutine MPI_Ibarrier_f08(comm,request,ierror) end subroutine MPI_Ibarrier_f08 end interface MPI_Ibarrier +interface MPI_Barrier_init +subroutine MPI_Barrier_init_f08(comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Comm, MPI_Info, MPI_Request + implicit none + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Barrier_init_f08 +end interface MPI_Barrier_init + interface MPI_Bcast subroutine 
MPI_Bcast_f08(buffer,count,datatype,root,comm,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm @@ -1143,7 +1355,7 @@ subroutine MPI_Ibcast_f08(buffer,count,datatype,root,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buffer - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: buffer + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buffer INTEGER, INTENT(IN) :: count, root TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -1152,6 +1364,21 @@ subroutine MPI_Ibcast_f08(buffer,count,datatype,root,comm,request,ierror) end subroutine MPI_Ibcast_f08 end interface MPI_Ibcast +interface MPI_Bcast_init +subroutine MPI_Bcast_init_f08(buffer,count,datatype,root,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buffer + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buffer + INTEGER, INTENT(IN) :: count, root + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Bcast_init_f08 +end interface MPI_Bcast_init + interface MPI_Exscan subroutine MPI_Exscan_f08(sendbuf,recvbuf,count,datatype,op,comm,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm @@ -1172,8 +1399,8 @@ subroutine MPI_Iexscan_f08(sendbuf,recvbuf,count,datatype,op,comm,request,ierror use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, 
INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op @@ -1183,6 +1410,23 @@ subroutine MPI_Iexscan_f08(sendbuf,recvbuf,count,datatype,op,comm,request,ierror end subroutine MPI_Iexscan_f08 end interface MPI_Iexscan +interface MPI_Exscan_init +subroutine MPI_Exscan_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Op), INTENT(IN) :: op + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Exscan_init_f08 +end interface MPI_Exscan_init + interface MPI_Gather subroutine MPI_Gather_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & root,comm,ierror) @@ -1204,8 +1448,8 @@ subroutine MPI_Igather_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount, root TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -1214,6 +1458,23 @@ subroutine MPI_Igather_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype end subroutine MPI_Igather_f08 end interface MPI_Igather +interface MPI_Gather_init +subroutine 
MPI_Gather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & + root,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount, root + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Gather_init_f08 +end interface MPI_Gather_init + interface MPI_Gatherv subroutine MPI_Gatherv_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,displs, & recvtype,root,comm,ierror) @@ -1236,10 +1497,10 @@ subroutine MPI_Igatherv_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,displs use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, root - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Request), INTENT(OUT) :: request @@ -1247,6 +1508,24 @@ subroutine MPI_Igatherv_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,displs end subroutine MPI_Igatherv_f08 end interface MPI_Igatherv +interface MPI_Gatherv_init +subroutine MPI_Gatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,displs, & + recvtype,root,comm,info,request,ierror) + use :: 
mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, root + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Gatherv_init_f08 +end interface MPI_Gatherv_init + interface MPI_Op_commutative subroutine MPI_Op_commutative_f08(op,commute,ierror) use :: mpi_f08_types, only : MPI_Op @@ -1298,8 +1577,8 @@ subroutine MPI_Ireduce_f08(sendbuf,recvbuf,count,datatype,op,root,comm,request,i use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count, root TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op @@ -1309,6 +1588,23 @@ subroutine MPI_Ireduce_f08(sendbuf,recvbuf,count,datatype,op,root,comm,request,i end subroutine MPI_Ireduce_f08 end interface MPI_Ireduce +interface MPI_Reduce_init +subroutine MPI_Reduce_init_f08(sendbuf,recvbuf,count,datatype,op,root,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) 
:: count, root + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Op), INTENT(IN) :: op + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Reduce_init_f08 +end interface MPI_Reduce_init + interface MPI_Reduce_local subroutine MPI_Reduce_local_f08(inbuf,inoutbuf,count,datatype,op,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op @@ -1345,9 +1641,9 @@ subroutine MPI_Ireduce_scatter_f08(sendbuf,recvbuf,recvcounts,datatype,op,comm, use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*) + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*) TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op TYPE(MPI_Comm), INTENT(IN) :: comm @@ -1356,6 +1652,24 @@ subroutine MPI_Ireduce_scatter_f08(sendbuf,recvbuf,recvcounts,datatype,op,comm, end subroutine MPI_Ireduce_scatter_f08 end interface MPI_Ireduce_scatter +interface MPI_Reduce_scatter_init +subroutine MPI_Reduce_scatter_init_f08(sendbuf,recvbuf,recvcounts,datatype,op,comm, & + info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*) + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Op), INTENT(IN) :: op + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), 
INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Reduce_scatter_init_f08 +end interface MPI_Reduce_scatter_init + interface MPI_Reduce_scatter_block subroutine MPI_Reduce_scatter_block_f08(sendbuf,recvbuf,recvcount,datatype,op,comm, & ierror) @@ -1378,8 +1692,8 @@ subroutine MPI_Ireduce_scatter_block_f08(sendbuf,recvbuf,recvcount,datatype,op,c use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: recvcount TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op @@ -1389,6 +1703,24 @@ subroutine MPI_Ireduce_scatter_block_f08(sendbuf,recvbuf,recvcount,datatype,op,c end subroutine MPI_Ireduce_scatter_block_f08 end interface MPI_Ireduce_scatter_block +interface MPI_Reduce_scatter_block_init +subroutine MPI_Reduce_scatter_block_init_f08(sendbuf,recvbuf,recvcount,datatype,op,comm, & + info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: recvcount + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Op), INTENT(IN) :: op + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Reduce_scatter_block_init_f08 +end interface MPI_Reduce_scatter_block_init + interface MPI_Scan subroutine 
MPI_Scan_f08(sendbuf,recvbuf,count,datatype,op,comm,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm @@ -1409,8 +1741,8 @@ subroutine MPI_Iscan_f08(sendbuf,recvbuf,count,datatype,op,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op @@ -1420,6 +1752,23 @@ subroutine MPI_Iscan_f08(sendbuf,recvbuf,count,datatype,op,comm,request,ierror) end subroutine MPI_Iscan_f08 end interface MPI_Iscan +interface MPI_Scan_init +subroutine MPI_Scan_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Op), INTENT(IN) :: op + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Scan_init_f08 +end interface MPI_Scan_init + interface MPI_Scatter subroutine MPI_Scatter_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & root,comm,ierror) @@ -1441,8 +1790,8 @@ subroutine MPI_Iscatter_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtyp use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - 
@OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount, root TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -1451,6 +1800,23 @@ subroutine MPI_Iscatter_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtyp end subroutine MPI_Iscatter_f08 end interface MPI_Iscatter +interface MPI_Scatter_init +subroutine MPI_Scatter_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & + root,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount, root + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Scatter_init_f08 +end interface MPI_Scatter_init + interface MPI_Scatterv subroutine MPI_Scatterv_f08(sendbuf,sendcounts,displs,sendtype,recvbuf,recvcount, & recvtype,root,comm,ierror) @@ -1473,10 +1839,10 @@ subroutine MPI_Iscatterv_f08(sendbuf,sendcounts,displs,sendtype,recvbuf,recvcoun use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN), ASYNCHRONOUS :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, ASYNCHRONOUS :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: 
recvcount, root - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Request), INTENT(OUT) :: request @@ -1484,6 +1850,24 @@ subroutine MPI_Iscatterv_f08(sendbuf,sendcounts,displs,sendtype,recvbuf,recvcoun end subroutine MPI_Iscatterv_f08 end interface MPI_Iscatterv +interface MPI_Scatterv_init +subroutine MPI_Scatterv_init_f08(sendbuf,sendcounts,displs,sendtype,recvbuf,recvcount, & + recvtype,root,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: recvcount, root + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), displs(*) + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Scatterv_init_f08 +end interface MPI_Scatterv_init + interface MPI_Comm_compare subroutine MPI_Comm_compare_f08(comm1,comm2,result,ierror) use :: mpi_f08_types, only : MPI_Comm @@ -1506,6 +1890,20 @@ subroutine MPI_Comm_create_f08(comm,group,newcomm,ierror) end subroutine MPI_Comm_create_f08 end interface MPI_Comm_create +interface MPI_Comm_create_from_group +subroutine MPI_Comm_create_from_group_f08(group, stringtag, info, errhandler, newcomm, ierror) + use :: mpi_f08_types, only : MPI_Comm, MPI_Group, MPI_Info, MPI_Errhandler + implicit none + TYPE(MPI_Group), INTENT(IN) :: group + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newcomm + 
INTEGER, OPTIONAL, INTENT(OUT) :: ierror + +end subroutine MPI_Comm_create_from_group_f08 +end interface MPI_Comm_create_from_group + interface MPI_Comm_create_group subroutine MPI_Comm_create_group_f08(comm,group,tag,newcomm,ierror) use :: mpi_f08_types, only : MPI_Comm, MPI_Group @@ -1569,12 +1967,25 @@ subroutine MPI_Comm_idup_f08(comm,newcomm,request,ierror) use :: mpi_f08_types, only : MPI_Comm, MPI_Request implicit none TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Comm), INTENT(OUT) :: newcomm + TYPE(MPI_Comm), INTENT(OUT) OMPI_ASYNCHRONOUS :: newcomm TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Comm_idup_f08 end interface MPI_Comm_idup +interface MPI_Comm_idup_with_info +subroutine MPI_Comm_idup_with_info_f08(comm,info,newcomm,request,ierror) + use :: mpi_f08_types, only : MPI_Comm, MPI_Request, MPI_info + implicit none + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Comm), INTENT(OUT) :: newcomm + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Comm_idup_with_info_f08 +end interface MPI_Comm_idup_with_info + + interface MPI_Comm_free subroutine MPI_Comm_free_f08(comm,ierror) use :: mpi_f08_types, only : MPI_Comm @@ -1767,6 +2178,17 @@ subroutine MPI_Group_free_f08(group,ierror) end subroutine MPI_Group_free_f08 end interface MPI_Group_free +interface MPI_Group_from_session_pset +subroutine MPI_Group_from_session_pset_f08(session, pset_name, newgroup, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Group + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Group), INTENT(OUT) :: newgroup + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Group_from_session_pset_f08 +end interface MPI_Group_from_session_pset + interface MPI_Group_incl subroutine MPI_Group_incl_f08(group,n,ranks,newgroup,ierror) use :: mpi_f08_types, only : 
MPI_Group @@ -2028,11 +2450,11 @@ end subroutine MPI_Win_get_attr_f08 end interface MPI_Win_get_attr interface MPI_Win_get_info -subroutine MPI_Win_get_info_f08(win,info,ierror) +subroutine MPI_Win_get_info_f08(win,info_used,ierror) use :: mpi_f08_types, only : MPI_Win, MPI_Info implicit none TYPE(MPI_Win), INTENT(IN) :: win - TYPE(MPI_Info), INTENT(OUT) :: info + TYPE(MPI_Info), INTENT(OUT) :: info_used INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Win_get_info_f08 end interface MPI_Win_get_info @@ -2310,11 +2732,11 @@ end subroutine MPI_Topo_test_f08 end interface MPI_Topo_test interface MPI_Aint_add -function MPI_Aint_add_f08(base,diff) +function MPI_Aint_add_f08(base,disp) use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - INTEGER(MPI_ADDRESS_KIND) :: base - INTEGER(MPI_ADDRESS_KIND) :: diff + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: base + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: disp INTEGER(MPI_ADDRESS_KIND) :: MPI_Aint_add_f08 end function MPI_Aint_add_f08 end interface MPI_Aint_add @@ -2323,8 +2745,8 @@ interface MPI_Aint_diff function MPI_Aint_diff_f08(addr1,addr2) use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - INTEGER(MPI_ADDRESS_KIND) :: addr1 - INTEGER(MPI_ADDRESS_KIND) :: addr2 + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr1 + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr2 INTEGER(MPI_ADDRESS_KIND) :: MPI_Aint_diff_f08 end function MPI_Aint_diff_f08 end interface MPI_Aint_diff @@ -2683,6 +3105,19 @@ subroutine MPI_Info_set_f08(info,key,value,ierror) end subroutine MPI_Info_set_f08 end interface MPI_Info_set +interface MPI_Info_get_string +subroutine MPI_Info_get_string_f08(info,key,buflen,value,flag,ierror) + use :: mpi_f08_types, only : MPI_Info + implicit none + TYPE(MPI_Info), INTENT(IN) :: info + CHARACTER(LEN=*), INTENT(IN) :: key + INTEGER, INTENT(INOUT) :: buflen + CHARACTER(LEN=*), INTENT(OUT) :: value + LOGICAL, INTENT(OUT) :: flag + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine 
MPI_Info_get_string_f08 +end interface MPI_Info_get_string + interface MPI_Close_port subroutine MPI_Close_port_f08(port_name,ierror) implicit none @@ -2823,7 +3258,7 @@ subroutine MPI_Accumulate_f08(origin_addr,origin_count,origin_datatype,target_ra use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Win, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2841,7 +3276,7 @@ subroutine MPI_Raccumulate_f08(origin_addr,origin_count,origin_datatype,target_r use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Win, MPI_Request, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2859,7 +3294,7 @@ subroutine MPI_Get_f08(origin_addr,origin_count,origin_datatype,target_rank, & use :: mpi_f08_types, only : MPI_Datatype, MPI_Win, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2875,7 +3310,7 @@ subroutine MPI_Rget_f08(origin_addr,origin_count,origin_datatype,target_rank, & use :: mpi_f08_types, only : MPI_Datatype, MPI_Request, MPI_Win, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ 
origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2893,10 +3328,10 @@ subroutine MPI_Get_accumulate_f08(origin_addr,origin_count,origin_datatype,resul use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Win, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr,result_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, result_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: result_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: result_addr TYPE(MPI_Datatype), INTENT(IN) :: result_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp TYPE(MPI_Datatype), INTENT(IN) :: target_datatype @@ -2913,10 +3348,10 @@ subroutine MPI_Rget_accumulate_f08(origin_addr,origin_count,origin_datatype,resu use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Request, MPI_Win, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr,result_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, result_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: result_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: result_addr TYPE(MPI_Datatype), INTENT(IN) :: result_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp TYPE(MPI_Datatype), INTENT(IN) :: target_datatype @@ -2933,7 +3368,7 @@ subroutine MPI_Put_f08(origin_addr,origin_count,origin_datatype,target_rank, & use :: mpi_f08_types, only : MPI_Datatype, 
MPI_Win, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2949,7 +3384,7 @@ subroutine MPI_Rput_f08(origin_addr,origin_count,origin_datatype,target_rank, & use :: mpi_f08_types, only : MPI_Datatype, MPI_Win, MPI_Request, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2966,8 +3401,8 @@ subroutine MPI_Fetch_and_op_f08(origin_addr,result_addr,datatype,target_rank, & use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Win, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr,result_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: result_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: result_addr TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, INTENT(IN) :: target_rank INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -2983,8 +3418,8 @@ subroutine MPI_Compare_and_swap_f08(origin_addr,compare_addr,result_addr,datatyp use :: mpi_f08_types, only : MPI_Datatype, MPI_Win, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr,compare_addr,result_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: origin_addr,compare_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: result_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) 
OMPI_ASYNCHRONOUS :: origin_addr,compare_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: result_addr TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, INTENT(IN) :: target_rank INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp @@ -3007,7 +3442,7 @@ subroutine MPI_Win_create_f08(base,size,disp_unit,info,comm,win,ierror) use :: mpi_f08_types, only : MPI_Info, MPI_Comm, MPI_Win, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: base + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: base INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: size INTEGER, INTENT(IN) :: disp_unit TYPE(MPI_Info), INTENT(IN) :: info @@ -3033,7 +3468,7 @@ subroutine MPI_Win_attach_f08(win,base,size,ierror) use :: mpi_f08_types, only : MPI_Win, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: base + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: base INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: size TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror @@ -3045,7 +3480,7 @@ subroutine MPI_Win_detach_f08(win,base,ierror) use :: mpi_f08_types, only : MPI_Win, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: base + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: base TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Win_detach_f08 @@ -3434,7 +3869,7 @@ subroutine MPI_File_iread_f08(fh,buf,count,datatype,request,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request @@ -3449,7 +3884,7 @@ subroutine MPI_File_iread_at_f08(fh,offset,buf,count,datatype,request,ierror) TYPE(MPI_File), INTENT(IN) :: fh 
INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request @@ -3463,7 +3898,7 @@ subroutine MPI_File_iread_all_f08(fh,buf,count,datatype,request,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request @@ -3478,7 +3913,7 @@ subroutine MPI_File_iread_at_all_f08(fh,offset,buf,count,datatype,request,ierror TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request @@ -3492,7 +3927,7 @@ subroutine MPI_File_iread_shared_f08(fh,buf,count,datatype,request,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request @@ -3506,7 +3941,7 @@ subroutine MPI_File_iwrite_f08(fh,buf,count,datatype,request,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request @@ -3521,7 +3956,7 @@ subroutine 
MPI_File_iwrite_at_f08(fh,offset,buf,count,datatype,request,ierror) TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request @@ -3535,7 +3970,7 @@ subroutine MPI_File_iwrite_all_f08(fh,buf,count,datatype,request,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request @@ -3550,7 +3985,7 @@ subroutine MPI_File_iwrite_at_all_f08(fh,offset,buf,count,datatype,request,ierro TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Request), INTENT(OUT) :: request @@ -3563,7 +3998,7 @@ subroutine MPI_File_iwrite_shared_f08(fh,buf,count,datatype,request,ierror) use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf TYPE(MPI_File), INTENT(IN) :: fh INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype @@ -3629,7 +4064,7 @@ subroutine MPI_File_read_all_begin_f08(fh,buf,count,datatype,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ 
OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror @@ -3642,7 +4077,7 @@ subroutine MPI_File_read_all_end_f08(fh,buf,status,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_File_read_all_end_f08 @@ -3685,7 +4120,7 @@ subroutine MPI_File_read_at_all_begin_f08(fh,offset,buf,count,datatype,ierror) TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror @@ -3698,7 +4133,7 @@ subroutine MPI_File_read_at_all_end_f08(fh,buf,status,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_File_read_at_all_end_f08 @@ -3724,7 +4159,7 @@ subroutine MPI_File_read_ordered_begin_f08(fh,buf,count,datatype,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror @@ -3737,7 +4172,7 @@ subroutine MPI_File_read_ordered_end_f08(fh,buf,status,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf TYPE(MPI_Status) :: 
status INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_File_read_ordered_end_f08 @@ -3866,7 +4301,7 @@ subroutine MPI_File_write_all_begin_f08(fh,buf,count,datatype,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror @@ -3879,7 +4314,7 @@ subroutine MPI_File_write_all_end_f08(fh,buf,status,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_File_write_all_end_f08 @@ -3907,7 +4342,7 @@ subroutine MPI_File_write_at_all_f08(fh,offset,buf,count,datatype,status,ierror) TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Status) :: status @@ -3922,7 +4357,7 @@ subroutine MPI_File_write_at_all_begin_f08(fh,offset,buf,count,datatype,ierror) TYPE(MPI_File), INTENT(IN) :: fh INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror @@ -3935,7 +4370,7 @@ subroutine MPI_File_write_at_all_end_f08(fh,buf,status,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + 
@OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_File_write_at_all_end_f08 @@ -3961,7 +4396,7 @@ subroutine MPI_File_write_ordered_begin_f08(fh,buf,count,datatype,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror @@ -3974,7 +4409,7 @@ subroutine MPI_File_write_ordered_end_f08(fh,buf,status,ierror) implicit none TYPE(MPI_File), INTENT(IN) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: buf TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_File_write_ordered_end_f08 @@ -4092,7 +4527,7 @@ interface MPI_F_sync_reg subroutine MPI_F_sync_reg_f08(buf) implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf end subroutine MPI_F_sync_reg_f08 end interface MPI_F_sync_reg @@ -4113,7 +4548,7 @@ subroutine MPI_Mprobe_f08(source,tag,comm,message,status,ierror) INTEGER, INTENT(IN) :: source, tag TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Message), INTENT(OUT) :: message - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Mprobe_f08 end interface MPI_Mprobe @@ -4126,7 +4561,7 @@ subroutine MPI_Improbe_f08(source,tag,comm,flag,message,status,ierror) TYPE(MPI_Comm), INTENT(IN) :: comm LOGICAL, INTENT(OUT) :: flag TYPE(MPI_Message), INTENT(OUT) :: message - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine 
MPI_Improbe_f08 end interface MPI_Improbe @@ -4136,7 +4571,7 @@ subroutine MPI_Imrecv_f08(buf,count,datatype,message,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Message, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Message), INTENT(INOUT) :: message @@ -4180,8 +4615,8 @@ subroutine MPI_Ineighbor_allgather_f08(sendbuf,sendcount,sendtype,recvbuf,recvco use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -4190,6 +4625,23 @@ subroutine MPI_Ineighbor_allgather_f08(sendbuf,sendcount,sendtype,recvbuf,recvco end subroutine MPI_Ineighbor_allgather_f08 end interface MPI_Ineighbor_allgather +interface MPI_Neighbor_allgather_init +subroutine MPI_Neighbor_allgather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & + comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Neighbor_allgather_init_f08 +end interface 
MPI_Neighbor_allgather_init + interface MPI_Neighbor_allgatherv subroutine MPI_Neighbor_allgatherv_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,displs, & recvtype,comm,ierror) @@ -4212,16 +4664,34 @@ subroutine MPI_Ineighbor_allgatherv_f08(sendbuf,sendcount,sendtype,recvbuf,recvc use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Ineighbor_allgatherv_f08 +end interface MPI_Ineighbor_allgatherv + +interface MPI_Neighbor_allgatherv_init +subroutine MPI_Neighbor_allgatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,displs, & + recvtype,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: sendbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf INTEGER, INTENT(IN) :: sendcount INTEGER, INTENT(IN) :: recvcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror -end subroutine MPI_Ineighbor_allgatherv_f08 -end interface MPI_Ineighbor_allgatherv +end subroutine MPI_Neighbor_allgatherv_init_f08 +end interface MPI_Neighbor_allgatherv_init interface MPI_Neighbor_alltoall subroutine MPI_Neighbor_alltoall_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & @@ -4244,8 +4714,8 @@ subroutine 
MPI_Ineighbor_alltoall_f08(sendbuf,sendcount,sendtype,recvbuf,recvcou use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -4254,6 +4724,23 @@ subroutine MPI_Ineighbor_alltoall_f08(sendbuf,sendcount,sendtype,recvbuf,recvcou end subroutine MPI_Ineighbor_alltoall_f08 end interface MPI_Ineighbor_alltoall +interface MPI_Neighbor_alltoall_init +subroutine MPI_Neighbor_alltoall_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & + comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Neighbor_alltoall_init_f08 +end interface MPI_Neighbor_alltoall_init + interface MPI_Neighbor_alltoallv subroutine MPI_Neighbor_alltoallv_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,recvcounts, & rdispls,recvtype,comm,ierror) @@ -4275,15 +4762,32 @@ subroutine MPI_Ineighbor_alltoallv_f08(sendbuf,sendcounts,sdispls,sendtype,recvb use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + 
@OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Ineighbor_alltoallv_f08 +end interface MPI_Ineighbor_alltoallv + +interface MPI_Neighbor_alltoallv_init +subroutine MPI_Neighbor_alltoallv_init_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,recvcounts, & + rdispls,recvtype,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: sendbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf INTEGER, INTENT(IN) :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror -end subroutine MPI_Ineighbor_alltoallv_f08 -end interface MPI_Ineighbor_alltoallv +end subroutine MPI_Neighbor_alltoallv_init_f08 +end interface MPI_Neighbor_alltoallv_init interface MPI_Neighbor_alltoallw subroutine MPI_Neighbor_alltoallw_f08(sendbuf,sendcounts,sdispls,sendtypes,recvbuf,recvcounts, & @@ -4307,13 +4811,51 @@ subroutine MPI_Ineighbor_alltoallw_f08(sendbuf,sendcounts,sdispls,sendtypes,recv use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request, MPI_ADDRESS_KIND implicit none @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), recvcounts(*) + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) OMPI_ASYNCHRONOUS :: sdispls(*), rdispls(*) + 
TYPE(MPI_Datatype), INTENT(IN) OMPI_ASYNCHRONOUS :: sendtypes(*), recvtypes(*) + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Ineighbor_alltoallw_f08 +end interface MPI_Ineighbor_alltoallw + +interface MPI_Neighbor_alltoallw_init +subroutine MPI_Neighbor_alltoallw_init_f08(sendbuf,sendcounts,sdispls,sendtypes,recvbuf,recvcounts, & + rdispls,recvtypes,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request, MPI_ADDRESS_KIND + implicit none + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf, recvbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, INTENT(IN) :: sendbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf INTEGER, INTENT(IN) :: sendcounts(*), recvcounts(*) INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: sdispls(*), rdispls(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtypes(*), recvtypes(*) TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror -end subroutine MPI_Ineighbor_alltoallw_f08 -end interface MPI_Ineighbor_alltoallw +end subroutine MPI_Neighbor_alltoallw_init_f08 +end interface MPI_Neighbor_alltoallw_init + +interface MPI_Status_f2f08 +subroutine MPI_Status_f2f08_f08(f_status,f08_status,ierror) + use :: mpi_f08_types, only : MPI_Status, MPI_STATUS_SIZE + implicit none + INTEGER, INTENT(IN) :: f_status(MPI_STATUS_SIZE) + TYPE(MPI_Status), INTENT(OUT) :: f08_status + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Status_f2f08_f08 +end interface MPI_Status_f2f08 + +interface MPI_Status_f082f +subroutine MPI_Status_f082f_f08(f08_status,f_status,ierror) + use :: mpi_f08_types, only : MPI_Status, MPI_STATUS_SIZE + implicit none + TYPE(MPI_Status), INTENT(IN) :: f08_status + INTEGER, INTENT(OUT) :: f_status(MPI_STATUS_SIZE) + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Status_f082f_f08 +end interface MPI_Status_f082f diff 
--git a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-rename.h b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-rename.h index 29f853af3ba..06d2bb0e426 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-rename.h +++ b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-rename.h @@ -27,6 +27,10 @@ #define MPI_Irsend_f08 PMPI_Irsend_f08 #define MPI_Isend PMPI_Isend #define MPI_Isend_f08 PMPI_Isend_f08 +#define MPI_Isendrecv PMPI_Isendrecv +#define MPI_Isendrecv_f08 PMPI_Isendrecv_f08 +#define MPI_Isendrecv_replace PMPI_Isendrecv_replace +#define MPI_Isendrecv_replace_f08 PMPI_Isendrecv_replace_f08 #define MPI_Issend PMPI_Issend #define MPI_Issend_f08 PMPI_Issend_f08 #define MPI_Precv_init PMPI_Precv_init @@ -63,6 +67,20 @@ #define MPI_Sendrecv_replace_f08 PMPI_Sendrecv_replace_f08 #define MPI_Send_init PMPI_Send_init #define MPI_Send_init_f08 PMPI_Send_init_f08 +#define MPI_Session_get_info PMPI_Session_get_info +#define MPI_Session_get_info_f08 PMPI_Session_get_info_f08 +#define MPI_Session_get_nth_pset PMPI_Session_get_nth_pset +#define MPI_Session_get_nth_pset_f08 PMPI_Session_get_nth_pset_f08 +#define MPI_Session_get_nth_psetlen PMPI_Session_get_nth_psetlen +#define MPI_Session_get_nth_psetlen_f08 PMPI_Session_get_nth_psetlen_f08 +#define MPI_Session_get_num_psets PMPI_Session_get_num_psets +#define MPI_Session_get_num_psets_f08 PMPI_Session_get_num_psets_f08 +#define MPI_Session_get_pset_info PMPI_Session_get_pset_info +#define MPI_Session_get_pset_info_f08 PMPI_Session_get_pset_info_f08 +#define MPI_Session_init PMPI_Session_init +#define MPI_Session_init_f08 PMPI_Session_init_f08 +#define MPI_Session_finalize PMPI_Session_finalize +#define MPI_Session_finalize_f08 PMPI_Session_finalize_f08 #define MPI_Ssend PMPI_Ssend #define MPI_Ssend_f08 PMPI_Ssend_f08 #define MPI_Ssend_init PMPI_Ssend_init @@ -155,46 +173,68 @@ #define MPI_Allgather_f08 PMPI_Allgather_f08 #define MPI_Iallgather PMPI_Iallgather #define MPI_Iallgather_f08 PMPI_Iallgather_f08 +#define MPI_Allgather_init 
PMPI_Allgather_init +#define MPI_Allgather_init_f08 PMPI_Allgather_init_f08 #define MPI_Allgatherv PMPI_Allgatherv #define MPI_Allgatherv_f08 PMPI_Allgatherv_f08 #define MPI_Iallgatherv PMPI_Iallgatherv #define MPI_Iallgatherv_f08 PMPI_Iallgatherv_f08 +#define MPI_Allgatherv_init PMPI_Allgatherv_init +#define MPI_Allgatherv_init_f08 PMPI_Allgatherv_init_f08 #define MPI_Allreduce PMPI_Allreduce #define MPI_Allreduce_f08 PMPI_Allreduce_f08 #define MPI_Iallreduce PMPI_Iallreduce #define MPI_Iallreduce_f08 PMPI_Iallreduce_f08 +#define MPI_Allreduce_init PMPI_Allreduce_init +#define MPI_Allreduce_init_f08 PMPI_Allreduce_init_f08 #define MPI_Alltoall PMPI_Alltoall #define MPI_Alltoall_f08 PMPI_Alltoall_f08 #define MPI_Ialltoall PMPI_Ialltoall #define MPI_Ialltoall_f08 PMPI_Ialltoall_f08 +#define MPI_Alltoall_init PMPI_Alltoall_init +#define MPI_Alltoall_init_f08 PMPI_Alltoall_init_f08 #define MPI_Alltoallv PMPI_Alltoallv #define MPI_Alltoallv_f08 PMPI_Alltoallv_f08 #define MPI_Ialltoallv PMPI_Ialltoallv #define MPI_Ialltoallv_f08 PMPI_Ialltoallv_f08 +#define MPI_Alltoallv_init PMPI_Alltoallv_init +#define MPI_Alltoallv_init_f08 PMPI_Alltoallv_init_f08 #define MPI_Alltoallw PMPI_Alltoallw #define MPI_Alltoallw_f08 PMPI_Alltoallw_f08 #define MPI_Ialltoallw PMPI_Ialltoallw #define MPI_Ialltoallw_f08 PMPI_Ialltoallw_f08 +#define MPI_Alltoallw_init PMPI_Alltoallw_init +#define MPI_Alltoallw_init_f08 PMPI_Alltoallw_init_f08 #define MPI_Barrier PMPI_Barrier #define MPI_Barrier_f08 PMPI_Barrier_f08 #define MPI_Ibarrier PMPI_Ibarrier #define MPI_Ibarrier_f08 PMPI_Ibarrier_f08 +#define MPI_Barrier_init PMPI_Barrier_init +#define MPI_Barrier_init_f08 PMPI_Barrier_init_f08 #define MPI_Bcast PMPI_Bcast #define MPI_Bcast_f08 PMPI_Bcast_f08 #define MPI_Ibcast PMPI_Ibcast #define MPI_Ibcast_f08 PMPI_Ibcast_f08 +#define MPI_Bcast_init PMPI_Bcast_init +#define MPI_Bcast_init_f08 PMPI_Bcast_init_f08 #define MPI_Exscan PMPI_Exscan #define MPI_Exscan_f08 PMPI_Exscan_f08 #define MPI_Iexscan 
PMPI_Iexscan #define MPI_Iexscan_f08 PMPI_Iexscan_f08 +#define MPI_Exscan_init PMPI_Exscan_init +#define MPI_Exscan_init_f08 PMPI_Exscan_init_f08 #define MPI_Gather PMPI_Gather #define MPI_Gather_f08 PMPI_Gather_f08 #define MPI_Igather PMPI_Igather #define MPI_Igather_f08 PMPI_Igather_f08 +#define MPI_Gather_init PMPI_Gather_init +#define MPI_Gather_init_f08 PMPI_Gather_init_f08 #define MPI_Gatherv PMPI_Gatherv #define MPI_Gatherv_f08 PMPI_Gatherv_f08 #define MPI_Igatherv PMPI_Igatherv #define MPI_Igatherv_f08 PMPI_Igatherv_f08 +#define MPI_Gatherv_init PMPI_Gatherv_init +#define MPI_Gatherv_init_f08 PMPI_Gatherv_init_f08 #define MPI_Op_commutative PMPI_Op_commutative #define MPI_Op_commutative_f08 PMPI_Op_commutative_f08 #define MPI_Op_create PMPI_Op_create @@ -205,34 +245,48 @@ #define MPI_Reduce_f08 PMPI_Reduce_f08 #define MPI_Ireduce PMPI_Ireduce #define MPI_Ireduce_f08 PMPI_Ireduce_f08 +#define MPI_Reduce_init PMPI_Reduce_init +#define MPI_Reduce_init_f08 PMPI_Reduce_init_f08 #define MPI_Reduce_local PMPI_Reduce_local #define MPI_Reduce_local_f08 PMPI_Reduce_local_f08 #define MPI_Reduce_scatter PMPI_Reduce_scatter #define MPI_Reduce_scatter_f08 PMPI_Reduce_scatter_f08 #define MPI_Ireduce_scatter PMPI_Ireduce_scatter #define MPI_Ireduce_scatter_f08 PMPI_Ireduce_scatter_f08 +#define MPI_Reduce_scatter_init PMPI_Reduce_scatter_init +#define MPI_Reduce_scatter_init_f08 PMPI_Reduce_scatter_init_f08 #define MPI_Reduce_scatter_block PMPI_Reduce_scatter_block #define MPI_Reduce_scatter_block_f08 PMPI_Reduce_scatter_block_f08 #define MPI_Ireduce_scatter_block PMPI_Ireduce_scatter_block #define MPI_Ireduce_scatter_block_f08 PMPI_Ireduce_scatter_block_f08 +#define MPI_Reduce_scatter_block_init PMPI_Reduce_scatter_block_init +#define MPI_Reduce_scatter_block_init_f08 PMPI_Reduce_scatter_block_init_f08 #define MPI_Scan PMPI_Scan #define MPI_Scan_f08 PMPI_Scan_f08 #define MPI_Iscan PMPI_Iscan #define MPI_Iscan_f08 PMPI_Iscan_f08 +#define MPI_Scan_init PMPI_Scan_init 
+#define MPI_Scan_init_f08 PMPI_Scan_init_f08 #define MPI_Scatter PMPI_Scatter #define MPI_Scatter_f08 PMPI_Scatter_f08 #define MPI_Iscatter PMPI_Iscatter #define MPI_Iscatter_f08 PMPI_Iscatter_f08 +#define MPI_Scatter_init PMPI_Scatter_init +#define MPI_Scatter_init_f08 PMPI_Scatter_init_f08 #define MPI_Scatterv PMPI_Scatterv #define MPI_Scatterv_f08 PMPI_Scatterv_f08 #define MPI_Iscatterv PMPI_Iscatterv #define MPI_Iscatterv_f08 PMPI_Iscatterv_f08 +#define MPI_Scatterv_init PMPI_Scatterv_init +#define MPI_Scatterv_init_f08 PMPI_Scatterv_init_f08 #define MPI_Comm_compare PMPI_Comm_compare #define MPI_Comm_compare_f08 PMPI_Comm_compare_f08 #define MPI_Comm_create PMPI_Comm_create #define MPI_Comm_create_f08 PMPI_Comm_create_f08 #define MPI_Comm_create_group PMPI_Comm_create_group #define MPI_Comm_create_group_f08 PMPI_Comm_create_group_f08 +#define MPI_Comm_create_from_group PMPI_Comm_create_from_group +#define MPI_Comm_create_from_group_f08 PMPI_Comm_create_from_group_f08 #define MPI_Comm_create_keyval PMPI_Comm_create_keyval #define MPI_Comm_create_keyval_f08 PMPI_Comm_create_keyval_f08 #define MPI_Comm_delete_attr PMPI_Comm_delete_attr @@ -243,6 +297,8 @@ #define MPI_Comm_dup_with_info_f08 PMPI_Comm_dup_with_info_f08 #define MPI_Comm_idup PMPI_Comm_idup #define MPI_Comm_idup_f08 PMPI_Comm_idup_f08 +#define MPI_Comm_idup_with_info PMPI_Comm_idup_with_info +#define MPI_Comm_idup_with_info_f08 PMPI_Comm_idup_with_info_f08 #define MPI_Comm_free PMPI_Comm_free #define MPI_Comm_free_f08 PMPI_Comm_free_f08 #define MPI_Comm_free_keyval PMPI_Comm_free_keyval @@ -279,6 +335,8 @@ #define MPI_Group_difference_f08 PMPI_Group_difference_f08 #define MPI_Group_excl PMPI_Group_excl #define MPI_Group_excl_f08 PMPI_Group_excl_f08 +#define MPI_Group_from_session_pset PMPI_Group_from_session_pset +#define MPI_Group_from_session_pset_f08 PMPI_Group_from_session_pset_f08 #define MPI_Group_free PMPI_Group_free #define MPI_Group_free_f08 PMPI_Group_free_f08 #define MPI_Group_incl 
PMPI_Group_incl @@ -299,6 +357,8 @@ #define MPI_Group_union_f08 PMPI_Group_union_f08 #define MPI_Intercomm_create PMPI_Intercomm_create #define MPI_Intercomm_create_f08 PMPI_Intercomm_create_f08 +#define MPI_Intercomm_create_from_groups PMPI_Intercomm_create_from_groups +#define MPI_Intercomm_create_from_groups_f08 PMPI_Intercomm_create_from_groups_f08 #define MPI_Intercomm_merge PMPI_Intercomm_merge #define MPI_Intercomm_merge_f08 PMPI_Intercomm_merge_f08 #define MPI_Type_create_keyval PMPI_Type_create_keyval @@ -449,6 +509,8 @@ #define MPI_Info_get_nkeys_f08 PMPI_Info_get_nkeys_f08 #define MPI_Info_get_nthkey PMPI_Info_get_nthkey #define MPI_Info_get_nthkey_f08 PMPI_Info_get_nthkey_f08 +#define MPI_Info_get_string PMPI_Info_get_string +#define MPI_Info_get_string_f08 PMPI_Info_get_string_f08 #define MPI_Info_get_valuelen PMPI_Info_get_valuelen #define MPI_Info_get_valuelen_f08 PMPI_Info_get_valuelen_f08 #define MPI_Info_set PMPI_Info_set @@ -551,7 +613,9 @@ #define MPI_Is_thread_main_f08 PMPI_Is_thread_main_f08 #define MPI_Query_thread PMPI_Query_thread #define MPI_Query_thread_f08 PMPI_Query_thread_f08 +#define MPI_Status_f082f PMPI_Status_f082f #define MPI_Status_f082f_f08 PMPI_Status_f082f_f08 +#define MPI_Status_f2f08 PMPI_Status_f2f08 #define MPI_Status_f2f08_f08 PMPI_Status_f2f08_f08 #define MPI_Status_set_cancelled PMPI_Status_set_cancelled #define MPI_Status_set_cancelled_f08 PMPI_Status_set_cancelled_f08 @@ -699,21 +763,31 @@ #define MPI_Neighbor_allgather_f08 PMPI_Neighbor_allgather_f08 #define MPI_Ineighbor_allgather PMPI_Ineighbor_allgather #define MPI_Ineighbor_allgather_f08 PMPI_Ineighbor_allgather_f08 +#define MPI_Neighbor_allgather_init PMPI_Neighbor_allgather_init +#define MPI_Neighbor_allgather_init_f08 PMPI_Neighbor_allgather_init_f08 #define MPI_Neighbor_allgatherv PMPI_Neighbor_allgatherv #define MPI_Neighbor_allgatherv_f08 PMPI_Neighbor_allgatherv_f08 #define MPI_Ineighbor_allgatherv PMPI_Ineighbor_allgatherv #define 
MPI_Ineighbor_allgatherv_f08 PMPI_Ineighbor_allgatherv_f08 +#define MPI_Neighbor_allgatherv_init PMPI_Neighbor_allgatherv_init +#define MPI_Neighbor_allgatherv_init_f08 PMPI_Neighbor_allgatherv_init_f08 #define MPI_Neighbor_alltoall PMPI_Neighbor_alltoall #define MPI_Neighbor_alltoall_f08 PMPI_Neighbor_alltoall_f08 #define MPI_Ineighbor_alltoall PMPI_Ineighbor_alltoall #define MPI_Ineighbor_alltoall_f08 PMPI_Ineighbor_alltoall_f08 +#define MPI_Neighbor_alltoall_init PMPI_Neighbor_alltoall_init +#define MPI_Neighbor_alltoall_init_f08 PMPI_Neighbor_alltoall_init_f08 #define MPI_Neighbor_alltoallv PMPI_Neighbor_alltoallv #define MPI_Neighbor_alltoallv_f08 PMPI_Neighbor_alltoallv_f08 #define MPI_Ineighbor_alltoallv PMPI_Ineighbor_alltoallv -#define MPI_Ineighbor_alltoallv_f08 PMPI_Ineighbor_alltoallv_f08 +#define MPI_Ineighbor_alltoallv_f08 PMPI_Ineighbor_alltoallv_f08 +#define MPI_Neighbor_alltoallv_init PMPI_Neighbor_alltoallv_init +#define MPI_Neighbor_alltoallv_init_f08 PMPI_Neighbor_alltoallv_init_f08 #define MPI_Neighbor_alltoallw PMPI_Neighbor_alltoallw #define MPI_Neighbor_alltoallw_f08 PMPI_Neighbor_alltoallw_f08 #define MPI_Ineighbor_alltoallw PMPI_Ineighbor_alltoallw #define MPI_Ineighbor_alltoallw_f08 PMPI_Ineighbor_alltoallw_f08 +#define MPI_Neighbor_alltoallw_init PMPI_Neighbor_alltoallw_init +#define MPI_Neighbor_alltoallw_init_f08 PMPI_Neighbor_alltoallw_init_f08 #endif diff --git a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-types.F90 b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-types.F90 index c1abace16b3..5d0d7c09427 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-types.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-types.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. !
Copyright (c) 2015-2020 Research Organization for Information Science @@ -8,6 +8,7 @@ ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! Copyright (c) 2020 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights +! Copyright (c) 2019-2021 Triad National Security, LLC. All rights ! reserved. ! $COPYRIGHT$ ! @@ -27,50 +28,6 @@ module mpi_f08_types include "mpif-constants.h" include "mpif-io-constants.h" - ! - ! derived types - ! - - type, BIND(C) :: MPI_Comm - integer :: MPI_VAL - end type MPI_Comm - - type, BIND(C) :: MPI_Datatype - integer :: MPI_VAL - end type MPI_Datatype - - type, BIND(C) :: MPI_Errhandler - integer :: MPI_VAL - end type MPI_Errhandler - - type, BIND(C) :: MPI_File - integer :: MPI_VAL - end type MPI_File - - type, BIND(C) :: MPI_Group - integer :: MPI_VAL - end type MPI_Group - - type, BIND(C) :: MPI_Info - integer :: MPI_VAL - end type MPI_Info - - type, BIND(C) :: MPI_Message - integer :: MPI_VAL - end type MPI_Message - - type, BIND(C) :: MPI_Op - integer :: MPI_VAL - end type MPI_Op - - type, BIND(C) :: MPI_Request - integer :: MPI_VAL - end type MPI_Request - - type, BIND(C) :: MPI_Win - integer :: MPI_VAL - end type MPI_Win - ! ! Pre-defined handles ! @@ -206,138 +163,4 @@ module mpi_f08_types !------------------------------ #include "mpif-f08-types.h" -!... Interfaces for operators with handles -!----------------------------------------- -interface operator (.EQ.) - module procedure ompi_comm_op_eq - module procedure ompi_datatype_op_eq - module procedure ompi_errhandler_op_eq - module procedure ompi_file_op_eq - module procedure ompi_group_op_eq - module procedure ompi_info_op_eq - module procedure ompi_message_op_eq - module procedure ompi_op_op_eq - module procedure ompi_request_op_eq - module procedure ompi_win_op_eq -end interface - -interface operator (.NE.) 
- module procedure ompi_comm_op_ne - module procedure ompi_datatype_op_ne - module procedure ompi_errhandler_op_ne - module procedure ompi_file_op_ne - module procedure ompi_group_op_ne - module procedure ompi_info_op_ne - module procedure ompi_message_op_ne - module procedure ompi_op_op_ne - module procedure ompi_request_op_ne - module procedure ompi_win_op_ne -end interface - -contains - -!... .EQ. operator -!----------------- - logical function ompi_comm_op_eq(a, b) - type(MPI_Comm), intent(in) :: a, b - ompi_comm_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) - end function ompi_comm_op_eq - - logical function ompi_datatype_op_eq(a, b) - type(MPI_Datatype), intent(in) :: a, b - ompi_datatype_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) - end function ompi_datatype_op_eq - - logical function ompi_errhandler_op_eq(a, b) - type(MPI_Errhandler), intent(in) :: a, b - ompi_errhandler_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) - end function ompi_errhandler_op_eq - - logical function ompi_file_op_eq(a, b) - type(MPI_File), intent(in) :: a, b - ompi_file_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) - end function ompi_file_op_eq - - logical function ompi_group_op_eq(a, b) - type(MPI_Group), intent(in) :: a, b - ompi_group_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) - end function ompi_group_op_eq - - logical function ompi_info_op_eq(a, b) - type(MPI_Info), intent(in) :: a, b - ompi_info_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) - end function ompi_info_op_eq - - logical function ompi_message_op_eq(a, b) - type(MPI_Message), intent(in) :: a, b - ompi_message_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) - end function ompi_message_op_eq - - logical function ompi_op_op_eq(a, b) - type(MPI_Op), intent(in) :: a, b - ompi_op_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) - end function ompi_op_op_eq - - logical function ompi_request_op_eq(a, b) - type(MPI_Request), intent(in) :: a, b - ompi_request_op_eq = (a%MPI_VAL .EQ. 
b%MPI_VAL) - end function ompi_request_op_eq - - logical function ompi_win_op_eq(a, b) - type(MPI_Win), intent(in) :: a, b - ompi_win_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) - end function ompi_win_op_eq - -!... .NE. operator -!----------------- - logical function ompi_comm_op_ne(a, b) - type(MPI_Comm), intent(in) :: a, b - ompi_comm_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) - end function ompi_comm_op_ne - - logical function ompi_datatype_op_ne(a, b) - type(MPI_Datatype), intent(in) :: a, b - ompi_datatype_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) - end function ompi_datatype_op_ne - - logical function ompi_errhandler_op_ne(a, b) - type(MPI_Errhandler), intent(in) :: a, b - ompi_errhandler_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) - end function ompi_errhandler_op_ne - - logical function ompi_file_op_ne(a, b) - type(MPI_File), intent(in) :: a, b - ompi_file_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) - end function ompi_file_op_ne - - logical function ompi_group_op_ne(a, b) - type(MPI_Group), intent(in) :: a, b - ompi_group_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) - end function ompi_group_op_ne - - logical function ompi_info_op_ne(a, b) - type(MPI_Info), intent(in) :: a, b - ompi_info_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) - end function ompi_info_op_ne - - logical function ompi_message_op_ne(a, b) - type(MPI_Message), intent(in) :: a, b - ompi_message_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) - end function ompi_message_op_ne - - logical function ompi_op_op_ne(a, b) - type(MPI_Op), intent(in) :: a, b - ompi_op_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) - end function ompi_op_op_ne - - logical function ompi_request_op_ne(a, b) - type(MPI_Request), intent(in) :: a, b - ompi_request_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) - end function ompi_request_op_ne - - logical function ompi_win_op_ne(a, b) - type(MPI_Win), intent(in) :: a, b - ompi_win_op_ne = (a%MPI_VAL .NE. 
b%MPI_VAL) - end function ompi_win_op_ne - end module mpi_f08_types diff --git a/ompi/mpi/fortran/use-mpi-f08/mod/pmpi-f08-interfaces.F90 b/ompi/mpi/fortran/use-mpi-f08/mod/pmpi-f08-interfaces.F90 index ddd81d17f74..1089248c42b 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mod/pmpi-f08-interfaces.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mod/pmpi-f08-interfaces.F90 @@ -10,6 +10,8 @@ ! Copyright (c) 2015-2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2017-2018 FUJITSU LIMITED. All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! This file provides the interface specifications for the MPI Fortran diff --git a/ompi/mpi/fortran/use-mpi-f08/mprobe_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/mprobe_f08.F90 index adb54ba98eb..bcde2f5ea49 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mprobe_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mprobe_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_Mprobe_f08(source,tag,comm,message,status,ierror) INTEGER, INTENT(IN) :: source, tag TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Message), INTENT(OUT) :: message - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/neighbor_allgather_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/neighbor_allgather_init_f08.F90 similarity index 60% rename from ompi/mpiext/pcollreq/use-mpi-f08/neighbor_allgather_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/neighbor_allgather_init_f08.F90 index 3beedf850b8..0137d7ef79b 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/neighbor_allgather_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/neighbor_allgather_init_f08.F90 @@ -1,21 +1,23 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! 
Copyright (c) 2009-2013 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Neighbor_allgather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm,info,request,ierror) +#include "mpi-f08-rename.h" + +subroutine MPI_Neighbor_allgather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_neighbor_allgather_init_f + use :: ompi_mpifh_bindings, only : ompi_neighbor_allgather_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype @@ -25,8 +27,8 @@ subroutine MPIX_Neighbor_allgather_init_f08(sendbuf,sendcount,sendtype,recvbuf,r INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_neighbor_allgather_init_f(sendbuf,sendcount,sendtype%MPI_VAL,& + call ompi_neighbor_allgather_init_f(sendbuf,sendcount,sendtype%MPI_VAL,& recvbuf,recvcount,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Neighbor_allgather_init_f08 +end subroutine MPI_Neighbor_allgather_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/neighbor_allgatherv_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/neighbor_allgatherv_init_f08.F90 similarity index 59% rename from 
ompi/mpiext/pcollreq/use-mpi-f08/neighbor_allgatherv_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/neighbor_allgatherv_init_f08.F90 index 524f51cf484..eebeaecfbae 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/neighbor_allgatherv_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/neighbor_allgatherv_init_f08.F90 @@ -1,24 +1,26 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2013 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Neighbor_allgatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& +#include "mpi-f08-rename.h" + +subroutine MPI_Neighbor_allgatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& displs,recvtype,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_neighbor_allgatherv_init_f + use :: ompi_mpifh_bindings, only : ompi_neighbor_allgatherv_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -27,8 +29,8 @@ subroutine 
MPIX_Neighbor_allgatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf, INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_neighbor_allgatherv_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcounts,& + call ompi_neighbor_allgatherv_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcounts,& displs,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Neighbor_allgatherv_init_f08 +end subroutine MPI_Neighbor_allgatherv_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/neighbor_alltoall_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/neighbor_alltoall_init_f08.F90 similarity index 63% rename from ompi/mpiext/pcollreq/use-mpi-f08/neighbor_alltoall_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/neighbor_alltoall_init_f08.F90 index 8f79b893451..a77283d4e78 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/neighbor_alltoall_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/neighbor_alltoall_init_f08.F90 @@ -1,22 +1,24 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2013 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! 
$COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Neighbor_alltoall_init_f08(sendbuf,sendcount,sendtype,recvbuf,& +#include "mpi-f08-rename.h" + +subroutine MPI_Neighbor_alltoall_init_f08(sendbuf,sendcount,sendtype,recvbuf,& recvcount,recvtype,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_neighbor_alltoall_init_f + use :: ompi_mpifh_bindings, only : ompi_neighbor_alltoall_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype @@ -26,8 +28,8 @@ subroutine MPIX_Neighbor_alltoall_init_f08(sendbuf,sendcount,sendtype,recvbuf,& INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_neighbor_alltoall_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,& + call ompi_neighbor_alltoall_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,& recvcount,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Neighbor_alltoall_init_f08 +end subroutine MPI_Neighbor_alltoall_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/neighbor_alltoallv_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/neighbor_alltoallv_init_f08.F90 similarity index 60% rename from ompi/mpiext/pcollreq/use-mpi-f08/neighbor_alltoallv_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/neighbor_alltoallv_init_f08.F90 index c973d652f06..1ed7d8a502b 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/neighbor_alltoallv_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/neighbor_alltoallv_init_f08.F90 @@ -1,23 +1,25 @@ ! -*- f90 -*- ! -! 
Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2013 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Neighbor_alltoallv_init_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,& +#include "mpi-f08-rename.h" + +subroutine MPI_Neighbor_alltoallv_init_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,& recvcounts,rdispls,recvtype,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_neighbor_alltoallv_init_f + use :: ompi_mpifh_bindings, only : ompi_neighbor_alltoallv_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -26,9 +28,9 @@ subroutine MPIX_Neighbor_alltoallv_init_f08(sendbuf,sendcounts,sdispls,sendtype, INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_neighbor_alltoallv_init_f(sendbuf,sendcounts,sdispls,sendtype%MPI_VAL,& + call ompi_neighbor_alltoallv_init_f(sendbuf,sendcounts,sdispls,sendtype%MPI_VAL,& recvbuf,recvcounts,rdispls,recvtype%MPI_VAL,& 
comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Neighbor_alltoallv_init_f08 +end subroutine MPI_Neighbor_alltoallv_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/neighbor_alltoallw_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/neighbor_alltoallw_init_f08.F90 similarity index 50% rename from ompi/mpiext/pcollreq/use-mpi-f08/neighbor_alltoallw_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/neighbor_alltoallw_init_f08.F90 index 4ad00d1f2fb..6ab2219e1c8 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/neighbor_alltoallw_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/neighbor_alltoallw_init_f08.F90 @@ -1,34 +1,36 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2013 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! 
$COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Neighbor_alltoallw_init_f08(sendbuf,sendcounts,sdispls,sendtypes,& +#include "mpi-f08-rename.h" + +subroutine MPI_Neighbor_alltoallw_init_f08(sendbuf,sendcounts,sdispls,sendtypes,& recvbuf,recvcounts,rdispls,recvtypes,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request, MPI_ADDRESS_KIND - use :: mpiext_pcollreq_f08, only : ompix_neighbor_alltoallw_init_f + use :: ompi_mpifh_bindings, only : ompi_neighbor_alltoallw_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), recvcounts(*) - INTEGER(MPI_ADDRESS_KIND), INTENT(IN), ASYNCHRONOUS :: sdispls(*), rdispls(*) - TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: sendtypes(*) - TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: recvtypes(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), recvcounts(*) + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) OMPI_ASYNCHRONOUS :: sdispls(*), rdispls(*) + TYPE(MPI_Datatype), INTENT(IN) OMPI_ASYNCHRONOUS :: sendtypes(*) + TYPE(MPI_Datatype), INTENT(IN) OMPI_ASYNCHRONOUS :: recvtypes(*) TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Info), INTENT(IN) :: info TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_neighbor_alltoallw_init_f(sendbuf,sendcounts,sdispls,sendtypes(1)%MPI_VAL,& + call ompi_neighbor_alltoallw_init_f(sendbuf,sendcounts,sdispls,sendtypes(1)%MPI_VAL,& recvbuf,recvcounts,rdispls,recvtypes(1)%MPI_VAL,& comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Neighbor_alltoallw_init_f08 +end subroutine MPI_Neighbor_alltoallw_init_f08 diff --git 
a/ompi/mpi/fortran/use-mpi-f08/parrived_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/parrived_f08.F90 index 6ab022af817..7116fb1511b 100644 --- a/ompi/mpi/fortran/use-mpi-f08/parrived_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/parrived_f08.F90 @@ -11,6 +11,8 @@ #include "ompi/mpi/fortran/configure-fortran-output.h" subroutine MPI_Parrived_f08(request,partition,flag,ierror) + ! See note in mpi-f-interfaces-bind.h for why we "use mpi" here and + ! call a PMPI_* subroutine below. use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: mpi, only : PMPI_Parrived implicit none diff --git a/ompi/mpi/fortran/use-mpi-f08/precv_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/precv_init_f08.F90 index 122cfba7d7f..9e5c8e417f6 100644 --- a/ompi/mpi/fortran/use-mpi-f08/precv_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/precv_init_f08.F90 @@ -6,23 +6,28 @@ ! Copyright (c) 2018 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2020 Sandia National Laboratories. All rights reserved. +! Copyright (c) 2021 Bull S.A.S. All rights reserved. ! 
$COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPI_Precv_init_f08(buf,partitions,count,datatype,dest,tag,comm,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request +#include "mpi-f08-rename.h" + +subroutine MPI_Precv_init_f08(buf,partitions,count,datatype,dest,tag,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request use :: ompi_mpifh_bindings, only : ompi_precv_init_f implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf INTEGER, INTENT(IN) :: partitions,count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompi_precv_init_f(buf,partitions,count,datatype%MPI_VAL,dest,tag,comm%MPI_VAL,request%MPI_VAL,c_ierror) + call ompi_precv_init_f(buf,partitions,count,datatype%MPI_VAL,dest,tag,comm%MPI_VAL, & + info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror end subroutine MPI_Precv_init_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/probe_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/probe_f08.F90 index 18459ba5ad4..e6be94ca868 100644 --- a/ompi/mpi/fortran/use-mpi-f08/probe_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/probe_f08.F90 @@ -15,7 +15,7 @@ subroutine MPI_Probe_f08(source,tag,comm,status,ierror) implicit none INTEGER, INTENT(IN) :: source, tag TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Status), INTENT(OUT) :: status + TYPE(MPI_Status) :: status INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/Makefile.am b/ompi/mpi/fortran/use-mpi-f08/profile/Makefile.am index 5a52d6c151d..c855a01d4db 100644 --- a/ompi/mpi/fortran/use-mpi-f08/profile/Makefile.am +++ b/ompi/mpi/fortran/use-mpi-f08/profile/Makefile.am @@ -11,14 +11,14 @@ # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2021 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 Sandia National Laboratories. All rights reserved. # Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. # Copyright (c) 2012-2013 Inria. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2015-2020 Research Organization for Information Science -# and Technology (RIST). All rights reserved. +# Copyright (c) 2015-2021 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -28,6 +28,13 @@ include $(top_srcdir)/Makefile.ompi-rules +# Note that Automake's Fortran-building rules use CPPFLAGS and +# AM_CPPFLAGS. This can cause weirdness (e.g., +# https://github.com/open-mpi/ompi/issues/7253). Let's just zero +# those out and rely on AM_FCFLAGS. +CPPFLAGS = +AM_CPPFLAGS = + # This Makefile is only relevant if we're building the "use mpi_f08" # MPI bindings.
if OMPI_BUILD_FORTRAN_USEMPIF08_BINDINGS @@ -55,14 +62,21 @@ pmpi_api_files = \ paint_add_f08.F90 \ paint_diff_f08.F90 \ pallgather_f08.F90 \ + pallgather_init_f08.F90 \ pallgatherv_f08.F90 \ + pallgatherv_init_f08.F90 \ palloc_mem_f08.F90 \ pallreduce_f08.F90 \ palltoall_f08.F90 \ + palltoall_init_f08.F90 \ palltoallv_f08.F90 \ + palltoallv_init_f08.F90 \ palltoallw_f08.F90 \ + palltoallw_init_f08.F90 \ pbarrier_f08.F90 \ + pbarrier_init_f08.F90 \ pbcast_f08.F90 \ + pbcast_init_f08.F90 \ pbsend_f08.F90 \ pbsend_init_f08.F90 \ pbuffer_attach_f08.F90 \ @@ -90,6 +104,7 @@ pmpi_api_files = \ pcomm_dup_f08.F90 \ pcomm_dup_with_info_f08.F90 \ pcomm_idup_f08.F90 \ + pcomm_idup_with_info_f08.F90 \ pcomm_free_f08.F90 \ pcomm_free_keyval_f08.F90 \ pcomm_get_attr_f08.F90 \ @@ -122,6 +137,7 @@ pmpi_api_files = \ perror_class_f08.F90 \ perror_string_f08.F90 \ pexscan_f08.F90 \ + pexscan_init_f08.F90 \ pf_sync_reg_f08.F90 \ pfetch_and_op_f08.F90 \ pfile_call_errhandler_f08.F90 \ @@ -187,7 +203,9 @@ pmpi_api_files = \ pfinalize_f08.F90 \ pfree_mem_f08.F90 \ pgather_f08.F90 \ + pgather_init_f08.F90 \ pgatherv_f08.F90 \ + pgatherv_init_f08.F90 \ pget_accumulate_f08.F90 \ pget_address_f08.F90 \ pget_count_f08.F90 \ @@ -243,6 +261,7 @@ pmpi_api_files = \ pinfo_get_f08.F90 \ pinfo_get_nkeys_f08.F90 \ pinfo_get_nthkey_f08.F90 \ + pinfo_get_string_f08.F90 \ pinfo_get_valuelen_f08.F90 \ pinfo_set_f08.F90 \ pinit_f08.F90 \ @@ -260,16 +279,23 @@ pmpi_api_files = \ piscatter_f08.F90 \ piscatterv_f08.F90 \ pisend_f08.F90 \ + pisendrecv_f08.F90 \ + pisendrecv_replace_f08.F90 \ pissend_f08.F90 \ pis_thread_main_f08.F90 \ plookup_name_f08.F90 \ pmprobe_f08.F90 \ pmrecv_f08.F90 \ pneighbor_allgather_f08.F90 \ + pneighbor_allgather_init_f08.F90 \ pneighbor_allgatherv_f08.F90 \ + pneighbor_allgatherv_init_f08.F90 \ pneighbor_alltoall_f08.F90 \ + pneighbor_alltoall_init_f08.F90 \ pneighbor_alltoallv_f08.F90 \ + pneighbor_alltoallv_init_f08.F90 \ pneighbor_alltoallw_f08.F90 \ + 
pneighbor_alltoallw_init_f08.F90 \ pop_commutative_f08.F90 \ pop_create_f08.F90 \ popen_port_f08.F90 \ @@ -287,9 +313,12 @@ pmpi_api_files = \ precv_f08.F90 \ precv_init_f08.F90 \ preduce_f08.F90 \ + preduce_init_f08.F90 \ preduce_local_f08.F90 \ preduce_scatter_f08.F90 \ + preduce_scatter_init_f08.F90 \ preduce_scatter_block_f08.F90 \ + preduce_scatter_block_init_f08.F90 \ pregister_datarep_f08.F90 \ prequest_free_f08.F90 \ prequest_get_status_f08.F90 \ @@ -299,8 +328,11 @@ pmpi_api_files = \ prsend_f08.F90 \ prsend_init_f08.F90 \ pscan_f08.F90 \ + pscan_init_f08.F90 \ pscatter_f08.F90 \ + pscatter_init_f08.F90 \ pscatterv_f08.F90 \ + pscatterv_init_f08.F90 \ psend_f08.F90 \ psend_init_f08.F90 \ psendrecv_f08.F90 \ @@ -424,11 +456,3 @@ $(nodist_libmpi_usempif08_pmpi_la_SOURCES): MAINTAINERCLEANFILES = $(nodist_libmpi_usempif08_pmpi_la_SOURCES) endif - -# Don't want these targets in here - -tags-recursive: -tags: -TAGS: -GTAGS: -ID: diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pcomm_create_from_group_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pcomm_create_from_group_f08.F90 new file mode 100644 index 00000000000..84098a44dc2 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pcomm_create_from_group_f08.F90 @@ -0,0 +1,29 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +subroutine PMPI_Comm_create_from_group_f08(group, stringtag, info, errhandler, newcomm, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Group, MPI_Errhandler, MPI_Info, MPI_Comm + use :: ompi_mpifh_bindings, only : ompi_comm_create_from_group_f + implicit none + TYPE(MPI_Group), INTENT(IN) :: group + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newcomm + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_comm_create_from_group_f(group%MPI_VAL, stringtag, info%MPI_VAL, errhandler%MPI_VAL, & + newcomm%MPI_VAL, c_ierror, len(stringtag)) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Comm_create_from_group_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pgroup_from_session_pset_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pgroup_from_session_pset_f08.F90 new file mode 100644 index 00000000000..a719b361302 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pgroup_from_session_pset_f08.F90 @@ -0,0 +1,29 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019-2021 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" +#include "mpi-f08-rename.h" + +subroutine PMPI_Group_from_session_pset_f08(session, pset_name, newgroup, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Group + use :: ompi_mpifh_bindings, only : ompi_group_from_session_pset_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Group), INTENT(OUT) :: newgroup + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_group_from_session_pset_f(session%MPI_VAL, pset_name, newgroup%MPI_VAL, c_ierror, len(pset_name)) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Group_from_session_pset_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pintercomm_create_from_groups_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pintercomm_create_from_groups_f08.F90 new file mode 100644 index 00000000000..668188d1adb --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pintercomm_create_from_groups_f08.F90 @@ -0,0 +1,35 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +subroutine PMPI_Intercomm_create_from_groups_f08(local_group, local_leader, remote_group, & + remote_leader, stringtag, info, errhandler, & + newintercomm, ierror) + use :: mpi_f08_types, only : MPI_Comm, MPI_Group, MPI_Errhandler, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_intercomm_create_from_groups_f + implicit none + TYPE(MPI_Group), INTENT(IN) :: local_group, remote_group + INTEGER, INTENT(IN):: local_leader, remote_leader + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newintercomm + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_intercomm_create_from_groups_f(local_group%MPI_VAL, local_leader, & + remote_group%MPI_VAL, & + remote_leader, stringtag, info%MPI_VAL, & + errhandler%MPI_VAL, & + newintercomm%MPI_VAL, c_ierror, len(stringtag)) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Intercomm_create_from_groups_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_finalize_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_finalize_f08.F90 new file mode 100644 index 00000000000..01316dd79ca --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_finalize_f08.F90 @@ -0,0 +1,24 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +subroutine PMPI_Session_finalize_f08(session,ierror) + use :: mpi_f08_types, only : MPI_Session + use :: ompi_mpifh_bindings, only : ompi_session_finalize_f + implicit none + TYPE(MPI_Session), INTENT(INOUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_finalize_f(session%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_finalize_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_info_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_info_f08.F90 new file mode 100644 index 00000000000..bfe72d516e6 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_info_f08.F90 @@ -0,0 +1,25 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine PMPI_Session_get_info_f08(session, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_info_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_info_f(session%MPI_VAL, info%MPI_VAL, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_get_info_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_nth_pset_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_nth_pset_f08.F90 new file mode 100644 index 00000000000..249a25ddc1b --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_nth_pset_f08.F90 @@ -0,0 +1,27 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc.
All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019-2020 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine PMPI_Session_get_nth_pset_f08(session, info, n, pset_len, pset_name, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_nth_pset_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, INTENT(IN) :: n + INTEGER, INTENT(INOUT) :: pset_len + CHARACTER(LEN=*), INTENT(OUT) :: pset_name + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_nth_pset_f(session%MPI_VAL, info%MPI_VAL, n, pset_len, pset_name, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_get_nth_pset_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_num_psets_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_num_psets_f08.F90 new file mode 100644 index 00000000000..01fd0dc9c1b --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_num_psets_f08.F90 @@ -0,0 +1,25 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +!
$COPYRIGHT$ + +subroutine PMPI_Session_get_num_psets_f08(session, info, npset_names, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_num_psets_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, INTENT(OUT) :: npset_names + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_num_psets_f(session%MPI_VAL, info%MPI_VAL, npset_names, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_get_num_psets_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_pset_info_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_pset_info_f08.F90 new file mode 100644 index 00000000000..0271b976f3a --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_pset_info_f08.F90 @@ -0,0 +1,26 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +!
$COPYRIGHT$ + +subroutine PMPI_Session_get_pset_info_f08(session, pset_name, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_pset_info_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_pset_info_f(session%MPI_VAL, pset_name, info%MPI_VAL, c_ierror, len(pset_name)) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_get_pset_info_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_init_f08.F90 new file mode 100644 index 00000000000..555aa10e9dd --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_init_f08.F90 @@ -0,0 +1,26 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +subroutine PMPI_Session_init_f08(info,errhandler,session,ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info, MPI_Errhandler + use :: ompi_mpifh_bindings, only : ompi_session_init_f + implicit none + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Session), INTENT(OUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_init_f(info%MPI_VAL,errhandler%MPI_VAL,session%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_init_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/psend_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/psend_init_f08.F90 index 80e9f9d4233..3c68b2c1cae 100644 --- a/ompi/mpi/fortran/use-mpi-f08/psend_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/psend_init_f08.F90 @@ -6,23 +6,26 @@ ! Copyright (c) 2018 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2020 Sandia National Laboratories. All rights reserved. +! Copyright (c) 2021 Bull S.A.S. All rights reserved. !
$COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPI_Psend_init_f08(buf,partitions,count,datatype,dest,tag,comm,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request +subroutine MPI_Psend_init_f08(buf,partitions,count,datatype,dest,tag,comm,info,request,ierror) + use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request use :: ompi_mpifh_bindings, only : ompi_psend_init_f implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf INTEGER, INTENT(IN) :: partitions,count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompi_psend_init_f(buf,partitions,count,datatype%MPI_VAL,dest,tag,comm%MPI_VAL,request%MPI_VAL,c_ierror) + call ompi_psend_init_f(buf,partitions,count,datatype%MPI_VAL,dest,tag,comm%MPI_VAL, & + info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror end subroutine MPI_Psend_init_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/put_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/put_f08.F90 index efc0e86213c..b58e4d88623 100644 --- a/ompi/mpi/fortran/use-mpi-f08/put_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/put_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2010-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All Rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,7 +16,7 @@ subroutine MPI_Put_f08(origin_addr,origin_count,origin_datatype,target_rank,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Win, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_put_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp diff --git a/ompi/mpi/fortran/use-mpi-f08/raccumulate_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/raccumulate_f08.F90 index f612d3700e2..1935be341d4 100644 --- a/ompi/mpi/fortran/use-mpi-f08/raccumulate_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/raccumulate_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2010-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2014 Los Alamos National Security, LLC. ! All Rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -17,7 +17,7 @@ subroutine MPI_Raccumulate_f08(origin_addr,origin_count,origin_datatype,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Win, MPI_Request, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_raccumulate_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp diff --git a/ompi/mpi/fortran/use-mpi-f08/recv_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/recv_init_f08.F90 index 121e97f097f..5207511c281 100644 --- a/ompi/mpi/fortran/use-mpi-f08/recv_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/recv_init_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_Recv_init_f08(buf,count,datatype,source,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_recv_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf INTEGER, INTENT(IN) :: count, source, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/reduce_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/reduce_init_f08.F90 similarity index 61% rename from ompi/mpiext/pcollreq/use-mpi-f08/reduce_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/reduce_init_f08.F90 index 24493a1b512..00dbda4965f 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/reduce_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/reduce_init_f08.F90 @@ -1,21 +1,23 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! 
Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Reduce_init_f08(sendbuf,recvbuf,count,datatype,op,root,comm,info,request,ierror) +#include "mpi-f08-rename.h" + +subroutine MPI_Reduce_init_f08(sendbuf,recvbuf,count,datatype,op,root,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_reduce_init_f + use :: ompi_mpifh_bindings, only : ompi_reduce_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count, root TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op @@ -25,8 +27,8 @@ subroutine MPIX_Reduce_init_f08(sendbuf,recvbuf,count,datatype,op,root,comm,info INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_reduce_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& + call ompi_reduce_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& op%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Reduce_init_f08 +end subroutine MPI_Reduce_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/reduce_scatter_block_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/reduce_scatter_block_init_f08.F90 similarity index 60% rename from ompi/mpiext/pcollreq/use-mpi-f08/reduce_scatter_block_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/reduce_scatter_block_init_f08.F90 index b9b27823ecd..460f2a158c2 100644 --- 
a/ompi/mpiext/pcollreq/use-mpi-f08/reduce_scatter_block_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/reduce_scatter_block_init_f08.F90 @@ -1,21 +1,23 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Reduce_scatter_block_init_f08(sendbuf,recvbuf,recvcount,datatype,op,comm,info,request,ierror) +#include "mpi-f08-rename.h" + +subroutine MPI_Reduce_scatter_block_init_f08(sendbuf,recvbuf,recvcount,datatype,op,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_reduce_scatter_block_init_f + use :: ompi_mpifh_bindings, only : ompi_reduce_scatter_block_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: recvcount TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op @@ -25,8 +27,8 @@ subroutine MPIX_Reduce_scatter_block_init_f08(sendbuf,recvbuf,recvcount,datatype INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_reduce_scatter_block_init_f(sendbuf,recvbuf,recvcount,& + call ompi_reduce_scatter_block_init_f(sendbuf,recvbuf,recvcount,& datatype%MPI_VAL,op%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine 
MPIX_Reduce_scatter_block_init_f08 +end subroutine MPI_Reduce_scatter_block_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/reduce_scatter_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/reduce_scatter_init_f08.F90 similarity index 56% rename from ompi/mpiext/pcollreq/use-mpi-f08/reduce_scatter_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/reduce_scatter_init_f08.F90 index 66c0717cdd7..e7531662b47 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/reduce_scatter_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/reduce_scatter_init_f08.F90 @@ -1,22 +1,24 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! 
$COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Reduce_scatter_init_f08(sendbuf,recvbuf,recvcounts,datatype,op,comm,info,request,ierror) +#include "mpi-f08-rename.h" + +subroutine MPI_Reduce_scatter_init_f08(sendbuf,recvbuf,recvcounts,datatype,op,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_reduce_scatter_init_f + use :: ompi_mpifh_bindings, only : ompi_reduce_scatter_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*) + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: recvcounts(*) TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op TYPE(MPI_Comm), INTENT(IN) :: comm @@ -25,8 +27,8 @@ subroutine MPIX_Reduce_scatter_init_f08(sendbuf,recvbuf,recvcounts,datatype,op,c INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_reduce_scatter_init_f(sendbuf,recvbuf,recvcounts,datatype%MPI_VAL,& + call ompi_reduce_scatter_init_f(sendbuf,recvbuf,recvcounts,datatype%MPI_VAL,& op%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Reduce_scatter_init_f08 +end subroutine MPI_Reduce_scatter_init_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/rget_accumulate_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/rget_accumulate_f08.F90 index 831da177e1c..f442abf2358 100644 --- a/ompi/mpi/fortran/use-mpi-f08/rget_accumulate_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/rget_accumulate_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2010-2022 Cisco Systems, Inc. All rights reserved ! 
Copyright (c) 2009-2014 Los Alamos National Security, LLC. ! All Rights reserved. ! Copyright (c) 2018-2020 Research Organization for Information Science @@ -18,10 +18,10 @@ subroutine MPI_Rget_accumulate_f08(origin_addr,origin_count,origin_datatype,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Win, MPI_Request, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_rget_accumulate_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, result_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: result_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: result_addr TYPE(MPI_Datatype), INTENT(IN) :: result_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp TYPE(MPI_Datatype), INTENT(IN) :: target_datatype diff --git a/ompi/mpi/fortran/use-mpi-f08/rget_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/rget_f08.F90 index fcc81c3d8d0..5e419fa8bfb 100644 --- a/ompi/mpi/fortran/use-mpi-f08/rget_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/rget_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2010-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2014 Los Alamos National Security, LLC. ! All Rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,7 +16,7 @@ subroutine MPI_Rget_f08(origin_addr,origin_count,origin_datatype,target_rank,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Win, MPI_Request, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_rget_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp diff --git a/ompi/mpi/fortran/use-mpi-f08/rput_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/rput_f08.F90 index 9372e713388..efbca357754 100644 --- a/ompi/mpi/fortran/use-mpi-f08/rput_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/rput_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2010-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2014 Los Alamos National Security, LLC. ! All Rights reserved. ! 
Copyright (c) 2018-2020 Research Organization for Information Science @@ -16,7 +16,7 @@ subroutine MPI_Rput_f08(origin_addr,origin_count,origin_datatype,target_rank,& use :: mpi_f08_types, only : MPI_Datatype, MPI_Win, MPI_Request, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_rput_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: origin_addr + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: origin_addr INTEGER, INTENT(IN) :: origin_count, target_rank, target_count TYPE(MPI_Datatype), INTENT(IN) :: origin_datatype INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: target_disp diff --git a/ompi/mpi/fortran/use-mpi-f08/rsend_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/rsend_init_f08.F90 index ca87afc5cef..28a3e8a6d31 100644 --- a/ompi/mpi/fortran/use-mpi-f08/rsend_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/rsend_init_f08.F90 @@ -15,7 +15,7 @@ subroutine MPI_Rsend_init_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_rsend_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/scan_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/scan_init_f08.F90 similarity index 61% rename from ompi/mpiext/pcollreq/use-mpi-f08/scan_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/scan_init_f08.F90 index 6819722929c..0b7fe3b993d 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/scan_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/scan_init_f08.F90 @@ -1,21 +1,23 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! 
All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Scan_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) +#include "mpi-f08-rename.h" + +subroutine MPI_Scan_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_scan_init_f + use :: ompi_mpifh_bindings, only : ompi_scan_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: count TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Op), INTENT(IN) :: op @@ -25,8 +27,8 @@ subroutine MPIX_Scan_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,reques INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_scan_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& + call ompi_scan_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& op%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Scan_init_f08 +end subroutine MPI_Scan_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/scatter_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/scatter_init_f08.F90 similarity index 64% rename from ompi/mpiext/pcollreq/use-mpi-f08/scatter_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/scatter_init_f08.F90 index 372207fbaaf..e892d27dea8 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/scatter_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/scatter_init_f08.F90 @@ 
-1,22 +1,24 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Scatter_init_f08(sendbuf,sendcount,sendtype,recvbuf,& +#include "mpi-f08-rename.h" + +subroutine MPI_Scatter_init_f08(sendbuf,sendcount,sendtype,recvbuf,& recvcount,recvtype,root,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_scatter_init_f + use :: ompi_mpifh_bindings, only : ompi_scatter_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: sendcount, recvcount, root TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype @@ -26,8 +28,8 @@ subroutine MPIX_Scatter_init_f08(sendbuf,sendcount,sendtype,recvbuf,& INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_scatter_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcount,& + call ompi_scatter_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcount,& recvtype%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Scatter_init_f08 +end subroutine MPI_Scatter_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/scatterv_init_f08.F90 
b/ompi/mpi/fortran/use-mpi-f08/scatterv_init_f08.F90 similarity index 61% rename from ompi/mpiext/pcollreq/use-mpi-f08/scatterv_init_f08.F90 rename to ompi/mpi/fortran/use-mpi-f08/scatterv_init_f08.F90 index fe4a4a9d1b9..bbf3c77cdd0 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/scatterv_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/scatterv_init_f08.F90 @@ -1,24 +1,26 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2022 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science +! Copyright (c) 2018-2021 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" -subroutine MPIX_Scatterv_init_f08(sendbuf,sendcounts,displs,sendtype,recvbuf,& +#include "mpi-f08-rename.h" + +subroutine MPI_Scatterv_init_f08(sendbuf,sendcounts,displs,sendtype,recvbuf,& recvcount,recvtype,root,comm,info,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_scatterv_init_f + use :: ompi_mpifh_bindings, only : ompi_scatterv_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: sendbuf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: recvbuf INTEGER, INTENT(IN) :: recvcount, root - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), displs(*) + INTEGER, INTENT(IN) OMPI_ASYNCHRONOUS :: sendcounts(*), displs(*) TYPE(MPI_Datatype), INTENT(IN) :: sendtype TYPE(MPI_Datatype), INTENT(IN) :: recvtype TYPE(MPI_Comm), INTENT(IN) :: comm @@ -27,8 +29,8 @@ subroutine 
MPIX_Scatterv_init_f08(sendbuf,sendcounts,displs,sendtype,recvbuf,& INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompix_scatterv_init_f(sendbuf,sendcounts,displs,sendtype%MPI_VAL,recvbuf,& + call ompi_scatterv_init_f(sendbuf,sendcounts,displs,sendtype%MPI_VAL,recvbuf,& recvcount,recvtype%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) if (present(ierror)) ierror = c_ierror -end subroutine MPIX_Scatterv_init_f08 +end subroutine MPI_Scatterv_init_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/send_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/send_init_f08.F90 index bf7e9e1025f..769501bddb6 100644 --- a/ompi/mpi/fortran/use-mpi-f08/send_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/send_init_f08.F90 @@ -15,7 +15,7 @@ subroutine MPI_Send_init_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_send_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/session_finalize_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_finalize_f08.F90 new file mode 100644 index 00000000000..55bf9e4e479 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_finalize_f08.F90 @@ -0,0 +1,24 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +subroutine MPI_Session_finalize_f08(session,ierror) + use :: mpi_f08_types, only : MPI_Session + use :: ompi_mpifh_bindings, only : ompi_session_finalize_f + implicit none + TYPE(MPI_Session), INTENT(INOUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_finalize_f(session%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_finalize_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/session_get_info_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_get_info_f08.F90 new file mode 100644 index 00000000000..c0e1eb16577 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_get_info_f08.F90 @@ -0,0 +1,25 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine MPI_Session_get_info_f08(session, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_info_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_info_f(session%MPI_VAL, info%MPI_VAL, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_get_info_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/session_get_nth_pset_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_get_nth_pset_f08.F90 new file mode 100644 index 00000000000..fa41b9f2ac3 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_get_nth_pset_f08.F90 @@ -0,0 +1,27 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! 
Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019-2020 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine MPI_Session_get_nth_pset_f08(session, info, n, pset_len, pset_name, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_nth_pset_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, OPTIONAL, INTENT(IN) :: n + INTEGER, OPTIONAL, INTENT(INOUT) :: pset_len + CHARACTER(LEN=*), INTENT(OUT) :: pset_name + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_nth_pset_f(session%MPI_VAL, info%MPI_VAL, n, pset_len, pset_name, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_get_nth_pset_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/session_get_num_psets_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_get_num_psets_f08.F90 new file mode 100644 index 00000000000..b5d114efea8 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_get_num_psets_f08.F90 @@ -0,0 +1,25 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +subroutine MPI_Session_get_num_psets_f08(session, info, npset_names, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_num_psets_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: npset_names + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_num_psets_f(session%MPI_VAL, info%MPI_VAL, npset_names, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_get_num_psets_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/session_get_pset_info_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_get_pset_info_f08.F90 new file mode 100644 index 00000000000..51383469b1c --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_get_pset_info_f08.F90 @@ -0,0 +1,26 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +subroutine MPI_Session_get_pset_info_f08(session, pset_name, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_pset_info_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_pset_info_f(session%MPI_VAL, pset_name, info%MPI_VAL, c_ierror, len(pset_name)) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_get_pset_info_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/session_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_init_f08.F90 new file mode 100644 index 00000000000..b9eee1338b1 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_init_f08.F90 @@ -0,0 +1,30 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019-2021 Triad National Security, LLC. All rights +! reserved. +! 
$COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +#include "mpi-f08-rename.h" + +subroutine MPI_Session_init_f08(info,errhandler,session,ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info, MPI_Errhandler + use :: ompi_mpifh_bindings, only : ompi_session_init_f + implicit none + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(OUT) :: errhandler + TYPE(MPI_Session), INTENT(OUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_init_f(info%MPI_VAL,errhandler%MPI_VAL,session%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_init_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/ssend_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/ssend_init_f08.F90 index f2ea586fc26..e40b75c6108 100644 --- a/ompi/mpi/fortran/use-mpi-f08/ssend_init_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/ssend_init_f08.F90 @@ -15,7 +15,7 @@ subroutine MPI_Ssend_init_f08(buf,count,datatype,dest,tag,comm,request,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request use :: ompi_mpifh_bindings, only : ompi_ssend_init_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS, INTENT(IN) :: buf INTEGER, INTENT(IN) :: count, dest, tag TYPE(MPI_Datatype), INTENT(IN) :: datatype TYPE(MPI_Comm), INTENT(IN) :: comm diff --git a/ompi/mpi/fortran/use-mpi-f08/win_attach_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/win_attach_f08.F90 index 43210f25efd..fb78b337158 100644 --- a/ompi/mpi/fortran/use-mpi-f08/win_attach_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/win_attach_f08.F90 @@ -13,7 +13,7 @@ subroutine MPI_Win_attach_f08(win,base,size,ierror) use :: mpi_f08_types, only : MPI_Win, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_win_attach_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: base INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: size 
TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/win_create_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/win_create_f08.F90 index 3e452edd539..2376f399ce5 100644 --- a/ompi/mpi/fortran/use-mpi-f08/win_create_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/win_create_f08.F90 @@ -16,7 +16,7 @@ subroutine MPI_Win_create_f08(base,size,disp_unit,info,comm,win,ierror) use :: mpi_f08_types, only : MPI_Info, MPI_Comm, MPI_Win, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_win_create_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS:: base INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: size INTEGER, INTENT(IN) :: disp_unit TYPE(MPI_Info), INTENT(IN) :: info diff --git a/ompi/mpi/fortran/use-mpi-f08/win_detach_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/win_detach_f08.F90 index 54d09cb2ea4..a85f5b62523 100644 --- a/ompi/mpi/fortran/use-mpi-f08/win_detach_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/win_detach_f08.F90 @@ -13,7 +13,7 @@ subroutine MPI_Win_detach_f08(win,base,ierror) use :: mpi_f08_types, only : MPI_Win, MPI_ADDRESS_KIND use :: ompi_mpifh_bindings, only : ompi_win_detach_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: base TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-file-interfaces.h.in b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-file-interfaces.h.in index c00ebc5d8c7..a42973736ea 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-file-interfaces.h.in +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-file-interfaces.h.in @@ -1,6 +1,6 @@ ! -*- fortran -*- ! -! Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2007 Los Alamos National Security, LLC. All rights ! 
reserved. ! Copyright (c) 2019-2020 Research Organization for Information Science @@ -12,7 +12,7 @@ ! $HEADER$ -interface MPI_File_call_errhandler +interface subroutine MPI_File_call_errhandler(fh, errorcode, ierror) integer, intent(in) :: fh @@ -22,7 +22,7 @@ end subroutine MPI_File_call_errhandler end interface -interface MPI_File_close +interface subroutine MPI_File_close(fh, ierror) integer, intent(inout) :: fh @@ -31,7 +31,7 @@ end subroutine MPI_File_close end interface -interface MPI_File_create_errhandler +interface subroutine MPI_File_create_errhandler(function, errhandler, ierror) external :: function @@ -41,7 +41,7 @@ end subroutine MPI_File_create_errhandler end interface -interface MPI_File_delete +interface subroutine MPI_File_delete(filename, info, ierror) character(len=*), intent(in) :: filename @@ -51,7 +51,7 @@ end subroutine MPI_File_delete end interface -interface MPI_File_get_amode +interface subroutine MPI_File_get_amode(fh, amode, ierror) integer, intent(in) :: fh @@ -61,7 +61,7 @@ end subroutine MPI_File_get_amode end interface -interface MPI_File_get_atomicity +interface subroutine MPI_File_get_atomicity(fh, flag, ierror) integer, intent(in) :: fh @@ -71,7 +71,7 @@ end subroutine MPI_File_get_atomicity end interface -interface MPI_File_get_byte_offset +interface subroutine MPI_File_get_byte_offset(fh, offset, disp, ierror) include 'mpif-config.h' @@ -83,7 +83,7 @@ end subroutine MPI_File_get_byte_offset end interface -interface MPI_File_get_errhandler +interface subroutine MPI_File_get_errhandler(file, errhandler, ierror) integer, intent(in) :: file @@ -93,7 +93,7 @@ end subroutine MPI_File_get_errhandler end interface -interface MPI_File_get_group +interface subroutine MPI_File_get_group(fh, group, ierror) integer, intent(in) :: fh @@ -103,7 +103,7 @@ end subroutine MPI_File_get_group end interface -interface MPI_File_get_info +interface subroutine MPI_File_get_info(fh, info_used, ierror) integer, intent(in) :: fh @@ -113,7 +113,7 @@ end 
subroutine MPI_File_get_info end interface -interface MPI_File_get_position +interface subroutine MPI_File_get_position(fh, offset, ierror) include 'mpif-config.h' @@ -124,7 +124,7 @@ end subroutine MPI_File_get_position end interface -interface MPI_File_get_position_shared +interface subroutine MPI_File_get_position_shared(fh, offset, ierror) include 'mpif-config.h' @@ -135,7 +135,7 @@ end subroutine MPI_File_get_position_shared end interface -interface MPI_File_get_size +interface subroutine MPI_File_get_size(fh, size, ierror) include 'mpif-config.h' @@ -146,7 +146,7 @@ end subroutine MPI_File_get_size end interface -interface MPI_File_get_type_extent +interface subroutine MPI_File_get_type_extent(fh, datatype, extent, ierror) include 'mpif-config.h' @@ -158,7 +158,7 @@ end subroutine MPI_File_get_type_extent end interface -interface MPI_File_get_view +interface subroutine MPI_File_get_view(fh, disp, etype, filetype, datarep& , ierror) @@ -173,7 +173,7 @@ end subroutine MPI_File_get_view end interface -interface MPI_File_iread +interface subroutine MPI_File_iread(fh, buf, count, datatype, request& , ierror) @@ -188,7 +188,7 @@ end subroutine MPI_File_iread end interface -interface MPI_File_iread_all +interface subroutine MPI_File_iread_all(fh, buf, count, datatype, request& , ierror) @@ -203,7 +203,7 @@ end subroutine MPI_File_iread_all end interface -interface MPI_File_iread_at +interface subroutine MPI_File_iread_at(fh, offset, buf, count, datatype, & request, ierror) @@ -220,7 +220,7 @@ end subroutine MPI_File_iread_at end interface -interface MPI_File_iread_at_all +interface subroutine MPI_File_iread_at_all(fh, offset, buf, count, datatype, & request, ierror) @@ -237,7 +237,7 @@ end subroutine MPI_File_iread_at_all end interface -interface MPI_File_iread_shared +interface subroutine MPI_File_iread_shared(fh, buf, count, datatype, request& , ierror) @@ -252,7 +252,7 @@ end subroutine MPI_File_iread_shared end interface -interface MPI_File_iwrite +interface 
subroutine MPI_File_iwrite(fh, buf, count, datatype, request& , ierror) @@ -267,7 +267,7 @@ end subroutine MPI_File_iwrite end interface -interface MPI_File_iwrite_all +interface subroutine MPI_File_iwrite_all(fh, buf, count, datatype, request& , ierror) @@ -282,7 +282,7 @@ end subroutine MPI_File_iwrite_all end interface -interface MPI_File_iwrite_at +interface subroutine MPI_File_iwrite_at(fh, offset, buf, count, datatype, & request, ierror) @@ -299,7 +299,7 @@ end subroutine MPI_File_iwrite_at end interface -interface MPI_File_iwrite_at_all +interface subroutine MPI_File_iwrite_at_all(fh, offset, buf, count, datatype, & request, ierror) @@ -316,7 +316,7 @@ end subroutine MPI_File_iwrite_at_all end interface -interface MPI_File_iwrite_shared +interface subroutine MPI_File_iwrite_shared(fh, buf, count, datatype, request& , ierror) @@ -331,7 +331,7 @@ end subroutine MPI_File_iwrite_shared end interface -interface MPI_File_open +interface subroutine MPI_File_open(comm, filename, amode, info, fh& , ierror) @@ -345,7 +345,7 @@ end subroutine MPI_File_open end interface -interface MPI_File_preallocate +interface subroutine MPI_File_preallocate(fh, size, ierror) include 'mpif-config.h' @@ -356,7 +356,7 @@ end subroutine MPI_File_preallocate end interface -interface MPI_File_read +interface subroutine MPI_File_read(fh, buf, count, datatype, status& , ierror) @@ -372,7 +372,7 @@ end subroutine MPI_File_read end interface -interface MPI_File_read_all +interface subroutine MPI_File_read_all(fh, buf, count, datatype, status& , ierror) @@ -388,7 +388,7 @@ end subroutine MPI_File_read_all end interface -interface MPI_File_read_all_begin +interface subroutine MPI_File_read_all_begin(fh, buf, count, datatype, ierror) integer, intent(in) :: fh @@ -401,7 +401,7 @@ end subroutine MPI_File_read_all_begin end interface -interface MPI_File_read_all_end +interface subroutine MPI_File_read_all_end(fh, buf, status, ierror) include 'mpif-config.h' @@ -414,7 +414,7 @@ end subroutine 
MPI_File_read_all_end end interface -interface MPI_File_read_at +interface subroutine MPI_File_read_at(fh, offset, buf, count, datatype, & status, ierror) @@ -431,7 +431,7 @@ end subroutine MPI_File_read_at end interface -interface MPI_File_read_at_all +interface subroutine MPI_File_read_at_all(fh, offset, buf, count, datatype, & status, ierror) @@ -448,7 +448,7 @@ end subroutine MPI_File_read_at_all end interface -interface MPI_File_read_at_all_begin +interface subroutine MPI_File_read_at_all_begin(fh, offset, buf, count, datatype& , ierror) @@ -464,7 +464,7 @@ end subroutine MPI_File_read_at_all_begin end interface -interface MPI_File_read_at_all_end +interface subroutine MPI_File_read_at_all_end(fh, buf, status, ierror) include 'mpif-config.h' @@ -477,7 +477,7 @@ end subroutine MPI_File_read_at_all_end end interface -interface MPI_File_read_ordered +interface subroutine MPI_File_read_ordered(fh, buf, count, datatype, status& , ierror) @@ -493,7 +493,7 @@ end subroutine MPI_File_read_ordered end interface -interface MPI_File_read_ordered_begin +interface subroutine MPI_File_read_ordered_begin(fh, buf, count, datatype, ierror) integer, intent(in) :: fh @@ -506,7 +506,7 @@ end subroutine MPI_File_read_ordered_begin end interface -interface MPI_File_read_ordered_end +interface subroutine MPI_File_read_ordered_end(fh, buf, status, ierror) include 'mpif-config.h' @@ -519,7 +519,7 @@ end subroutine MPI_File_read_ordered_end end interface -interface MPI_File_read_shared +interface subroutine MPI_File_read_shared(fh, buf, count, datatype, status& , ierror) @@ -535,7 +535,7 @@ end subroutine MPI_File_read_shared end interface -interface MPI_File_seek +interface subroutine MPI_File_seek(fh, offset, whence, ierror) include 'mpif-config.h' @@ -547,7 +547,7 @@ end subroutine MPI_File_seek end interface -interface MPI_File_seek_shared +interface subroutine MPI_File_seek_shared(fh, offset, whence, ierror) include 'mpif-config.h' @@ -559,7 +559,7 @@ end subroutine 
MPI_File_seek_shared end interface -interface MPI_File_set_atomicity +interface subroutine MPI_File_set_atomicity(fh, flag, ierror) integer, intent(in) :: fh @@ -569,7 +569,7 @@ end subroutine MPI_File_set_atomicity end interface -interface MPI_File_set_errhandler +interface subroutine MPI_File_set_errhandler(file, errhandler, ierror) integer, intent(in) :: file @@ -579,7 +579,7 @@ end subroutine MPI_File_set_errhandler end interface -interface MPI_File_set_info +interface subroutine MPI_File_set_info(fh, info, ierror) integer, intent(in) :: fh @@ -589,7 +589,7 @@ end subroutine MPI_File_set_info end interface -interface MPI_File_set_size +interface subroutine MPI_File_set_size(fh, size, ierror) include 'mpif-config.h' @@ -600,7 +600,7 @@ end subroutine MPI_File_set_size end interface -interface MPI_File_set_view +interface subroutine MPI_File_set_view(fh, disp, etype, filetype, datarep, & info, ierror) @@ -616,7 +616,7 @@ end subroutine MPI_File_set_view end interface -interface MPI_File_sync +interface subroutine MPI_File_sync(fh, ierror) integer, intent(in) :: fh @@ -625,7 +625,7 @@ end subroutine MPI_File_sync end interface -interface MPI_File_write +interface subroutine MPI_File_write(fh, buf, count, datatype, status& , ierror) @@ -641,7 +641,7 @@ end subroutine MPI_File_write end interface -interface MPI_File_write_all +interface subroutine MPI_File_write_all(fh, buf, count, datatype, status& , ierror) @@ -657,7 +657,7 @@ end subroutine MPI_File_write_all end interface -interface MPI_File_write_all_begin +interface subroutine MPI_File_write_all_begin(fh, buf, count, datatype, ierror) integer, intent(in) :: fh @@ -670,7 +670,7 @@ end subroutine MPI_File_write_all_begin end interface -interface MPI_File_write_all_end +interface subroutine MPI_File_write_all_end(fh, buf, status, ierror) include 'mpif-config.h' @@ -683,7 +683,7 @@ end subroutine MPI_File_write_all_end end interface -interface MPI_File_write_at +interface subroutine MPI_File_write_at(fh, offset, 
buf, count, datatype, & status, ierror) @@ -700,7 +700,7 @@ end subroutine MPI_File_write_at end interface -interface MPI_File_write_at_all +interface subroutine MPI_File_write_at_all(fh, offset, buf, count, datatype, & status, ierror) @@ -717,7 +717,7 @@ end subroutine MPI_File_write_at_all end interface -interface MPI_File_write_at_all_begin +interface subroutine MPI_File_write_at_all_begin(fh, offset, buf, count, datatype& , ierror) @@ -733,7 +733,7 @@ end subroutine MPI_File_write_at_all_begin end interface -interface MPI_File_write_at_all_end +interface subroutine MPI_File_write_at_all_end(fh, buf, status, ierror) include 'mpif-config.h' @@ -746,7 +746,7 @@ end subroutine MPI_File_write_at_all_end end interface -interface MPI_File_write_ordered +interface subroutine MPI_File_write_ordered(fh, buf, count, datatype, status& , ierror) @@ -762,7 +762,7 @@ end subroutine MPI_File_write_ordered end interface -interface MPI_File_write_ordered_begin +interface subroutine MPI_File_write_ordered_begin(fh, buf, count, datatype, ierror) integer, intent(in) :: fh @@ -775,7 +775,7 @@ end subroutine MPI_File_write_ordered_begin end interface -interface MPI_File_write_ordered_end +interface subroutine MPI_File_write_ordered_end(fh, buf, status, ierror) include 'mpif-config.h' @@ -788,7 +788,7 @@ end subroutine MPI_File_write_ordered_end end interface -interface MPI_File_write_shared +interface subroutine MPI_File_write_shared(fh, buf, count, datatype, status& , ierror) diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in index 1e7b4861f38..ed878001c21 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in @@ -1,6 +1,6 @@ ! -*- fortran -*- ! -! Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved ! 
Copyright (c) 2007 Los Alamos National Security, LLC. All rights ! reserved. ! Copyright (c) 2012 The University of Tennessee and The University @@ -11,6 +11,10 @@ ! reserved. ! Copyright (c) 2015-2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! Copyright (c) 2021 Bull S.A.S. All rights reserved. +! Copyright (c) 2021 IBM Corporation. All rights reserved. ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -18,7 +22,7 @@ ! $HEADER$ -interface MPI_Abort +interface subroutine MPI_Abort(comm, errorcode, ierror) integer, intent(in) :: comm @@ -29,7 +33,7 @@ end subroutine MPI_Abort end interface -interface MPI_Accumulate +interface subroutine MPI_Accumulate(origin_addr, origin_count, origin_datatype, target_rank, target_disp, & target_count, target_datatype, op, win, ierror) @@ -50,7 +54,7 @@ end subroutine MPI_Accumulate end interface -interface MPI_Add_error_class +interface subroutine MPI_Add_error_class(errorclass, ierror) integer, intent(out) :: errorclass @@ -60,7 +64,7 @@ end subroutine MPI_Add_error_class end interface -interface MPI_Add_error_code +interface subroutine MPI_Add_error_code(errorclass, errorcode, ierror) integer, intent(in) :: errorclass @@ -71,7 +75,7 @@ end subroutine MPI_Add_error_code end interface -interface MPI_Add_error_string +interface subroutine MPI_Add_error_string(errorcode, string, ierror) integer, intent(in) :: errorcode @@ -82,7 +86,7 @@ end subroutine MPI_Add_error_string end interface -interface MPI_Aint_add +interface function MPI_Aint_add(base, diff) include 'mpif-config.h' @@ -94,7 +98,7 @@ end function MPI_Aint_add end interface -interface MPI_Aint_diff +interface function MPI_Aint_diff(addr1, addr2) include 'mpif-config.h' @@ -106,7 +110,7 @@ end function MPI_Aint_diff end interface -interface MPI_Allgather +interface subroutine MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, & 
recvtype, comm, ierror) @@ -125,7 +129,29 @@ end subroutine MPI_Allgather end interface -interface MPI_Allgatherv +interface + +subroutine MPI_Allgather_init(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, & + comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, intent(in) :: sendcount + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: recvbuf + integer, intent(in) :: recvcount + integer, intent(in) :: recvtype + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Allgather_init + +end interface + + +interface subroutine MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, & displs, recvtype, comm, ierror) @@ -145,6 +171,29 @@ end subroutine MPI_Allgatherv end interface +interface + +subroutine MPI_Allgatherv_init(sendbuf, sendcount, sendtype, recvbuf, recvcounts, & + displs, recvtype, comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, intent(in) :: sendcount + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, dimension(*), intent(in) :: recvcounts + integer, dimension(*), intent(in) :: displs + integer, intent(in) :: recvtype + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Allgatherv_init + +end interface + + +! This interface requires a name because there are multiple subroutines. 
interface MPI_Alloc_mem subroutine MPI_Alloc_mem(size, info, baseptr, ierror) @@ -167,7 +216,7 @@ end subroutine MPI_Alloc_mem_cptr end interface -interface MPI_Allreduce +interface subroutine MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, & comm, ierror) @@ -185,7 +234,27 @@ end subroutine MPI_Allreduce end interface -interface MPI_Alltoall +interface + +subroutine MPI_Allreduce_init(sendbuf, recvbuf, count, datatype, op, & + comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(in) :: op + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Allreduce_init + +end interface + + +interface subroutine MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, & recvtype, comm, ierror) @@ -204,7 +273,28 @@ end subroutine MPI_Alltoall end interface -interface MPI_Alltoallv +interface + +subroutine MPI_Alltoall_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, & + recvtype, comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, intent(in) :: sendcount + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: recvcount + integer, intent(in) :: recvtype + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Alltoall_init + +end interface + + +interface subroutine MPI_Alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, & recvcounts, rdispls, recvtype, comm, ierror) @@ -225,7 +315,30 @@ end subroutine MPI_Alltoallv end interface -interface MPI_Alltoallw 
+interface + +subroutine MPI_Alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, recvbuf, & + recvcounts, rdispls, recvtype, comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, dimension(*), intent(in) :: sendcounts + integer, dimension(*), intent(in) :: sdispls + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, dimension(*), intent(in) :: recvcounts + integer, dimension(*), intent(in) :: rdispls + integer, intent(in) :: recvtype + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Alltoallv_init + +end interface + + +interface subroutine MPI_Alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, & recvcounts, rdispls, recvtypes, comm, ierror) @@ -246,7 +359,30 @@ end subroutine MPI_Alltoallw end interface -interface MPI_Barrier +interface + +subroutine MPI_Alltoallw_init(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, & + recvcounts, rdispls, recvtypes, comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, dimension(*), intent(in) :: sendcounts + integer, dimension(*), intent(in) :: sdispls + integer, dimension(*), intent(in) :: sendtypes + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, dimension(*), intent(in) :: recvcounts + integer, dimension(*), intent(in) :: rdispls + integer, dimension(*), intent(in) :: recvtypes + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Alltoallw_init + +end interface + + +interface subroutine MPI_Barrier(comm, ierror) integer, intent(in) :: comm @@ -256,7 +392,19 @@ end subroutine MPI_Barrier end interface -interface MPI_Bcast 
+interface + +subroutine MPI_Barrier_init(comm, info, request, ierror) + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Barrier_init + +end interface + + +interface subroutine MPI_Bcast(buffer, count, datatype, root, comm& , ierror) @@ -272,7 +420,25 @@ end subroutine MPI_Bcast end interface -interface MPI_Bsend +interface + +subroutine MPI_Bcast_init(buffer, count, datatype, root, comm, & + info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buffer + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buffer + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(in) :: root + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Bcast_init + +end interface + + +interface subroutine MPI_Bsend(buf, count, datatype, dest, tag, & comm, ierror) @@ -289,7 +455,7 @@ end subroutine MPI_Bsend end interface -interface MPI_Bsend_init +interface subroutine MPI_Bsend_init(buf, count, datatype, dest, tag, & comm, request, ierror) @@ -307,7 +473,7 @@ end subroutine MPI_Bsend_init end interface -interface MPI_Buffer_attach +interface subroutine MPI_Buffer_attach(buffer, size, ierror) @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buffer @@ -319,7 +485,7 @@ end subroutine MPI_Buffer_attach end interface -interface MPI_Buffer_detach +interface subroutine MPI_Buffer_detach(buffer, size, ierror) @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buffer @@ -331,7 +497,7 @@ end subroutine MPI_Buffer_detach end interface -interface MPI_Cancel +interface subroutine MPI_Cancel(request, ierror) integer, intent(in) :: request @@ -341,7 +507,7 @@ end subroutine MPI_Cancel end interface -interface MPI_Cart_coords +interface subroutine MPI_Cart_coords(comm, rank, maxdims, coords, ierror) integer, intent(in) :: comm @@ -354,7 +520,7 @@ end subroutine MPI_Cart_coords end interface -interface MPI_Cart_create +interface 
subroutine MPI_Cart_create(old_comm, ndims, dims, periods, reorder, & comm_cart, ierror) @@ -370,7 +536,7 @@ end subroutine MPI_Cart_create end interface -interface MPI_Cart_get +interface subroutine MPI_Cart_get(comm, maxdims, dims, periods, coords& , ierror) @@ -385,7 +551,7 @@ end subroutine MPI_Cart_get end interface -interface MPI_Cart_map +interface subroutine MPI_Cart_map(comm, ndims, dims, periods, newrank& , ierror) @@ -400,7 +566,7 @@ end subroutine MPI_Cart_map end interface -interface MPI_Cart_rank +interface subroutine MPI_Cart_rank(comm, coords, rank, ierror) integer, intent(in) :: comm @@ -412,7 +578,7 @@ end subroutine MPI_Cart_rank end interface -interface MPI_Cart_shift +interface subroutine MPI_Cart_shift(comm, direction, disp, rank_source, rank_dest& , ierror) @@ -427,7 +593,7 @@ end subroutine MPI_Cart_shift end interface -interface MPI_Cart_sub +interface subroutine MPI_Cart_sub(comm, remain_dims, new_comm, ierror) integer, intent(in) :: comm @@ -439,7 +605,7 @@ end subroutine MPI_Cart_sub end interface -interface MPI_Cartdim_get +interface subroutine MPI_Cartdim_get(comm, ndims, ierror) integer, intent(in) :: comm @@ -450,7 +616,7 @@ end subroutine MPI_Cartdim_get end interface -interface MPI_Close_port +interface subroutine MPI_Close_port(port_name, ierror) character(len=*), intent(in) :: port_name @@ -460,7 +626,7 @@ end subroutine MPI_Close_port end interface -interface MPI_Comm_accept +interface subroutine MPI_Comm_accept(port_name, info, root, comm, newcomm& , ierror) @@ -475,7 +641,7 @@ end subroutine MPI_Comm_accept end interface -interface MPI_Comm_call_errhandler +interface subroutine MPI_Comm_call_errhandler(comm, errorcode, ierror) integer, intent(in) :: comm @@ -486,7 +652,7 @@ end subroutine MPI_Comm_call_errhandler end interface -interface MPI_Comm_compare +interface subroutine MPI_Comm_compare(comm1, comm2, result, ierror) integer, intent(in) :: comm1 @@ -498,7 +664,7 @@ end subroutine MPI_Comm_compare end interface -interface 
MPI_Comm_connect +interface subroutine MPI_Comm_connect(port_name, info, root, comm, newcomm& , ierror) @@ -513,7 +679,7 @@ end subroutine MPI_Comm_connect end interface -interface MPI_Comm_create +interface subroutine MPI_Comm_create(comm, group, newcomm, ierror) integer, intent(in) :: comm @@ -525,7 +691,7 @@ end subroutine MPI_Comm_create end interface -interface MPI_Comm_create_errhandler +interface subroutine MPI_Comm_create_errhandler(function, errhandler, ierror) external :: function @@ -535,8 +701,20 @@ end subroutine MPI_Comm_create_errhandler end interface +interface MPI_Comm_create_from_group + +subroutine MPI_Comm_create_from_group(group, stringtag, info, errhandler, newcomm, ierror) + integer, INTENT(IN) :: group + CHARACTER(LEN=*), INTENT(IN) :: stringtag + integer, INTENT(IN) :: info + integer, INTENT(IN) :: errhandler + integer, INTENT(OUT) :: newcomm + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Comm_create_from_group -interface MPI_Comm_create_group +end interface + +interface subroutine MPI_Comm_create_group(comm, group, tag, newcomm, ierror) integer, intent(in) :: comm @@ -549,7 +727,7 @@ end subroutine MPI_Comm_create_group end interface -interface MPI_Comm_create_keyval +interface subroutine MPI_Comm_create_keyval(comm_copy_attr_fn, comm_delete_attr_fn, comm_keyval, extra_state, ierror) include 'mpif-config.h' @@ -563,7 +741,7 @@ end subroutine MPI_Comm_create_keyval end interface -interface MPI_Comm_delete_attr +interface subroutine MPI_Comm_delete_attr(comm, comm_keyval, ierror) integer, intent(in) :: comm @@ -574,7 +752,7 @@ end subroutine MPI_Comm_delete_attr end interface -interface MPI_Comm_disconnect +interface subroutine MPI_Comm_disconnect(comm, ierror) integer, intent(inout) :: comm @@ -584,7 +762,7 @@ end subroutine MPI_Comm_disconnect end interface -interface MPI_Comm_dup +interface subroutine MPI_Comm_dup(comm, newcomm, ierror) integer, intent(in) :: comm @@ -595,7 +773,7 @@ end subroutine MPI_Comm_dup end interface 
-interface MPI_Comm_dup_with_info +interface subroutine MPI_Comm_dup_with_info(comm, info, newcomm, ierror) integer, intent(in) :: comm @@ -607,7 +785,7 @@ end subroutine MPI_Comm_dup_with_info end interface -interface MPI_Comm_free +interface subroutine MPI_Comm_free(comm, ierror) integer, intent(inout) :: comm @@ -617,7 +795,7 @@ end subroutine MPI_Comm_free end interface -interface MPI_Comm_free_keyval +interface subroutine MPI_Comm_free_keyval(comm_keyval, ierror) integer, intent(inout) :: comm_keyval @@ -627,7 +805,7 @@ end subroutine MPI_Comm_free_keyval end interface -interface MPI_Comm_get_attr +interface subroutine MPI_Comm_get_attr(comm, comm_keyval, attribute_val, flag, ierror) include 'mpif-config.h' @@ -641,7 +819,7 @@ end subroutine MPI_Comm_get_attr end interface -interface MPI_Comm_get_errhandler +interface subroutine MPI_Comm_get_errhandler(comm, erhandler, ierror) integer, intent(in) :: comm @@ -652,7 +830,7 @@ end subroutine MPI_Comm_get_errhandler end interface -interface MPI_Comm_get_info +interface subroutine MPI_Comm_get_info(comm, info_used, ierror) integer, intent(in) :: comm @@ -663,7 +841,7 @@ end subroutine MPI_Comm_get_info end interface -interface MPI_Comm_get_name +interface subroutine MPI_Comm_get_name(comm, comm_name, resultlen, ierror) integer, intent(in) :: comm @@ -675,7 +853,7 @@ end subroutine MPI_Comm_get_name end interface -interface MPI_Comm_get_parent +interface subroutine MPI_Comm_get_parent(parent, ierror) integer, intent(out) :: parent @@ -685,7 +863,7 @@ end subroutine MPI_Comm_get_parent end interface -interface MPI_Comm_group +interface subroutine MPI_Comm_group(comm, group, ierror) integer, intent(in) :: comm @@ -696,7 +874,7 @@ end subroutine MPI_Comm_group end interface -interface MPI_Comm_idup +interface subroutine MPI_Comm_idup(comm, newcomm, request, ierror) integer, intent(in) :: comm @@ -707,8 +885,19 @@ end subroutine MPI_Comm_idup end interface +interface + +subroutine MPI_Comm_idup_with_info(comm, info, 
newcomm, request, ierror) + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: newcomm + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Comm_idup_with_info + +end interface -interface MPI_Comm_join +interface subroutine MPI_Comm_join(fd, intercomm, ierror) integer, intent(in) :: fd @@ -719,7 +908,7 @@ end subroutine MPI_Comm_join end interface -interface MPI_Comm_rank +interface subroutine MPI_Comm_rank(comm, rank, ierror) integer, intent(in) :: comm @@ -730,7 +919,7 @@ end subroutine MPI_Comm_rank end interface -interface MPI_Comm_remote_group +interface subroutine MPI_Comm_remote_group(comm, group, ierror) integer, intent(in) :: comm @@ -741,7 +930,7 @@ end subroutine MPI_Comm_remote_group end interface -interface MPI_Comm_remote_size +interface subroutine MPI_Comm_remote_size(comm, size, ierror) integer, intent(in) :: comm @@ -752,7 +941,7 @@ end subroutine MPI_Comm_remote_size end interface -interface MPI_Comm_set_attr +interface subroutine MPI_Comm_set_attr(comm, comm_keyval, attribute_val, ierror) include 'mpif-config.h' @@ -765,7 +954,7 @@ end subroutine MPI_Comm_set_attr end interface -interface MPI_Comm_set_errhandler +interface subroutine MPI_Comm_set_errhandler(comm, errhandler, ierror) integer, intent(in) :: comm @@ -776,7 +965,7 @@ end subroutine MPI_Comm_set_errhandler end interface -interface MPI_Comm_set_info +interface subroutine MPI_Comm_set_info(comm, info, ierror) include 'mpif-config.h' @@ -788,7 +977,7 @@ end subroutine MPI_Comm_set_info end interface -interface MPI_Comm_set_name +interface subroutine MPI_Comm_set_name(comm, comm_name, ierror) integer, intent(in) :: comm @@ -799,7 +988,7 @@ end subroutine MPI_Comm_set_name end interface -interface MPI_Comm_size +interface subroutine MPI_Comm_size(comm, size, ierror) integer, intent(in) :: comm @@ -810,7 +999,7 @@ end subroutine MPI_Comm_size end interface -interface MPI_Comm_spawn +interface subroutine 
MPI_Comm_spawn(command, argv, maxprocs, info, root, & comm, intercomm, array_of_errcodes, ierror) @@ -828,7 +1017,7 @@ end subroutine MPI_Comm_spawn end interface -interface MPI_Comm_spawn_multiple +interface subroutine MPI_Comm_spawn_multiple(count, array_of_commands, array_of_argv, array_of_maxprocs, array_of_info, & root, comm, intercomm, array_of_errcodes, ierror) @@ -847,7 +1036,7 @@ end subroutine MPI_Comm_spawn_multiple end interface -interface MPI_Comm_split +interface subroutine MPI_Comm_split(comm, color, key, newcomm, ierror) integer, intent(in) :: comm @@ -860,7 +1049,7 @@ end subroutine MPI_Comm_split end interface -interface MPI_Comm_split_type +interface subroutine MPI_Comm_split_type(comm, split_type, key, info, newcomm, ierror) integer, intent(in) :: comm @@ -874,7 +1063,7 @@ end subroutine MPI_Comm_split_type end interface -interface MPI_Comm_test_inter +interface subroutine MPI_Comm_test_inter(comm, flag, ierror) integer, intent(in) :: comm @@ -885,7 +1074,7 @@ end subroutine MPI_Comm_test_inter end interface -interface MPI_Compare_and_swap +interface subroutine MPI_Compare_and_swap(origin_addr, compare_addr, result_addr, & datatype, target_rank, target_disp, win, ierror) @@ -904,7 +1093,7 @@ end subroutine MPI_Compare_and_swap end interface -interface MPI_Dims_create +interface subroutine MPI_Dims_create(nnodes, ndims, dims, ierror) integer, intent(in) :: nnodes @@ -916,7 +1105,7 @@ end subroutine MPI_Dims_create end interface -interface MPI_Dist_graph_create +interface subroutine MPI_Dist_graph_create(comm_old,n,sources,degrees,destinations,weights, & info,reorder,comm_dist_graph,ierror) @@ -932,7 +1121,7 @@ end subroutine MPI_Dist_graph_create end interface -interface MPI_Dist_graph_create_adjacent +interface subroutine MPI_Dist_graph_create_adjacent(comm_old,indegree,sources,sourceweights, & outdegree,destinations,destweights,info,reorder, & @@ -950,7 +1139,7 @@ end subroutine MPI_Dist_graph_create_adjacent end interface -interface 
MPI_Dist_graph_neighbors +interface subroutine MPI_Dist_graph_neighbors(comm,maxindegree,sources,sourceweights, & maxoutdegree,destinations,destweights,ierror) @@ -965,7 +1154,7 @@ end subroutine MPI_Dist_graph_neighbors end interface -interface MPI_Dist_graph_neighbors_count +interface subroutine MPI_Dist_graph_neighbors_count(comm,indegree,outdegree,weighted,ierror) implicit none @@ -978,7 +1167,7 @@ end subroutine MPI_Dist_graph_neighbors_count end interface -interface MPI_Errhandler_free +interface subroutine MPI_Errhandler_free(errhandler, ierror) integer, intent(inout) :: errhandler @@ -988,7 +1177,7 @@ end subroutine MPI_Errhandler_free end interface -interface MPI_Error_class +interface subroutine MPI_Error_class(errorcode, errorclass, ierror) integer, intent(in) :: errorcode @@ -999,7 +1188,7 @@ end subroutine MPI_Error_class end interface -interface MPI_Error_string +interface subroutine MPI_Error_string(errorcode, string, resultlen, ierror) integer, intent(in) :: errorcode @@ -1011,7 +1200,7 @@ end subroutine MPI_Error_string end interface -interface MPI_Exscan +interface subroutine MPI_Exscan(sendbuf, recvbuf, count, datatype, op, & comm, ierror) @@ -1029,7 +1218,27 @@ end subroutine MPI_Exscan end interface -interface MPI_F_sync_reg +interface + +subroutine MPI_Exscan_init(sendbuf, recvbuf, count, datatype, op, & + comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(in) :: op + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Exscan_init + +end interface + + +interface subroutine MPI_F_sync_reg(buf) @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf @@ -1039,7 +1248,7 @@ end subroutine MPI_F_sync_reg end interface -interface 
MPI_Fetch_and_op +interface subroutine MPI_Fetch_and_op(origin_addr, result_addr, datatype, target_rank,& target_disp, op, win, ierror) @@ -1059,7 +1268,7 @@ end subroutine MPI_Fetch_and_op end interface -interface MPI_Finalize +interface subroutine MPI_Finalize(ierror) integer, intent(out) :: ierror @@ -1068,7 +1277,7 @@ end subroutine MPI_Finalize end interface -interface MPI_Finalized +interface subroutine MPI_Finalized(flag, ierror) logical, intent(out) :: flag @@ -1078,7 +1287,7 @@ end subroutine MPI_Finalized end interface -interface MPI_Free_mem +interface subroutine MPI_Free_mem(base, ierror) @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base @@ -1089,7 +1298,7 @@ end subroutine MPI_Free_mem end interface -interface MPI_Gather +interface subroutine MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, & recvtype, root, comm, ierror) @@ -1109,7 +1318,29 @@ end subroutine MPI_Gather end interface -interface MPI_Gatherv +interface + +subroutine MPI_Gather_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, & + recvtype, root, comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, intent(in) :: sendcount + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: recvcount + integer, intent(in) :: recvtype + integer, intent(in) :: root + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Gather_init + +end interface + + +interface subroutine MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, & displs, recvtype, root, comm, ierror) @@ -1130,7 +1361,30 @@ end subroutine MPI_Gatherv end interface -interface MPI_Get +interface + +subroutine MPI_Gatherv_init(sendbuf, sendcount, sendtype, recvbuf, recvcounts, & + displs, recvtype, root, comm, info, request, ierror) + 
@OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, intent(in) :: sendcount + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, dimension(*), intent(in) :: recvcounts + integer, dimension(*), intent(in) :: displs + integer, intent(in) :: recvtype + integer, intent(in) :: root + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Gatherv_init + +end interface + + +interface subroutine MPI_Get(origin_addr, origin_count, origin_datatype, target_rank, target_disp, & target_count, target_datatype, win, ierror) @@ -1150,7 +1404,7 @@ end subroutine MPI_Get end interface -interface MPI_Get_accumulate +interface subroutine MPI_Get_accumulate(origin_addr, origin_count, origin_datatype,& result_addr, result_count, result_datatype,& @@ -1177,7 +1431,7 @@ end subroutine MPI_Get_accumulate end interface -interface MPI_Get_address +interface subroutine MPI_Get_address(location, address, ierror) include 'mpif-config.h' @@ -1190,7 +1444,7 @@ end subroutine MPI_Get_address end interface -interface MPI_Get_count +interface subroutine MPI_Get_count(status, datatype, count, ierror) include 'mpif-config.h' @@ -1203,7 +1457,7 @@ end subroutine MPI_Get_count end interface -interface MPI_Get_elements +interface subroutine MPI_Get_elements(status, datatype, count, ierror) include 'mpif-config.h' @@ -1216,7 +1470,7 @@ end subroutine MPI_Get_elements end interface -interface MPI_Get_elements_x +interface subroutine MPI_Get_elements_x(status, datatype, count, ierror) include 'mpif-config.h' @@ -1229,7 +1483,7 @@ end subroutine MPI_Get_elements_x end interface -interface MPI_Get_library_version +interface subroutine MPI_Get_library_version(version, resultlen, ierror) character(len=*), intent(out) :: version @@ -1240,7 +1494,7 @@ end subroutine MPI_Get_library_version 
end interface -interface MPI_Get_processor_name +interface subroutine MPI_Get_processor_name(name, resultlen, ierror) character(len=*), intent(out) :: name @@ -1251,7 +1505,7 @@ end subroutine MPI_Get_processor_name end interface -interface MPI_Get_version +interface subroutine MPI_Get_version(version, subversion, ierror) integer, intent(out) :: version @@ -1262,7 +1516,7 @@ end subroutine MPI_Get_version end interface -interface MPI_Graph_create +interface subroutine MPI_Graph_create(comm_old, nnodes, index, edges, reorder, & comm_graph, ierror) @@ -1278,7 +1532,7 @@ end subroutine MPI_Graph_create end interface -interface MPI_Graph_get +interface subroutine MPI_Graph_get(comm, maxindex, maxedges, index, edges& , ierror) @@ -1293,7 +1547,7 @@ end subroutine MPI_Graph_get end interface -interface MPI_Graph_map +interface subroutine MPI_Graph_map(comm, nnodes, index, edges, newrank& , ierror) @@ -1308,7 +1562,7 @@ end subroutine MPI_Graph_map end interface -interface MPI_Graph_neighbors +interface subroutine MPI_Graph_neighbors(comm, rank, maxneighbors, neighbors, ierror) integer, intent(in) :: comm @@ -1321,7 +1575,7 @@ end subroutine MPI_Graph_neighbors end interface -interface MPI_Graph_neighbors_count +interface subroutine MPI_Graph_neighbors_count(comm, rank, nneighbors, ierror) integer, intent(in) :: comm @@ -1333,7 +1587,7 @@ end subroutine MPI_Graph_neighbors_count end interface -interface MPI_Graphdims_get +interface subroutine MPI_Graphdims_get(comm, nnodes, nedges, ierror) integer, intent(in) :: comm @@ -1345,7 +1599,7 @@ end subroutine MPI_Graphdims_get end interface -interface MPI_Grequest_complete +interface subroutine MPI_Grequest_complete(request, ierror) integer, intent(in) :: request @@ -1355,7 +1609,7 @@ end subroutine MPI_Grequest_complete end interface -interface MPI_Grequest_start +interface subroutine MPI_Grequest_start(query_fn, free_fn, cancel_fn, extra_state, request& , ierror) @@ -1371,7 +1625,7 @@ end subroutine MPI_Grequest_start end 
interface -interface MPI_Group_compare +interface subroutine MPI_Group_compare(group1, group2, result, ierror) integer, intent(in) :: group1 @@ -1383,7 +1637,7 @@ end subroutine MPI_Group_compare end interface -interface MPI_Group_difference +interface subroutine MPI_Group_difference(group1, group2, newgroup, ierror) integer, intent(in) :: group1 @@ -1395,7 +1649,7 @@ end subroutine MPI_Group_difference end interface -interface MPI_Group_excl +interface subroutine MPI_Group_excl(group, n, ranks, newgroup, ierror) integer, intent(in) :: group @@ -1408,7 +1662,7 @@ end subroutine MPI_Group_excl end interface -interface MPI_Group_free +interface subroutine MPI_Group_free(group, ierror) integer, intent(inout) :: group @@ -1417,8 +1671,20 @@ end subroutine MPI_Group_free end interface +interface MPI_Group_from_session_pset + +subroutine MPI_Group_from_session_pset(session, pset_name, newgroup, ierror) + implicit none + integer, INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + integer, INTENT(OUT) :: newgroup + INTEGER, INTENT(OUT) :: ierror + integer :: c_ierror +end subroutine MPI_Group_from_session_pset + +end interface -interface MPI_Group_incl +interface subroutine MPI_Group_incl(group, n, ranks, newgroup, ierror) integer, intent(in) :: group @@ -1431,7 +1697,7 @@ end subroutine MPI_Group_incl end interface -interface MPI_Group_intersection +interface subroutine MPI_Group_intersection(group1, group2, newgroup, ierror) integer, intent(in) :: group1 @@ -1443,7 +1709,7 @@ end subroutine MPI_Group_intersection end interface -interface MPI_Group_range_excl +interface subroutine MPI_Group_range_excl(group, n, ranges, newgroup, ierror) integer, intent(in) :: group @@ -1456,7 +1722,7 @@ end subroutine MPI_Group_range_excl end interface -interface MPI_Group_range_incl +interface subroutine MPI_Group_range_incl(group, n, ranges, newgroup, ierror) integer, intent(in) :: group @@ -1469,7 +1735,7 @@ end subroutine MPI_Group_range_incl end interface -interface 
MPI_Group_rank +interface subroutine MPI_Group_rank(group, rank, ierror) integer, intent(in) :: group @@ -1480,7 +1746,7 @@ end subroutine MPI_Group_rank end interface -interface MPI_Group_size +interface subroutine MPI_Group_size(group, size, ierror) integer, intent(in) :: group @@ -1491,7 +1757,7 @@ end subroutine MPI_Group_size end interface -interface MPI_Group_translate_ranks +interface subroutine MPI_Group_translate_ranks(group1, n, ranks1, group2, ranks2& , ierror) @@ -1506,7 +1772,7 @@ end subroutine MPI_Group_translate_ranks end interface -interface MPI_Group_union +interface subroutine MPI_Group_union(group1, group2, newgroup, ierror) integer, intent(in) :: group1 @@ -1518,7 +1784,7 @@ end subroutine MPI_Group_union end interface -interface MPI_Iallgather +interface subroutine MPI_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, & recvtype, comm, request, ierror) @@ -1538,7 +1804,7 @@ end subroutine MPI_Iallgather end interface -interface MPI_Iallgatherv +interface subroutine MPI_Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, & displs, recvtype, comm, request, ierror) @@ -1559,7 +1825,7 @@ end subroutine MPI_Iallgatherv end interface -interface MPI_Iallreduce +interface subroutine MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, & comm, request, ierror) @@ -1578,7 +1844,7 @@ end subroutine MPI_Iallreduce end interface -interface MPI_Ialltoall +interface subroutine MPI_Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, & recvtype, comm, request, ierror) @@ -1598,7 +1864,7 @@ end subroutine MPI_Ialltoall end interface -interface MPI_Ialltoallv +interface subroutine MPI_Ialltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, & recvcounts, rdispls, recvtype, comm, request, ierror) @@ -1620,7 +1886,7 @@ end subroutine MPI_Ialltoallv end interface -interface MPI_Ialltoallw +interface subroutine MPI_Ialltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, & recvcounts, rdispls, recvtypes, comm, request, ierror) 
@@ -1642,7 +1908,7 @@ end subroutine MPI_Ialltoallw end interface -interface MPI_Ibarrier +interface subroutine MPI_Ibarrier(comm, request, ierror) integer, intent(in) :: comm @@ -1653,7 +1919,7 @@ end subroutine MPI_Ibarrier end interface -interface MPI_Ibcast +interface subroutine MPI_Ibcast(buffer, count, datatype, root, comm& , request, ierror) @@ -1670,7 +1936,7 @@ end subroutine MPI_Ibcast end interface -interface MPI_Ibsend +interface subroutine MPI_Ibsend(buf, count, datatype, dest, tag, & comm, request, ierror) @@ -1688,7 +1954,7 @@ end subroutine MPI_Ibsend end interface -interface MPI_Iexscan +interface subroutine MPI_Iexscan(sendbuf, recvbuf, count, datatype, op, & comm, request, ierror) @@ -1707,7 +1973,7 @@ end subroutine MPI_Iexscan end interface -interface MPI_Igather +interface subroutine MPI_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, & recvtype, root, comm, request, ierror) @@ -1728,7 +1994,7 @@ end subroutine MPI_Igather end interface -interface MPI_Igatherv +interface subroutine MPI_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, & displs, recvtype, root, comm, request, ierror) @@ -1750,7 +2016,7 @@ end subroutine MPI_Igatherv end interface -interface MPI_Improbe +interface subroutine MPI_Improbe(source, tag, comm, flag, message, status, ierror) include 'mpif-config.h' @@ -1766,7 +2032,7 @@ end subroutine MPI_Improbe end interface -interface MPI_Imrecv +interface subroutine MPI_Imrecv(buf, count, datatype, message, request, ierror) @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf @@ -1781,7 +2047,7 @@ end subroutine MPI_Imrecv end interface -interface MPI_Ineighbor_allgather +interface subroutine MPI_Ineighbor_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, & recvtype, comm, request, ierror) @@ -1801,7 +2067,7 @@ end subroutine MPI_Ineighbor_allgather end interface -interface MPI_Ineighbor_allgatherv +interface subroutine MPI_Ineighbor_allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, & displs, recvtype, 
comm, request, ierror) @@ -1822,7 +2088,7 @@ end subroutine MPI_Ineighbor_allgatherv end interface -interface MPI_Ineighbor_alltoall +interface subroutine MPI_Ineighbor_alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, & recvtype, comm, request, ierror) @@ -1842,7 +2108,7 @@ end subroutine MPI_Ineighbor_alltoall end interface -interface MPI_Ineighbor_alltoallv +interface subroutine MPI_Ineighbor_alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, & recvcounts, rdispls, recvtype, comm, request, ierror) @@ -1864,7 +2130,7 @@ end subroutine MPI_Ineighbor_alltoallv end interface -interface MPI_Ineighbor_alltoallw +interface subroutine MPI_Ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, & recvcounts, rdispls, recvtypes, comm, request, ierror) @@ -1887,7 +2153,7 @@ end subroutine MPI_Ineighbor_alltoallw end interface -interface MPI_Info_create +interface subroutine MPI_Info_create(info, ierror) integer, intent(out) :: info @@ -1897,7 +2163,7 @@ end subroutine MPI_Info_create end interface -interface MPI_Info_delete +interface subroutine MPI_Info_delete(info, key, ierror) integer, intent(in) :: info @@ -1908,7 +2174,7 @@ end subroutine MPI_Info_delete end interface -interface MPI_Info_dup +interface subroutine MPI_Info_dup(info, newinfo, ierror) integer, intent(in) :: info @@ -1919,7 +2185,7 @@ end subroutine MPI_Info_dup end interface -interface MPI_Info_free +interface subroutine MPI_Info_free(info, ierror) integer, intent(inout) :: info @@ -1929,7 +2195,7 @@ end subroutine MPI_Info_free end interface -interface MPI_Info_get +interface subroutine MPI_Info_get(info, key, valuelen, value, flag& , ierror) @@ -1944,7 +2210,7 @@ end subroutine MPI_Info_get end interface -interface MPI_Info_get_nkeys +interface subroutine MPI_Info_get_nkeys(info, nkeys, ierror) integer, intent(in) :: info @@ -1955,7 +2221,7 @@ end subroutine MPI_Info_get_nkeys end interface -interface MPI_Info_get_nthkey +interface subroutine MPI_Info_get_nthkey(info, n, 
key, ierror) integer, intent(in) :: info @@ -1966,8 +2232,21 @@ end subroutine MPI_Info_get_nthkey end interface +interface -interface MPI_Info_get_valuelen +subroutine MPI_Info_get_string(info, key, buflen, value, flag, ierror) + integer, intent(in) :: info + character(len=*), intent(in) :: key + integer, intent(inout) :: buflen + character(len=*), intent(out) :: value + logical, intent(out) :: flag + integer, intent(out) :: ierror +end subroutine MPI_Info_get_string + +end interface + + +interface subroutine MPI_Info_get_valuelen(info, key, valuelen, flag, ierror) integer, intent(in) :: info @@ -1980,7 +2259,7 @@ end subroutine MPI_Info_get_valuelen end interface -interface MPI_Info_set +interface subroutine MPI_Info_set(info, key, value, ierror) integer, intent(in) :: info @@ -1992,7 +2271,7 @@ end subroutine MPI_Info_set end interface -interface MPI_Init +interface subroutine MPI_Init(ierror) integer, intent(out) :: ierror @@ -2001,7 +2280,7 @@ end subroutine MPI_Init end interface -interface MPI_Init_thread +interface subroutine MPI_Init_thread(required, provided, ierror) integer, intent(in) :: required @@ -2012,7 +2291,7 @@ end subroutine MPI_Init_thread end interface -interface MPI_Initialized +interface subroutine MPI_Initialized(flag, ierror) logical, intent(out) :: flag @@ -2022,7 +2301,7 @@ end subroutine MPI_Initialized end interface -interface MPI_Intercomm_create +interface subroutine MPI_Intercomm_create(local_comm, local_leader, bridge_comm, remote_leader, tag, & newintercomm, ierror) @@ -2037,8 +2316,24 @@ end subroutine MPI_Intercomm_create end interface +interface MPI_Intercomm_create_from_groups + +subroutine MPI_Intercomm_create_from_groups(local_group, local_leader, remote_group, & + remote_leader, stringtag, info, errhandler, & + newintercomm, ierror) + implicit none + integer, INTENT(IN) :: local_group, remote_group + integer, INTENT(IN):: local_leader, remote_leader + CHARACTER(LEN=*), INTENT(IN) :: stringtag + integer, INTENT(IN) :: info + 
integer, INTENT(IN) :: errhandler + integer, INTENT(OUT) :: newintercomm + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Intercomm_create_from_groups + +end interface -interface MPI_Intercomm_merge +interface subroutine MPI_Intercomm_merge(intercomm, high, newintercomm, ierror) integer, intent(in) :: intercomm @@ -2050,7 +2345,7 @@ end subroutine MPI_Intercomm_merge end interface -interface MPI_Iprobe +interface subroutine MPI_Iprobe(source, tag, comm, flag, status& , ierror) @@ -2066,7 +2361,7 @@ end subroutine MPI_Iprobe end interface -interface MPI_Irecv +interface subroutine MPI_Irecv(buf, count, datatype, source, tag, & comm, request, ierror) @@ -2084,7 +2379,7 @@ end subroutine MPI_Irecv end interface -interface MPI_Ireduce +interface subroutine MPI_Ireduce(sendbuf, recvbuf, count, datatype, op, & root, comm, request, ierror) @@ -2104,7 +2399,7 @@ end subroutine MPI_Ireduce end interface -interface MPI_Ireduce_scatter +interface subroutine MPI_Ireduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, & comm, request, ierror) @@ -2123,7 +2418,7 @@ end subroutine MPI_Ireduce_scatter end interface -interface MPI_Ireduce_scatter_block +interface subroutine MPI_Ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, & comm, request, ierror) @@ -2142,7 +2437,7 @@ end subroutine MPI_Ireduce_scatter_block end interface -interface MPI_Irsend +interface subroutine MPI_Irsend(buf, count, datatype, dest, tag, & comm, request, ierror) @@ -2160,7 +2455,7 @@ end subroutine MPI_Irsend end interface -interface MPI_Is_thread_main +interface subroutine MPI_Is_thread_main(flag, ierror) logical, intent(out) :: flag @@ -2170,7 +2465,7 @@ end subroutine MPI_Is_thread_main end interface -interface MPI_Iscan +interface subroutine MPI_Iscan(sendbuf, recvbuf, count, datatype, op, & comm, request, ierror) @@ -2189,7 +2484,7 @@ end subroutine MPI_Iscan end interface -interface MPI_Iscatter +interface subroutine MPI_Iscatter(sendbuf, sendcount, sendtype, recvbuf, 
recvcount, & recvtype, root, comm, request, ierror) @@ -2210,7 +2505,7 @@ end subroutine MPI_Iscatter end interface -interface MPI_Iscatterv +interface subroutine MPI_Iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, & recvcount, recvtype, root, comm, request, ierror) @@ -2232,7 +2527,7 @@ end subroutine MPI_Iscatterv end interface -interface MPI_Isend +interface subroutine MPI_Isend(buf, count, datatype, dest, tag, & comm, request, ierror) @@ -2249,8 +2544,52 @@ end subroutine MPI_Isend end interface +interface + +subroutine MPI_Isendrecv(sendbuf, sendcount, sendtype, dest, sendtag, & + recvbuf, recvcount, recvtype, source, recvtag, comm, request, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, intent(in) :: sendcount + integer, intent(in) :: sendtype + integer, intent(in) :: dest + integer, intent(in) :: sendtag + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: recvcount + integer, intent(in) :: recvtype + integer, intent(in) :: source + integer, intent(in) :: recvtag + integer, intent(in) :: comm + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Isendrecv + +end interface + +interface + +subroutine MPI_Isendrecv_replace(buf, count, datatype, dest, sendtag, & + source, recvtag, comm, request, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(in) :: dest + integer, intent(in) :: sendtag + integer, intent(in) :: source + integer, intent(in) :: recvtag + integer, intent(in) :: comm + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Isendrecv_replace + +end interface + -interface MPI_Issend +interface subroutine MPI_Issend(buf, count, datatype, dest, tag, & comm, request, ierror) @@ 
-2268,10 +2607,10 @@ end subroutine MPI_Issend end interface -interface MPI_Psend_init +interface subroutine MPI_Psend_init(buf, partitions, count, datatype, dest, tag, & - comm, request, ierror) + comm, info, request, ierror) @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: buf integer, intent(in) :: partitions @@ -2280,6 +2619,7 @@ subroutine MPI_Psend_init(buf, partitions, count, datatype, dest, tag, & integer, intent(in) :: dest integer, intent(in) :: tag integer, intent(in) :: comm + integer, intent(in) :: info integer, intent(out) :: request integer, intent(out) :: ierror end subroutine MPI_Psend_init @@ -2287,10 +2627,10 @@ end subroutine MPI_Psend_init end interface -interface MPI_Precv_init +interface subroutine MPI_Precv_init(buf, partitions, count, datatype, dest, tag, & - comm, request, ierror) + comm, info, request, ierror) @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: buf integer, intent(in) :: partitions @@ -2299,6 +2639,7 @@ subroutine MPI_Precv_init(buf, partitions, count, datatype, dest, tag, & integer, intent(in) :: dest integer, intent(in) :: tag integer, intent(in) :: comm + integer, intent(in) :: info integer, intent(out) :: request integer, intent(out) :: ierror end subroutine MPI_Precv_init @@ -2306,7 +2647,7 @@ end subroutine MPI_Precv_init end interface -interface MPI_Pready +interface subroutine MPI_Pready(partition, request, ierror) integer, intent(in) :: partition @@ -2317,7 +2658,7 @@ end subroutine MPI_Pready end interface -interface MPI_Pready_list +interface subroutine MPI_Pready_list(length, partitions, request, ierror) integer, intent(in) :: length @@ -2329,7 +2670,7 @@ end subroutine MPI_Pready_list end interface -interface MPI_Pready_range +interface subroutine MPI_Pready_range(partition_low, partition_high, request, ierror) integer, intent(in) :: partition_low @@ -2341,7 +2682,7 @@ end subroutine MPI_Pready_range end interface -interface MPI_Parrived 
+interface subroutine MPI_Parrived(request, partition, flag, ierror) integer, intent(in) :: request @@ -2353,7 +2694,7 @@ end subroutine MPI_Parrived end interface -interface MPI_Lookup_name +interface subroutine MPI_Lookup_name(service_name, info, port_name, ierror) character(len=*), intent(in) :: service_name @@ -2365,7 +2706,7 @@ end subroutine MPI_Lookup_name end interface -interface MPI_Mprobe +interface subroutine MPI_Mprobe(source, tag, comm, message, status, ierror) include 'mpif-config.h' @@ -2380,7 +2721,7 @@ end subroutine MPI_Mprobe end interface -interface MPI_Mrecv +interface subroutine MPI_Mrecv(buf, count, datatype, message, status, ierror) include 'mpif-config.h' @@ -2396,7 +2737,7 @@ end subroutine MPI_Mrecv end interface -interface MPI_Neighbor_allgather +interface subroutine MPI_Neighbor_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, & recvtype, comm, ierror) @@ -2415,7 +2756,28 @@ end subroutine MPI_Neighbor_allgather end interface -interface MPI_Neighbor_allgatherv +interface + +subroutine MPI_Neighbor_allgather_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, & + recvtype, comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, intent(in) :: sendcount + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: recvcount + integer, intent(in) :: recvtype + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Neighbor_allgather_init + +end interface + + +interface subroutine MPI_Neighbor_allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, & displs, recvtype, comm, ierror) @@ -2435,7 +2797,29 @@ end subroutine MPI_Neighbor_allgatherv end interface -interface MPI_Neighbor_alltoall +interface + +subroutine MPI_Neighbor_allgatherv_init(sendbuf, sendcount, 
sendtype, recvbuf, recvcounts, & + displs, recvtype, comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, intent(in) :: sendcount + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, dimension(*), intent(in) :: recvcounts + integer, dimension(*), intent(in) :: displs + integer, intent(in) :: recvtype + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Neighbor_allgatherv_init + +end interface + + +interface subroutine MPI_Neighbor_alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, & recvtype, comm, ierror) @@ -2454,7 +2838,28 @@ end subroutine MPI_Neighbor_alltoall end interface -interface MPI_Neighbor_alltoallv +interface + +subroutine MPI_Neighbor_alltoall_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, & + recvtype, comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, intent(in) :: sendcount + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: recvcount + integer, intent(in) :: recvtype + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Neighbor_alltoall_init + +end interface + + +interface subroutine MPI_Neighbor_alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, & recvcounts, rdispls, recvtype, comm, ierror) @@ -2475,7 +2880,30 @@ end subroutine MPI_Neighbor_alltoallv end interface -interface MPI_Neighbor_alltoallw +interface + +subroutine MPI_Neighbor_alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, recvbuf, & + recvcounts, rdispls, recvtype, comm, info, request, ierror) + 
@OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, dimension(*), intent(in) :: sendcounts + integer, dimension(*), intent(in) :: sdispls + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, dimension(*), intent(in) :: recvcounts + integer, dimension(*), intent(in) :: rdispls + integer, intent(in) :: recvtype + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Neighbor_alltoallv_init + +end interface + + +interface subroutine MPI_Neighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, & recvcounts, rdispls, recvtypes, comm, ierror) @@ -2497,7 +2925,31 @@ end subroutine MPI_Neighbor_alltoallw end interface -interface MPI_Op_commutative +interface + +subroutine MPI_Neighbor_alltoallw_init(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, & + recvcounts, rdispls, recvtypes, comm, info, request, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, dimension(*), intent(in) :: sendcounts + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: sdispls + integer, dimension(*), intent(in) :: sendtypes + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, dimension(*), intent(in) :: recvcounts + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: rdispls + integer, dimension(*), intent(in) :: recvtypes + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Neighbor_alltoallw_init + +end interface + + +interface subroutine MPI_Op_commutative(op, commute, ierror) integer, intent(in) :: op @@ -2508,7 +2960,7 @@ end subroutine MPI_Op_commutative end interface -interface MPI_Op_create +interface 
subroutine MPI_Op_create(function, commute, op, ierror) external :: function @@ -2520,7 +2972,7 @@ end subroutine MPI_Op_create end interface -interface MPI_Op_free +interface subroutine MPI_Op_free(op, ierror) integer, intent(inout) :: op @@ -2530,7 +2982,7 @@ end subroutine MPI_Op_free end interface -interface MPI_Open_port +interface subroutine MPI_Open_port(info, port_name, ierror) integer, intent(in) :: info @@ -2541,7 +2993,7 @@ end subroutine MPI_Open_port end interface -interface MPI_Pack +interface subroutine MPI_Pack(inbuf, incount, datatype, outbuf, outsize, & position, comm, ierror) @@ -2560,7 +3012,7 @@ end subroutine MPI_Pack end interface -interface MPI_Pack_external +interface subroutine MPI_Pack_external(datarep, inbuf, incount, datatype, outbuf, & outsize, position, ierror) @@ -2580,7 +3032,7 @@ end subroutine MPI_Pack_external end interface -interface MPI_Pack_external_size +interface subroutine MPI_Pack_external_size(datarep, incount, datatype, size, ierror) include 'mpif-config.h' @@ -2594,7 +3046,7 @@ end subroutine MPI_Pack_external_size end interface -interface MPI_Pack_size +interface subroutine MPI_Pack_size(incount, datatype, comm, size, ierror) integer, intent(in) :: incount @@ -2607,7 +3059,7 @@ end subroutine MPI_Pack_size end interface -interface MPI_Pcontrol +interface subroutine MPI_Pcontrol(level) integer, intent(in) :: level @@ -2617,7 +3069,7 @@ end subroutine MPI_Pcontrol end interface -interface MPI_Probe +interface subroutine MPI_Probe(source, tag, comm, status, ierror) include 'mpif-config.h' @@ -2631,7 +3083,7 @@ end subroutine MPI_Probe end interface -interface MPI_Publish_name +interface subroutine MPI_Publish_name(service_name, info, port_name, ierror) character(len=*), intent(in) :: service_name @@ -2643,7 +3095,7 @@ end subroutine MPI_Publish_name end interface -interface MPI_Put +interface subroutine MPI_Put(origin_addr, origin_count, origin_datatype, target_rank, target_disp, & target_count, target_datatype, win, 
ierror) @@ -2663,7 +3115,7 @@ end subroutine MPI_Put end interface -interface MPI_Query_thread +interface subroutine MPI_Query_thread(provided, ierror) integer, intent(out) :: provided @@ -2673,7 +3125,7 @@ end subroutine MPI_Query_thread end interface -interface MPI_Raccumulate +interface subroutine MPI_Raccumulate(origin_addr, origin_count, origin_datatype,& target_rank, target_disp, target_count, & @@ -2696,7 +3148,7 @@ end subroutine MPI_Raccumulate end interface -interface MPI_Recv +interface subroutine MPI_Recv(buf, count, datatype, source, tag, & comm, status, ierror) @@ -2715,7 +3167,7 @@ end subroutine MPI_Recv end interface -interface MPI_Recv_init +interface subroutine MPI_Recv_init(buf, count, datatype, source, tag, & comm, request, ierror) @@ -2733,7 +3185,7 @@ end subroutine MPI_Recv_init end interface -interface MPI_Reduce +interface subroutine MPI_Reduce(sendbuf, recvbuf, count, datatype, op, & root, comm, ierror) @@ -2752,7 +3204,28 @@ end subroutine MPI_Reduce end interface -interface MPI_Reduce_local +interface + +subroutine MPI_Reduce_init(sendbuf, recvbuf, count, datatype, op, & + root, comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(in) :: op + integer, intent(in) :: root + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Reduce_init + +end interface + + +interface subroutine MPI_Reduce_local(inbuf, inout, count, datatype, op, & ierror) @@ -2769,7 +3242,7 @@ end subroutine MPI_Reduce_local end interface -interface MPI_Reduce_scatter +interface subroutine MPI_Reduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, & comm, ierror) @@ -2787,7 +3260,27 @@ end subroutine MPI_Reduce_scatter end interface 
-interface MPI_Reduce_scatter_block +interface + +subroutine MPI_Reduce_scatter_init(sendbuf, recvbuf, recvcounts, datatype, op, & + comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, dimension(*), intent(in) :: recvcounts + integer, intent(in) :: datatype + integer, intent(in) :: op + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Reduce_scatter_init + +end interface + + +interface subroutine MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, & comm, ierror) @@ -2805,7 +3298,27 @@ end subroutine MPI_Reduce_scatter_block end interface -interface MPI_Register_datarep +interface + +subroutine MPI_Reduce_scatter_block_init(sendbuf, recvbuf, recvcount, datatype, op, & + comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: recvcount + integer, intent(in) :: datatype + integer, intent(in) :: op + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Reduce_scatter_block_init + +end interface + + +interface subroutine MPI_Register_datarep(datarep, read_conversion_fn, write_conversion_fn, dtype_file_extent_fn, extra_state& , ierror) @@ -2821,7 +3334,7 @@ end subroutine MPI_Register_datarep end interface -interface MPI_Request_free +interface subroutine MPI_Request_free(request, ierror) integer, intent(inout) :: request @@ -2831,7 +3344,7 @@ end subroutine MPI_Request_free end interface -interface MPI_Request_get_status +interface subroutine MPI_Request_get_status(request, flag, status, ierror) include 
'mpif-config.h' @@ -2844,7 +3357,7 @@ end subroutine MPI_Request_get_status end interface -interface MPI_Rget +interface subroutine MPI_Rget(origin_addr, origin_count, origin_datatype, & target_rank, target_disp, target_count, & @@ -2866,7 +3379,7 @@ end subroutine MPI_Rget end interface -interface MPI_Rget_accumulate +interface subroutine MPI_Rget_accumulate(origin_addr, origin_count, origin_datatype,& result_addr, result_count, result_datatype,& @@ -2894,7 +3407,7 @@ end subroutine MPI_Rget_accumulate end interface -interface MPI_Rput +interface subroutine MPI_Rput(origin_addr, origin_count, origin_datatype, & target_rank, target_disp, target_count, & @@ -2916,7 +3429,7 @@ end subroutine MPI_Rput end interface -interface MPI_Rsend +interface subroutine MPI_Rsend(ibuf, count, datatype, dest, tag, & comm, ierror) @@ -2933,7 +3446,7 @@ end subroutine MPI_Rsend end interface -interface MPI_Rsend_init +interface subroutine MPI_Rsend_init(buf, count, datatype, dest, tag, & comm, request, ierror) @@ -2951,7 +3464,7 @@ end subroutine MPI_Rsend_init end interface -interface MPI_Scan +interface subroutine MPI_Scan(sendbuf, recvbuf, count, datatype, op, & comm, ierror) @@ -2969,7 +3482,27 @@ end subroutine MPI_Scan end interface -interface MPI_Scatter +interface + +subroutine MPI_Scan_init(sendbuf, recvbuf, count, datatype, op, & + comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(in) :: op + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Scan_init + +end interface + + +interface subroutine MPI_Scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, & recvtype, root, comm, ierror) @@ -2989,7 +3522,29 @@ end subroutine MPI_Scatter 
end interface -interface MPI_Scatterv +interface + +subroutine MPI_Scatter_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, & + recvtype, root, comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, intent(in) :: sendcount + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: recvcount + integer, intent(in) :: recvtype + integer, intent(in) :: root + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Scatter_init + +end interface + + +interface subroutine MPI_Scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, & recvcount, recvtype, root, comm, ierror) @@ -3010,7 +3565,30 @@ end subroutine MPI_Scatterv end interface -interface MPI_Send +interface + +subroutine MPI_Scatterv_init(sendbuf, sendcounts, displs, sendtype, recvbuf, & + recvcount, recvtype, root, comm, info, request, ierror) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + integer, dimension(*), intent(in) :: sendcounts + integer, dimension(*), intent(in) :: displs + integer, intent(in) :: sendtype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + integer, intent(in) :: recvcount + integer, intent(in) :: recvtype + integer, intent(in) :: root + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Scatterv_init + +end interface + + +interface subroutine MPI_Send(buf, count, datatype, dest, tag, & comm, ierror) @@ -3027,7 +3605,7 @@ end subroutine MPI_Send end interface -interface MPI_Send_init +interface subroutine MPI_Send_init(buf, count, datatype, dest, tag, & comm, request, ierror) @@ -3045,7 +3623,7 @@ end subroutine MPI_Send_init end 
interface -interface MPI_Sendrecv +interface subroutine MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, & recvbuf, recvcount, recvtype, source, recvtag, comm, status, ierror) @@ -3070,7 +3648,7 @@ end subroutine MPI_Sendrecv end interface -interface MPI_Sendrecv_replace +interface subroutine MPI_Sendrecv_replace(buf, count, datatype, dest, sendtag, & source, recvtag, comm, status, ierror) @@ -3090,8 +3668,63 @@ end subroutine MPI_Sendrecv_replace end interface +interface +subroutine MPI_Session_get_info(session, info, ierror) + integer, INTENT(IN) :: session + integer, INTENT(OUT) :: info + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_info +end interface + +interface +subroutine MPI_Session_get_nth_pset(session, info, n, pset_len, pset_name, ierror) + integer, INTENT(IN) :: session + integer, INTENT(IN) :: info + INTEGER, INTENT(IN) :: n + INTEGER, INTENT(INOUT) :: pset_len + CHARACTER(LEN=*), INTENT(OUT) :: pset_name + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_nth_pset +end interface + +interface +subroutine MPI_Session_get_nth_psetlen(session, n, pset_len, ierror) + implicit none + integer, INTENT(IN) :: session + INTEGER, INTENT(IN) :: n + INTEGER, INTENT(OUT) :: pset_len + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_nth_psetlen +end interface + +interface +subroutine MPI_Session_get_pset_info(session, pset_name, info, ierror) + integer, INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + integer, INTENT(OUT) :: info + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_pset_info +end interface + +interface +subroutine MPI_Session_init(info,errhandler,session,ierror) + integer, intent(IN) :: info + integer, intent(IN) :: errhandler + integer, intent(OUT) :: session + integer, intent(OUT) :: ierror +end subroutine MPI_Session_init +end interface +
+interface + +subroutine MPI_Session_finalize(session,ierror) + integer, intent(inout) :: session + integer, intent(OUT) :: ierror +end subroutine MPI_Session_finalize + +end interface -interface MPI_Ssend +interface subroutine MPI_Ssend(buf, count, datatype, dest, tag, & comm, ierror) @@ -3108,7 +3741,7 @@ end subroutine MPI_Ssend end interface -interface MPI_Ssend_init +interface subroutine MPI_Ssend_init(buf, count, datatype, dest, tag, & comm, request, ierror) @@ -3126,7 +3759,7 @@ end subroutine MPI_Ssend_init end interface -interface MPI_Start +interface subroutine MPI_Start(request, ierror) integer, intent(inout) :: request @@ -3136,7 +3769,7 @@ end subroutine MPI_Start end interface -interface MPI_Startall +interface subroutine MPI_Startall(count, array_of_requests, ierror) integer, intent(in) :: count @@ -3147,7 +3780,7 @@ end subroutine MPI_Startall end interface -interface MPI_Status_set_cancelled +interface subroutine MPI_Status_set_cancelled(status, flag, ierror) include 'mpif-config.h' @@ -3159,7 +3792,7 @@ end subroutine MPI_Status_set_cancelled end interface -interface MPI_Status_set_elements +interface subroutine MPI_Status_set_elements(status, datatype, count, ierror) include 'mpif-config.h' @@ -3172,7 +3805,7 @@ end subroutine MPI_Status_set_elements end interface -interface MPI_Status_set_elements_x +interface subroutine MPI_Status_set_elements_x(status, datatype, count, ierror) include 'mpif-config.h' @@ -3185,7 +3818,7 @@ end subroutine MPI_Status_set_elements_x end interface -interface MPI_Test +interface subroutine MPI_Test(request, flag, status, ierror) include 'mpif-config.h' @@ -3198,7 +3831,7 @@ end subroutine MPI_Test end interface -interface MPI_Test_cancelled +interface subroutine MPI_Test_cancelled(status, flag, ierror) include 'mpif-config.h' @@ -3210,12 +3843,12 @@ end subroutine MPI_Test_cancelled end interface -interface MPI_Testall +interface subroutine MPI_Testall(count, array_of_requests, flag, array_of_statuses, ierror) 
include 'mpif-config.h' integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests logical, intent(out) :: flag integer, dimension(MPI_STATUS_SIZE, *), intent(out) :: array_of_statuses integer, intent(out) :: ierror @@ -3224,13 +3857,13 @@ end subroutine MPI_Testall end interface -interface MPI_Testany +interface subroutine MPI_Testany(count, array_of_requests, index, flag, status& , ierror) include 'mpif-config.h' integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests integer, intent(out) :: index logical, intent(out) :: flag integer, dimension(MPI_STATUS_SIZE), intent(out) :: status @@ -3240,13 +3873,13 @@ end subroutine MPI_Testany end interface -interface MPI_Testsome +interface subroutine MPI_Testsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses& , ierror) include 'mpif-config.h' integer, intent(in) :: incount - integer, dimension(incount), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests integer, intent(out) :: outcount integer, dimension(*), intent(out) :: array_of_indices integer, dimension(MPI_STATUS_SIZE, *), intent(out) :: array_of_statuses @@ -3256,7 +3889,7 @@ end subroutine MPI_Testsome end interface -interface MPI_Topo_test +interface subroutine MPI_Topo_test(comm, status, ierror) integer, intent(in) :: comm @@ -3267,7 +3900,7 @@ end subroutine MPI_Topo_test end interface -interface MPI_Type_commit +interface subroutine MPI_Type_commit(datatype, ierror) integer, intent(inout) :: datatype @@ -3277,7 +3910,7 @@ end subroutine MPI_Type_commit end interface -interface MPI_Type_contiguous +interface subroutine MPI_Type_contiguous(count, oldtype, newtype, ierror) integer, intent(in) :: count @@ -3289,7 +3922,7 @@ end subroutine MPI_Type_contiguous end interface -interface MPI_Type_create_darray 
+interface subroutine MPI_Type_create_darray(size, rank, ndims, gsize_array, distrib_array, & darg_array, psize_array, order, oldtype, newtype, ierror) @@ -3309,7 +3942,7 @@ end subroutine MPI_Type_create_darray end interface -interface MPI_Type_create_f90_complex +interface subroutine MPI_Type_create_f90_complex(p, r, newtype, ierror) integer, intent(in) :: p @@ -3321,7 +3954,7 @@ end subroutine MPI_Type_create_f90_complex end interface -interface MPI_Type_create_f90_integer +interface subroutine MPI_Type_create_f90_integer(r, newtype, ierror) integer, intent(in) :: r @@ -3332,7 +3965,7 @@ end subroutine MPI_Type_create_f90_integer end interface -interface MPI_Type_create_f90_real +interface subroutine MPI_Type_create_f90_real(p, r, newtype, ierror) integer, intent(in) :: p @@ -3344,7 +3977,7 @@ end subroutine MPI_Type_create_f90_real end interface -interface MPI_Type_create_hindexed +interface subroutine MPI_Type_create_hindexed(count, array_of_blocklengths, array_of_displacements, oldtype, newtype& , ierror) @@ -3360,7 +3993,7 @@ end subroutine MPI_Type_create_hindexed end interface -interface MPI_Type_create_hindexed_block +interface subroutine MPI_Type_create_hindexed_block(count, blocklength, array_of_displacements, oldtype, newtype& , ierror) @@ -3376,7 +4009,7 @@ end subroutine MPI_Type_create_hindexed_block end interface -interface MPI_Type_create_hvector +interface subroutine MPI_Type_create_hvector(count, blocklength, stride, oldtype, newtype& , ierror) @@ -3392,7 +4025,7 @@ end subroutine MPI_Type_create_hvector end interface -interface MPI_Type_create_indexed_block +interface subroutine MPI_Type_create_indexed_block(count, blocklength, array_of_displacements, oldtype, newtype& , ierror) @@ -3407,7 +4040,7 @@ end subroutine MPI_Type_create_indexed_block end interface -interface MPI_Type_create_keyval +interface subroutine MPI_Type_create_keyval(type_copy_attr_fn, type_delete_attr_fn, type_keyval, extra_state, ierror) include 'mpif-config.h' @@ -3421,7 
+4054,7 @@ end subroutine MPI_Type_create_keyval end interface -interface MPI_Type_create_resized +interface subroutine MPI_Type_create_resized(oldtype, lb, extent, newtype, ierror) include 'mpif-config.h' @@ -3435,7 +4068,7 @@ end subroutine MPI_Type_create_resized end interface -interface MPI_Type_create_struct +interface subroutine MPI_Type_create_struct(count, array_of_block_lengths, array_of_displacements, array_of_types, newtype& , ierror) @@ -3451,7 +4084,7 @@ end subroutine MPI_Type_create_struct end interface -interface MPI_Type_create_subarray +interface subroutine MPI_Type_create_subarray(ndims, size_array, subsize_array, start_array, order, & oldtype, newtype, ierror) @@ -3468,7 +4101,7 @@ end subroutine MPI_Type_create_subarray end interface -interface MPI_Type_delete_attr +interface subroutine MPI_Type_delete_attr(datatype, type_keyval, ierror) integer, intent(in) :: datatype @@ -3479,7 +4112,7 @@ end subroutine MPI_Type_delete_attr end interface -interface MPI_Type_dup +interface subroutine MPI_Type_dup(datatype, newtype, ierror) integer, intent(in) :: datatype @@ -3490,7 +4123,7 @@ end subroutine MPI_Type_dup end interface -interface MPI_Type_free +interface subroutine MPI_Type_free(datatype, ierror) integer, intent(inout) :: datatype @@ -3500,7 +4133,7 @@ end subroutine MPI_Type_free end interface -interface MPI_Type_free_keyval +interface subroutine MPI_Type_free_keyval(type_keyval, ierror) integer, intent(inout) :: type_keyval @@ -3510,7 +4143,7 @@ end subroutine MPI_Type_free_keyval end interface -interface MPI_Type_get_attr +interface subroutine MPI_Type_get_attr(datatype, type_keyval, attribute_val, flag, ierror) include 'mpif-config.h' @@ -3524,7 +4157,7 @@ end subroutine MPI_Type_get_attr end interface -interface MPI_Type_get_contents +interface subroutine MPI_Type_get_contents(datatype, max_integers, max_addresses, max_datatypes, array_of_integers, & array_of_addresses, array_of_datatypes, ierror) @@ -3542,7 +4175,7 @@ end subroutine 
MPI_Type_get_contents end interface -interface MPI_Type_get_envelope +interface subroutine MPI_Type_get_envelope(datatype, num_integers, num_addresses, num_datatypes, combiner& , ierror) @@ -3557,7 +4190,7 @@ end subroutine MPI_Type_get_envelope end interface -interface MPI_Type_get_extent +interface subroutine MPI_Type_get_extent(datatype, lb, extent, ierror) include 'mpif-config.h' @@ -3570,7 +4203,7 @@ end subroutine MPI_Type_get_extent end interface -interface MPI_Type_get_extent_x +interface subroutine MPI_Type_get_extent_x(datatype, lb, extent, ierror) include 'mpif-config.h' @@ -3583,7 +4216,7 @@ end subroutine MPI_Type_get_extent_x end interface -interface MPI_Type_get_name +interface subroutine MPI_Type_get_name(datatype, type_name, resultlen, ierror) integer, intent(in) :: datatype @@ -3595,7 +4228,7 @@ end subroutine MPI_Type_get_name end interface -interface MPI_Type_get_true_extent +interface subroutine MPI_Type_get_true_extent(datatype, true_lb, true_extent, ierror) include 'mpif-config.h' @@ -3608,7 +4241,7 @@ end subroutine MPI_Type_get_true_extent end interface -interface MPI_Type_get_true_extent_x +interface subroutine MPI_Type_get_true_extent_x(datatype, true_lb, true_extent, ierror) include 'mpif-config.h' @@ -3621,7 +4254,7 @@ end subroutine MPI_Type_get_true_extent_x end interface -interface MPI_Type_indexed +interface subroutine MPI_Type_indexed(count, array_of_blocklengths, array_of_displacements, oldtype, newtype& , ierror) @@ -3636,7 +4269,7 @@ end subroutine MPI_Type_indexed end interface -interface MPI_Type_match_size +interface subroutine MPI_Type_match_size(typeclass, size, datatype, ierror) integer, intent(in) :: typeclass @@ -3648,7 +4281,7 @@ end subroutine MPI_Type_match_size end interface -interface MPI_Type_set_attr +interface subroutine MPI_Type_set_attr(datatype, type_keyval, attr_val, ierror) include 'mpif-config.h' @@ -3661,7 +4294,7 @@ end subroutine MPI_Type_set_attr end interface -interface MPI_Type_set_name +interface 
subroutine MPI_Type_set_name(datatype, type_name, ierror) integer, intent(in) :: datatype @@ -3672,7 +4305,7 @@ end subroutine MPI_Type_set_name end interface -interface MPI_Type_size +interface subroutine MPI_Type_size(datatype, size, ierror) integer, intent(in) :: datatype @@ -3683,7 +4316,7 @@ end subroutine MPI_Type_size end interface -interface MPI_Type_size_x +interface subroutine MPI_Type_size_x(datatype, size, ierror) include 'mpif-config.h' @@ -3695,7 +4328,7 @@ end subroutine MPI_Type_size_x end interface -interface MPI_Type_vector +interface subroutine MPI_Type_vector(count, blocklength, stride, oldtype, newtype& , ierror) @@ -3710,7 +4343,7 @@ end subroutine MPI_Type_vector end interface -interface MPI_Unpack +interface subroutine MPI_Unpack(inbuf, insize, position, outbuf, outcount, & datatype, comm, ierror) @@ -3729,7 +4362,7 @@ end subroutine MPI_Unpack end interface -interface MPI_Unpack_external +interface subroutine MPI_Unpack_external(datarep, inbuf, insize, position, outbuf, & outcount, datatype, ierror) @@ -3749,7 +4382,7 @@ end subroutine MPI_Unpack_external end interface -interface MPI_Unpublish_name +interface subroutine MPI_Unpublish_name(service_name, info, port_name, ierror) character(len=*), intent(in) :: service_name @@ -3761,7 +4394,7 @@ end subroutine MPI_Unpublish_name end interface -interface MPI_Wait +interface subroutine MPI_Wait(request, status, ierror) include 'mpif-config.h' @@ -3773,12 +4406,12 @@ end subroutine MPI_Wait end interface -interface MPI_Waitall +interface subroutine MPI_Waitall(count, array_of_requests, array_of_statuses, ierror) include 'mpif-config.h' integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests integer, dimension(MPI_STATUS_SIZE, *), intent(out) :: array_of_statuses integer, intent(out) :: ierror end subroutine MPI_Waitall @@ -3786,12 +4419,12 @@ end subroutine MPI_Waitall end interface -interface 
MPI_Waitany +interface subroutine MPI_Waitany(count, array_of_requests, index, status, ierror) include 'mpif-config.h' integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests integer, intent(out) :: index integer, dimension(MPI_STATUS_SIZE), intent(out) :: status integer, intent(out) :: ierror @@ -3800,13 +4433,13 @@ end subroutine MPI_Waitany end interface -interface MPI_Waitsome +interface subroutine MPI_Waitsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses& , ierror) include 'mpif-config.h' integer, intent(in) :: incount - integer, dimension(incount), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests integer, intent(out) :: outcount integer, dimension(*), intent(out) :: array_of_indices integer, dimension(MPI_STATUS_SIZE, *), intent(out) :: array_of_statuses @@ -3816,6 +4449,7 @@ end subroutine MPI_Waitsome end interface +! This interface requires a name because there are multiple subroutines. interface MPI_Win_allocate subroutine MPI_Win_allocate(size, disp_unit, info, comm, & @@ -3842,6 +4476,7 @@ end subroutine MPI_Win_allocate_cptr end interface +! This interface requires a name because there are multiple subroutines. 
interface MPI_Win_allocate_shared subroutine MPI_Win_allocate_shared(size, disp_unit, info, comm, & @@ -3868,7 +4503,7 @@ end subroutine MPI_Win_allocate_shared_cptr end interface -interface MPI_Win_attach +interface subroutine MPI_Win_attach(win, base, size, ierror) include 'mpif-config.h' @@ -3882,7 +4517,7 @@ end subroutine MPI_Win_attach end interface -interface MPI_Win_call_errhandler +interface subroutine MPI_Win_call_errhandler(win, errorcode, ierror) integer, intent(in) :: win @@ -3893,7 +4528,7 @@ end subroutine MPI_Win_call_errhandler end interface -interface MPI_Win_complete +interface subroutine MPI_Win_complete(win, ierror) integer, intent(in) :: win @@ -3903,7 +4538,7 @@ end subroutine MPI_Win_complete end interface -interface MPI_Win_create +interface subroutine MPI_Win_create(base, size, disp_unit, info, comm, & win, ierror) @@ -3921,7 +4556,7 @@ end subroutine MPI_Win_create end interface -interface MPI_Win_create_dynamic +interface subroutine MPI_Win_create_dynamic(info, comm, & win, ierror) @@ -3935,7 +4570,7 @@ end subroutine MPI_Win_create_dynamic end interface -interface MPI_Win_create_errhandler +interface subroutine MPI_Win_create_errhandler(function, errhandler, ierror) external :: function @@ -3946,7 +4581,7 @@ end subroutine MPI_Win_create_errhandler end interface -interface MPI_Win_create_keyval +interface subroutine MPI_Win_create_keyval(win_copy_attr_fn, win_delete_attr_fn, win_keyval, extra_state, ierror) include 'mpif-config.h' @@ -3960,7 +4595,7 @@ end subroutine MPI_Win_create_keyval end interface -interface MPI_Win_delete_attr +interface subroutine MPI_Win_delete_attr(win, win_keyval, ierror) integer, intent(in) :: win @@ -3971,7 +4606,7 @@ end subroutine MPI_Win_delete_attr end interface -interface MPI_Win_detach +interface subroutine MPI_Win_detach(win, base, ierror) include 'mpif-config.h' @@ -3984,7 +4619,7 @@ end subroutine MPI_Win_detach end interface -interface MPI_Win_fence +interface subroutine MPI_Win_fence(assert, win, 
ierror) integer, intent(in) :: assert @@ -3995,7 +4630,7 @@ end subroutine MPI_Win_fence end interface -interface MPI_Win_flush +interface subroutine MPI_Win_flush(rank, win, ierror) integer, intent(in) :: rank @@ -4006,7 +4641,7 @@ end subroutine MPI_Win_flush end interface -interface MPI_Win_flush_all +interface subroutine MPI_Win_flush_all(win, ierror) integer, intent(in) :: win @@ -4016,7 +4651,7 @@ end subroutine MPI_Win_flush_all end interface -interface MPI_Win_flush_local +interface subroutine MPI_Win_flush_local(rank, win, ierror) integer, intent(in) :: rank @@ -4027,7 +4662,7 @@ end subroutine MPI_Win_flush_local end interface -interface MPI_Win_flush_local_all +interface subroutine MPI_Win_flush_local_all(win, ierror) integer, intent(in) :: win @@ -4037,7 +4672,7 @@ end subroutine MPI_Win_flush_local_all end interface -interface MPI_Win_free +interface subroutine MPI_Win_free(win, ierror) integer, intent(inout) :: win @@ -4047,7 +4682,7 @@ end subroutine MPI_Win_free end interface -interface MPI_Win_free_keyval +interface subroutine MPI_Win_free_keyval(win_keyval, ierror) integer, intent(inout) :: win_keyval @@ -4057,7 +4692,7 @@ end subroutine MPI_Win_free_keyval end interface -interface MPI_Win_get_attr +interface subroutine MPI_Win_get_attr(win, win_keyval, attribute_val, flag, ierror) include 'mpif-config.h' @@ -4071,7 +4706,7 @@ end subroutine MPI_Win_get_attr end interface -interface MPI_Win_get_errhandler +interface subroutine MPI_Win_get_errhandler(win, errhandler, ierror) integer, intent(in) :: win @@ -4082,7 +4717,7 @@ end subroutine MPI_Win_get_errhandler end interface -interface MPI_Win_get_group +interface subroutine MPI_Win_get_group(win, group, ierror) integer, intent(in) :: win @@ -4093,7 +4728,7 @@ end subroutine MPI_Win_get_group end interface -interface MPI_Win_get_info +interface subroutine MPI_Win_get_info(comm, info, ierror) include 'mpif-config.h' @@ -4105,7 +4740,7 @@ end subroutine MPI_Win_get_info end interface -interface 
MPI_Win_get_name +interface subroutine MPI_Win_get_name(win, win_name, resultlen, ierror) integer, intent(in) :: win @@ -4117,7 +4752,7 @@ end subroutine MPI_Win_get_name end interface -interface MPI_Win_lock +interface subroutine MPI_Win_lock(lock_type, rank, assert, win, ierror) integer, intent(in) :: lock_type @@ -4130,7 +4765,7 @@ end subroutine MPI_Win_lock end interface -interface MPI_Win_lock_all +interface subroutine MPI_Win_lock_all(assert, win, ierror) integer, intent(in) :: assert @@ -4141,7 +4776,7 @@ end subroutine MPI_Win_lock_all end interface -interface MPI_Win_post +interface subroutine MPI_Win_post(group, assert, win, ierror) integer, intent(in) :: group @@ -4153,7 +4788,7 @@ end subroutine MPI_Win_post end interface -interface MPI_Win_set_attr +interface subroutine MPI_Win_set_attr(win, win_keyval, attribute_val, ierror) include 'mpif-config.h' @@ -4166,7 +4801,7 @@ end subroutine MPI_Win_set_attr end interface -interface MPI_Win_set_errhandler +interface subroutine MPI_Win_set_errhandler(win, errhandler, ierror) integer, intent(in) :: win @@ -4177,7 +4812,7 @@ end subroutine MPI_Win_set_errhandler end interface -interface MPI_Win_set_info +interface subroutine MPI_Win_set_info(comm, info, ierror) include 'mpif-config.h' @@ -4189,7 +4824,7 @@ end subroutine MPI_Win_set_info end interface -interface MPI_Win_set_name +interface subroutine MPI_Win_set_name(win, win_name, ierror) integer, intent(in) :: win @@ -4200,6 +4835,7 @@ end subroutine MPI_Win_set_name end interface +! This interface requires a name because there are multiple subroutines. 
interface MPI_Win_shared_query subroutine MPI_Win_shared_query(win, rank, size, disp_unit, baseptr,& @@ -4228,7 +4864,7 @@ end subroutine MPI_Win_shared_query_cptr end interface -interface MPI_Win_start +interface subroutine MPI_Win_start(group, assert, win, ierror) integer, intent(in) :: group @@ -4240,7 +4876,7 @@ end subroutine MPI_Win_start end interface -interface MPI_Win_sync +interface subroutine MPI_Win_sync(win, ierror) integer, intent(in) :: win @@ -4250,7 +4886,7 @@ end subroutine MPI_Win_sync end interface -interface MPI_Win_test +interface subroutine MPI_Win_test(win, flag, ierror) integer, intent(in) :: win @@ -4261,7 +4897,7 @@ end subroutine MPI_Win_test end interface -interface MPI_Win_unlock +interface subroutine MPI_Win_unlock(rank, win, ierror) integer, intent(in) :: rank @@ -4272,7 +4908,7 @@ end subroutine MPI_Win_unlock end interface -interface MPI_Win_unlock_all +interface subroutine MPI_Win_unlock_all(win, ierror) integer, intent(in) :: win @@ -4282,7 +4918,7 @@ end subroutine MPI_Win_unlock_all end interface -interface MPI_Win_wait +interface subroutine MPI_Win_wait(win, ierror) integer, intent(in) :: win @@ -4292,7 +4928,7 @@ end subroutine MPI_Win_wait end interface -interface MPI_Wtick +interface function MPI_Wtick() double precision MPI_Wtick @@ -4301,7 +4937,7 @@ end function MPI_Wtick end interface -interface MPI_Wtime +interface function MPI_Wtime() double precision MPI_Wtime diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-removed-interfaces.h.in b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-removed-interfaces.h.in index e8b2c2e061e..4085afe222b 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-removed-interfaces.h.in +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-removed-interfaces.h.in @@ -1,6 +1,6 @@ ! -*- fortran -*- ! -! Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved ! 
Copyright (c) 2007 Los Alamos National Security, LLC. All rights ! reserved. ! Copyright (c) 2012 The University of Tennessee and The University @@ -17,7 +17,7 @@ ! ! $HEADER$ -interface MPI_Address +interface subroutine MPI_Address(location, address, ierror) @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ location @@ -28,7 +28,7 @@ end subroutine MPI_Address end interface -interface MPI_Attr_delete +interface subroutine MPI_Attr_delete(comm, keyval, ierror) integer, intent(in) :: comm @@ -38,7 +38,7 @@ end subroutine MPI_Attr_delete end interface -interface MPI_Attr_get +interface subroutine MPI_Attr_get(comm, keyval, attribute_val, flag, ierror) integer, intent(in) :: comm @@ -50,7 +50,7 @@ end subroutine MPI_Attr_get end interface -interface MPI_Attr_put +interface subroutine MPI_Attr_put(comm, keyval, attribute_val, ierror) integer, intent(in) :: comm @@ -61,7 +61,7 @@ end subroutine MPI_Attr_put end interface -interface MPI_Errhandler_create +interface subroutine MPI_Errhandler_create(function, errhandler, ierror) external :: function @@ -71,7 +71,7 @@ end subroutine MPI_Errhandler_create end interface -interface MPI_Errhandler_get +interface subroutine MPI_Errhandler_get(comm, errhandler, ierror) integer, intent(in) :: comm @@ -81,7 +81,7 @@ end subroutine MPI_Errhandler_get end interface -interface MPI_Errhandler_set +interface subroutine MPI_Errhandler_set(comm, errhandler, ierror) integer, intent(in) :: comm @@ -91,7 +91,7 @@ end subroutine MPI_Errhandler_set end interface -interface MPI_Keyval_create +interface subroutine MPI_Keyval_create(copy_fn, delete_fn, keyval, extra_state, ierror) external :: copy_fn @@ -103,7 +103,7 @@ end subroutine MPI_Keyval_create end interface -interface MPI_Keyval_free +interface subroutine MPI_Keyval_free(keyval, ierror) integer, intent(inout) :: keyval @@ -112,7 +112,7 @@ end subroutine MPI_Keyval_free end interface -interface MPI_Type_extent +interface subroutine MPI_Type_extent(datatype, extent, ierror) integer, intent(in) :: datatype 
@@ -122,7 +122,7 @@ end subroutine MPI_Type_extent end interface -interface MPI_Type_hindexed +interface subroutine MPI_Type_hindexed(count, array_of_blocklengths, array_of_displacements, oldtype, newtype& , ierror) @@ -136,7 +136,7 @@ end subroutine MPI_Type_hindexed end interface -interface MPI_Type_hvector +interface subroutine MPI_Type_hvector(count, blocklength, stride, oldtype, newtype& , ierror) @@ -150,7 +150,7 @@ end subroutine MPI_Type_hvector end interface -interface MPI_Type_lb +interface subroutine MPI_Type_lb(datatype, lb, ierror) integer, intent(in) :: datatype @@ -160,7 +160,7 @@ end subroutine MPI_Type_lb end interface -interface MPI_Type_struct +interface subroutine MPI_Type_struct(count, array_of_blocklengths, array_of_displacements, array_of_types, newtype& , ierror) @@ -174,7 +174,7 @@ end subroutine MPI_Type_struct end interface -interface MPI_Type_ub +interface subroutine MPI_Type_ub(datatype, ub, ierror) integer, intent(in) :: datatype diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-status.h b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-status.h index bd5635700fe..783302c363c 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-status.h +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-status.h @@ -2,6 +2,7 @@ ! ! Copyright (c) 2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. +! Copyright (c) 2021 Cisco Systems, Inc. All rights reserved ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -9,7 +10,7 @@ ! 
$HEADER$ -interface MPI_Status_f082f +interface subroutine MPI_Status_f082f(f08_status, f_status, ierror) use mpi_types @@ -22,7 +23,7 @@ end subroutine MPI_Status_f082f end interface -interface MPI_Status_f2f08 +interface subroutine MPI_Status_f2f08(f_status, f08_status, ierror) use mpi_types @@ -35,7 +36,7 @@ end subroutine MPI_Status_f2f08 end interface -interface PMPI_Status_f082f +interface subroutine PMPI_Status_f082f(f08_status, f_status, ierror) use mpi_types @@ -48,7 +49,7 @@ end subroutine PMPI_Status_f082f end interface -interface PMPI_Status_f2f08 +interface subroutine PMPI_Status_f2f08(f_status, f08_status, ierror) use mpi_types diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/pmpi-ignore-tkr-interfaces.h b/ompi/mpi/fortran/use-mpi-ignore-tkr/pmpi-ignore-tkr-interfaces.h index 157d0ad706e..510283c0c40 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/pmpi-ignore-tkr-interfaces.h +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/pmpi-ignore-tkr-interfaces.h @@ -16,15 +16,23 @@ #define MPI_Aint_add PMPI_Aint_add #define MPI_Aint_diff PMPI_Aint_diff #define MPI_Allgather PMPI_Allgather +#define MPI_Allgather_init PMPI_Allgather_init #define MPI_Allgatherv PMPI_Allgatherv +#define MPI_Allgatherv_init PMPI_Allgatherv_init #define MPI_Alloc_mem PMPI_Alloc_mem #define MPI_Alloc_mem_cptr PMPI_Alloc_mem_cptr #define MPI_Allreduce PMPI_Allreduce +#define MPI_Allreduce_init PMPI_Allreduce_init #define MPI_Alltoall PMPI_Alltoall +#define MPI_Alltoall_init PMPI_Alltoall_init #define MPI_Alltoallv PMPI_Alltoallv +#define MPI_Alltoallv_init PMPI_Alltoallv_init #define MPI_Alltoallw PMPI_Alltoallw +#define MPI_Alltoallw_init PMPI_Alltoallw_init #define MPI_Barrier PMPI_Barrier +#define MPI_Barrier_init PMPI_Barrier_init #define MPI_Bcast PPMPI_Bcast +#define MPI_Bcast_init PMPI_Bcast_init #define MPI_Bsend PMPI_Bsend #define MPI_Bsend_init PMPI_Bsend_init #define MPI_Buffer_attach PMPI_Buffer_attach @@ -45,6 +53,7 @@ #define MPI_Comm_connect PMPI_Comm_connect #define
MPI_Comm_create PMPI_Comm_create #define MPI_Comm_create_errhandler PMPI_Comm_create_errhandler +#define MPI_Comm_create_from_group PMPI_Comm_create_from_group #define MPI_Comm_create_group PMPI_Comm_create_group #define MPI_Comm_create_keyval PMPI_Comm_create_keyval #define MPI_Comm_delete_attr PMPI_Comm_delete_attr @@ -60,6 +69,7 @@ #define MPI_Comm_get_parent PMPI_Comm_get_parent #define MPI_Comm_group PMPI_Comm_group #define MPI_Comm_idup PMPI_Comm_idup +#define MPI_Comm_idup_with_info PMPI_Comm_idup_with_info #define MPI_Comm_join PMPI_Comm_join #define MPI_Comm_rank PMPI_Comm_rank #define MPI_Comm_remote_group PMPI_Comm_remote_group @@ -84,13 +94,16 @@ #define MPI_Error_class PMPI_Error_class #define MPI_Error_string PMPI_Error_string #define MPI_Exscan PMPI_Exscan +#define MPI_Exscan_init PMPI_Exscan_init #define MPI_F_sync_reg PMPI_F_sync_reg #define MPI_Fetch_and_op PMPI_Fetch_and_op #define MPI_Finalize PMPI_Finalize #define MPI_Finalized PMPI_Finalized #define MPI_Free_mem PMPI_Free_mem #define MPI_Gather PMPI_Gather +#define MPI_Gather_init PMPI_Gather_init #define MPI_Gatherv PMPI_Gatherv +#define MPI_Gatherv_init PMPI_Gatherv_init #define MPI_Get PMPI_Get #define MPI_Get_accumulate PMPI_Get_accumulate #define MPI_Get_address PMPI_Get_address @@ -111,6 +124,7 @@ #define MPI_Group_compare PMPI_Group_compare #define MPI_Group_difference PMPI_Group_difference #define MPI_Group_excl PMPI_Group_excl +#define MPI_Group_from_session_pset PMPI_Group_from_session_pset #define MPI_Group_free PMPI_Group_free #define MPI_Group_incl PMPI_Group_incl #define MPI_Group_intersection PMPI_Group_intersection @@ -146,12 +160,14 @@ #define MPI_Info_get PMPI_Info_get #define MPI_Info_get_nkeys PMPI_Info_get_nkeys #define MPI_Info_get_nthkey PMPI_Info_get_nthkey +#define MPI_Info_get_string PMPI_Info_get_string #define MPI_Info_get_valuelen PMPI_Info_get_valuelen #define MPI_Info_set PMPI_Info_set #define MPI_Init PMPI_Init #define MPI_Init_thread PMPI_Init_thread #define 
MPI_Initialized PMPI_Initialized #define MPI_Intercomm_create PMPI_Intercomm_create +#define MPI_Intercomm_create_from_groups PMPI_Intercomm_create_from_groups #define MPI_Intercomm_merge PMPI_Intercomm_merge #define MPI_Iprobe PMPI_Iprobe #define MPI_Irecv PMPI_Irecv @@ -164,15 +180,22 @@ #define MPI_Iscatter PMPI_Iscatter #define MPI_Iscatterv PMPI_Iscatterv #define MPI_Isend PMPI_Isend +#define MPI_Isendrecv PMPI_Isendrecv +#define MPI_Isendrecv_replace PMPI_Isendrecv_replace #define MPI_Issend PMPI_Issend #define MPI_Lookup_name PMPI_Lookup_name #define MPI_Mprobe PMPI_Mprobe #define MPI_Mrecv PMPI_Mrecv #define MPI_Neighbor_allgather PMPI_Neighbor_allgather +#define MPI_Neighbor_allgather_init PMPI_Neighbor_allgather_init #define MPI_Neighbor_allgatherv PMPI_Neighbor_allgatherv +#define MPI_Neighbor_allgatherv_init PMPI_Neighbor_allgatherv_init #define MPI_Neighbor_alltoall PMPI_Neighbor_alltoall +#define MPI_Neighbor_alltoall_init PMPI_Neighbor_alltoall_init #define MPI_Neighbor_alltoallv PMPI_Neighbor_alltoallv +#define MPI_Neighbor_alltoallv_init PMPI_Neighbor_alltoallv_init #define MPI_Neighbor_alltoallw PMPI_Neighbor_alltoallw +#define MPI_Neighbor_alltoallw_init PMPI_Neighbor_alltoallw_init #define MPI_Op_commutative PMPI_Op_commutative #define MPI_Op_create PMPI_Op_create #define MPI_Op_free PMPI_Op_free @@ -196,9 +219,12 @@ #define MPI_Recv PMPI_Recv #define MPI_Recv_init PMPI_Recv_init #define MPI_Reduce PMPI_Reduce +#define MPI_Reduce_init PMPI_Reduce_init #define MPI_Reduce_local PMPI_Reduce_local #define MPI_Reduce_scatter PMPI_Reduce_scatter +#define MPI_Reduce_scatter_init PMPI_Reduce_scatter_init #define MPI_Reduce_scatter_block PMPI_Reduce_scatter_block +#define MPI_Reduce_scatter_block_init PMPI_Reduce_scatter_block_init #define MPI_Register_datarep PMPI_Register_datarep #define MPI_Request_free PMPI_Request_free #define MPI_Request_get_status PMPI_Request_get_status @@ -208,12 +234,21 @@ #define MPI_Rsend PMPI_Rsend #define MPI_Rsend_init 
PMPI_Rsend_init #define MPI_Scan PMPI_Scan +#define MPI_Scan_init PMPI_Scan_init #define MPI_Scatter PMPI_Scatter +#define MPI_Scatter_init PMPI_Scatter_init #define MPI_Scatterv PMPI_Scatterv +#define MPI_Scatterv_init PMPI_Scatterv_init #define MPI_Send PMPI_Send #define MPI_Send_init PMPI_Send_init #define MPI_Sendrecv PMPI_Sendrecv #define MPI_Sendrecv_replace PMPI_Sendrecv_replace +#define MPI_Session_get_info PMPI_Session_get_info +#define MPI_Session_get_nth_pset PMPI_Session_get_nth_pset +#define MPI_Session_get_nth_psetlen PMPI_Session_get_nth_psetlen +#define MPI_Session_get_pset_info PMPI_Session_get_pset_info +#define MPI_Session_init PMPI_Session_init +#define MPI_Session_finalize PMPI_Session_finalize #define MPI_Ssend PMPI_Ssend #define MPI_Ssend_init PMPI_Ssend_init #define MPI_Start PMPI_Start diff --git a/ompi/mpi/fortran/use-mpi-tkr/Makefile.am b/ompi/mpi/fortran/use-mpi-tkr/Makefile.am index 9b41c716117..28228faaa7d 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/Makefile.am +++ b/ompi/mpi/fortran/use-mpi-tkr/Makefile.am @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2019 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved # Copyright (c) 2007 Los Alamos National Security, LLC. All rights # reserved. 
# Copyright (c) 2014-2021 Research Organization for Information Science @@ -87,11 +87,6 @@ lib@OMPI_LIBMPI_NAME@_usempi_la_SOURCES = \ mpi.F90 \ mpi_aint_add_f90.f90 \ mpi_aint_diff_f90.f90 \ - mpi_comm_spawn_multiple_f90.f90 \ - mpi_testall_f90.f90 \ - mpi_testsome_f90.f90 \ - mpi_waitall_f90.f90 \ - mpi_waitsome_f90.f90 \ mpi_wtick_f90.f90 \ mpi_wtime_f90.f90 diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-cptr-interfaces.h b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-cptr-interfaces.h index d4a51f4bb48..3f8ef561e9c 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-cptr-interfaces.h +++ b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-cptr-interfaces.h @@ -1,6 +1,6 @@ ! -*- fortran -*- ! -! Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2014-2021 Cisco Systems, Inc. All rights reserved ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -17,6 +17,7 @@ ! below. ! +! This interface requires a name because there are multiple subroutines. interface MPI_Win_allocate subroutine MPI_Win_allocate(size, disp_unit, info, comm, & @@ -46,6 +47,7 @@ end subroutine MPI_Win_allocate_cptr end interface +! This interface requires a name because there are multiple subroutines. interface MPI_Win_allocate_shared subroutine MPI_Win_allocate_shared(size, disp_unit, info, comm, & @@ -75,6 +77,7 @@ end subroutine MPI_Win_allocate_shared_cptr end interface +! This interface requires a name because there are multiple subroutines. interface MPI_Win_shared_query subroutine MPI_Win_shared_query(win, rank, size, disp_unit, baseptr,& diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h index b6a5c08e71d..43f6e313508 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h +++ b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h @@ -10,9 +10,14 @@ ! University of Stuttgart. All rights reserved. ! Copyright (c) 2004-2005 The Regents of the University of California. ! All rights reserved. -! 
Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2016-2018 Research Organization for Information Science ! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! Copyright (c) 2021 Sandia National Laboratories. All rights reserved. +! Copyright (c) 2021 IBM Corporation. All rights reserved. +! ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -20,7 +25,7 @@ ! $HEADER$ ! -interface MPI_Wtick +interface function MPI_Wtick() double precision MPI_Wtick @@ -29,7 +34,7 @@ end function MPI_Wtick end interface -interface MPI_Wtime +interface function MPI_Wtime() double precision MPI_Wtime @@ -38,7 +43,7 @@ end function MPI_Wtime end interface -interface MPI_Abort +interface subroutine MPI_Abort(comm, errorcode, ierror) integer, intent(in) :: comm @@ -49,7 +54,7 @@ end subroutine MPI_Abort end interface -interface MPI_Add_error_class +interface subroutine MPI_Add_error_class(errorclass, ierror) integer, intent(out) :: errorclass @@ -59,7 +64,7 @@ end subroutine MPI_Add_error_class end interface -interface MPI_Add_error_code +interface subroutine MPI_Add_error_code(errorclass, errorcode, ierror) integer, intent(in) :: errorclass @@ -70,7 +75,7 @@ end subroutine MPI_Add_error_code end interface -interface MPI_Add_error_string +interface subroutine MPI_Add_error_string(errorcode, string, ierror) integer, intent(in) :: errorcode @@ -80,7 +85,7 @@ end subroutine MPI_Add_error_string end interface -interface MPI_Aint_add +interface function MPI_Aint_add(base, diff) include 'mpif-config.h' @@ -91,7 +96,7 @@ end function MPI_Aint_add end interface -interface MPI_Aint_diff +interface function MPI_Aint_diff(addr1, addr2) include 'mpif-config.h' @@ -103,7 +108,7 @@ end function MPI_Aint_diff end interface -interface MPI_Barrier +interface subroutine MPI_Barrier(comm, ierror) integer, intent(in) :: comm @@ -113,7 
+118,7 @@ end subroutine MPI_Barrier end interface -interface MPI_Ibarrier +interface subroutine MPI_Ibarrier(comm, request, ierror) integer, intent(in) :: comm @@ -124,7 +129,7 @@ end subroutine MPI_Ibarrier end interface -interface MPI_Cancel +interface subroutine MPI_Cancel(request, ierror) integer, intent(in) :: request @@ -134,7 +139,7 @@ end subroutine MPI_Cancel end interface -interface MPI_Cart_coords +interface subroutine MPI_Cart_coords(comm, rank, maxdims, coords, ierror) integer, intent(in) :: comm @@ -147,7 +152,7 @@ end subroutine MPI_Cart_coords end interface -interface MPI_Cart_create +interface subroutine MPI_Cart_create(old_comm, ndims, dims, periods, reorder, & comm_cart, ierror) @@ -163,7 +168,7 @@ end subroutine MPI_Cart_create end interface -interface MPI_Cart_get +interface subroutine MPI_Cart_get(comm, maxdims, dims, periods, coords& , ierror) @@ -178,7 +183,7 @@ end subroutine MPI_Cart_get end interface -interface MPI_Cart_map +interface subroutine MPI_Cart_map(comm, ndims, dims, periods, newrank& , ierror) @@ -193,7 +198,7 @@ end subroutine MPI_Cart_map end interface -interface MPI_Cart_rank +interface subroutine MPI_Cart_rank(comm, coords, rank, ierror) integer, intent(in) :: comm @@ -205,7 +210,7 @@ end subroutine MPI_Cart_rank end interface -interface MPI_Cart_shift +interface subroutine MPI_Cart_shift(comm, direction, disp, rank_source, rank_dest& , ierror) @@ -220,7 +225,7 @@ end subroutine MPI_Cart_shift end interface -interface MPI_Cart_sub +interface subroutine MPI_Cart_sub(comm, remain_dims, new_comm, ierror) integer, intent(in) :: comm @@ -232,7 +237,7 @@ end subroutine MPI_Cart_sub end interface -interface MPI_Cartdim_get +interface subroutine MPI_Cartdim_get(comm, ndims, ierror) integer, intent(in) :: comm @@ -243,7 +248,7 @@ end subroutine MPI_Cartdim_get end interface -interface MPI_Comm_call_errhandler +interface subroutine MPI_Comm_call_errhandler(comm, errorcode, ierror) integer, intent(in) :: comm @@ -254,7 +259,7 @@ end 
subroutine MPI_Comm_call_errhandler end interface -interface MPI_Comm_compare +interface subroutine MPI_Comm_compare(comm1, comm2, result, ierror) integer, intent(in) :: comm1 @@ -266,7 +271,7 @@ end subroutine MPI_Comm_compare end interface -interface MPI_Comm_create +interface subroutine MPI_Comm_create(comm, group, newcomm, ierror) integer, intent(in) :: comm @@ -277,8 +282,21 @@ end subroutine MPI_Comm_create end interface +interface MPI_Comm_create_from_group + +subroutine MPI_Comm_create_from_group(group, stringtag, info, errhandler, newcomm, ierror) + implicit none + integer, intent(in) :: group + character(len=*), intent(in) :: stringtag + integer, intent(in) :: info + integer, intent(in) :: errhandler + integer, intent(out) :: newcomm + integer, intent(out) :: ierror +end subroutine MPI_Comm_create_from_group + +end interface -interface MPI_Comm_create_group +interface subroutine MPI_Comm_create_group(comm, group, tag, newcomm, ierror) integer, intent(in) :: comm @@ -291,7 +309,7 @@ end subroutine MPI_Comm_create_group end interface -interface MPI_Comm_create_errhandler +interface subroutine MPI_Comm_create_errhandler(function, errhandler, ierror) external :: function @@ -302,7 +320,7 @@ end subroutine MPI_Comm_create_errhandler end interface -interface MPI_Comm_create_keyval +interface subroutine MPI_Comm_create_keyval(comm_copy_attr_fn, comm_delete_attr_fn, comm_keyval, extra_state, ierror) include 'mpif-config.h' @@ -316,7 +334,7 @@ end subroutine MPI_Comm_create_keyval end interface -interface MPI_Comm_delete_attr +interface subroutine MPI_Comm_delete_attr(comm, comm_keyval, ierror) integer, intent(in) :: comm @@ -327,7 +345,7 @@ end subroutine MPI_Comm_delete_attr end interface -interface MPI_Comm_dup +interface subroutine MPI_Comm_dup(comm, newcomm, ierror) integer, intent(in) :: comm @@ -338,7 +356,7 @@ end subroutine MPI_Comm_dup end interface -interface MPI_Comm_dup_with_info +interface subroutine MPI_Comm_dup_with_info(comm, info, newcomm, 
ierror) integer, intent(in) :: comm @@ -350,7 +368,7 @@ end subroutine MPI_Comm_dup_with_info end interface -interface MPI_Comm_idup +interface subroutine MPI_Comm_idup(comm, newcomm, request, ierror) integer, intent(in) :: comm @@ -361,8 +379,19 @@ end subroutine MPI_Comm_idup end interface +interface + +subroutine MPI_Comm_idup_with_info(comm, info, newcomm, request, ierror) + integer, intent(in) :: comm + integer, intent(in) :: info + integer, intent(out) :: newcomm + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Comm_idup_with_info + +end interface -interface MPI_Comm_free +interface subroutine MPI_Comm_free(comm, ierror) integer, intent(inout) :: comm @@ -372,7 +401,7 @@ end subroutine MPI_Comm_free end interface -interface MPI_Comm_free_keyval +interface subroutine MPI_Comm_free_keyval(comm_keyval, ierror) integer, intent(inout) :: comm_keyval @@ -382,7 +411,7 @@ end subroutine MPI_Comm_free_keyval end interface -interface MPI_Comm_get_info +interface subroutine MPI_Comm_get_info(comm, info_used, ierror) include 'mpif-config.h' @@ -394,7 +423,7 @@ end subroutine MPI_Comm_get_info end interface -interface MPI_Comm_get_attr +interface subroutine MPI_Comm_get_attr(comm, comm_keyval, attribute_val, flag, ierror) include 'mpif-config.h' @@ -408,7 +437,7 @@ end subroutine MPI_Comm_get_attr end interface -interface MPI_Comm_get_errhandler +interface subroutine MPI_Comm_get_errhandler(comm, erhandler, ierror) integer, intent(in) :: comm @@ -419,7 +448,7 @@ end subroutine MPI_Comm_get_errhandler end interface -interface MPI_Comm_get_name +interface subroutine MPI_Comm_get_name(comm, comm_name, resultlen, ierror) integer, intent(in) :: comm @@ -431,7 +460,7 @@ end subroutine MPI_Comm_get_name end interface -interface MPI_Comm_group +interface subroutine MPI_Comm_group(comm, group, ierror) integer, intent(in) :: comm @@ -442,7 +471,7 @@ end subroutine MPI_Comm_group end interface -interface MPI_Comm_rank +interface subroutine 
MPI_Comm_rank(comm, rank, ierror) integer, intent(in) :: comm @@ -453,7 +482,7 @@ end subroutine MPI_Comm_rank end interface -interface MPI_Comm_remote_group +interface subroutine MPI_Comm_remote_group(comm, group, ierror) integer, intent(in) :: comm @@ -464,7 +493,7 @@ end subroutine MPI_Comm_remote_group end interface -interface MPI_Comm_remote_size +interface subroutine MPI_Comm_remote_size(comm, size, ierror) integer, intent(in) :: comm @@ -475,7 +504,7 @@ end subroutine MPI_Comm_remote_size end interface -interface MPI_Comm_set_info +interface subroutine MPI_Comm_set_info(comm, info, ierror) include 'mpif-config.h' @@ -487,7 +516,7 @@ end subroutine MPI_Comm_set_info end interface -interface MPI_Comm_set_attr +interface subroutine MPI_Comm_set_attr(comm, comm_keyval, attribute_val, ierror) include 'mpif-config.h' @@ -500,7 +529,7 @@ end subroutine MPI_Comm_set_attr end interface -interface MPI_Comm_set_errhandler +interface subroutine MPI_Comm_set_errhandler(comm, errhandler, ierror) integer, intent(in) :: comm @@ -511,7 +540,7 @@ end subroutine MPI_Comm_set_errhandler end interface -interface MPI_Comm_set_name +interface subroutine MPI_Comm_set_name(comm, comm_name, ierror) integer, intent(in) :: comm @@ -522,7 +551,7 @@ end subroutine MPI_Comm_set_name end interface -interface MPI_Comm_size +interface subroutine MPI_Comm_size(comm, size, ierror) integer, intent(in) :: comm @@ -533,7 +562,7 @@ end subroutine MPI_Comm_size end interface -interface MPI_Comm_split +interface subroutine MPI_Comm_split(comm, color, key, newcomm, ierror) integer, intent(in) :: comm @@ -546,7 +575,7 @@ end subroutine MPI_Comm_split end interface -interface MPI_Comm_test_inter +interface subroutine MPI_Comm_test_inter(comm, flag, ierror) integer, intent(in) :: comm @@ -557,7 +586,7 @@ end subroutine MPI_Comm_test_inter end interface -interface MPI_Dims_create +interface subroutine MPI_Dims_create(nnodes, ndims, dims, ierror) integer, intent(in) :: nnodes @@ -569,7 +598,7 @@ end 
subroutine MPI_Dims_create end interface -interface MPI_Errhandler_free +interface subroutine MPI_Errhandler_free(errhandler, ierror) integer, intent(inout) :: errhandler @@ -579,7 +608,7 @@ end subroutine MPI_Errhandler_free end interface -interface MPI_Error_class +interface subroutine MPI_Error_class(errorcode, errorclass, ierror) integer, intent(in) :: errorcode @@ -590,7 +619,7 @@ end subroutine MPI_Error_class end interface -interface MPI_Error_string +interface subroutine MPI_Error_string(errorcode, string, resultlen, ierror) integer, intent(in) :: errorcode @@ -602,7 +631,7 @@ end subroutine MPI_Error_string end interface -interface MPI_Finalize +interface subroutine MPI_Finalize(ierror) integer, intent(out) :: ierror @@ -611,7 +640,7 @@ end subroutine MPI_Finalize end interface -interface MPI_Finalized +interface subroutine MPI_Finalized(flag, ierror) logical, intent(out) :: flag @@ -621,7 +650,7 @@ end subroutine MPI_Finalized end interface -interface MPI_Get_count +interface subroutine MPI_Get_count(status, datatype, count, ierror) include 'mpif-config.h' @@ -634,7 +663,7 @@ end subroutine MPI_Get_count end interface -interface MPI_Get_elements +interface subroutine MPI_Get_elements(status, datatype, count, ierror) include 'mpif-config.h' @@ -647,7 +676,7 @@ end subroutine MPI_Get_elements end interface -interface MPI_Get_elements_x +interface subroutine MPI_Get_elements_x(status, datatype, count, ierror) include 'mpif-config.h' @@ -660,7 +689,7 @@ end subroutine MPI_Get_elements_x end interface -interface MPI_Get_processor_name +interface subroutine MPI_Get_processor_name(name, resultlen, ierror) character(len=*), intent(out) :: name @@ -671,7 +700,7 @@ end subroutine MPI_Get_processor_name end interface -interface MPI_Get_version +interface subroutine MPI_Get_version(version, subversion, ierror) integer, intent(out) :: version @@ -682,7 +711,7 @@ end subroutine MPI_Get_version end interface -interface MPI_Graph_create +interface subroutine 
MPI_Graph_create(comm_old, nnodes, index, edges, reorder, & comm_graph, ierror) @@ -698,7 +727,7 @@ end subroutine MPI_Graph_create end interface -interface MPI_Graph_get +interface subroutine MPI_Graph_get(comm, maxindex, maxedges, index, edges& , ierror) @@ -713,7 +742,7 @@ end subroutine MPI_Graph_get end interface -interface MPI_Graph_map +interface subroutine MPI_Graph_map(comm, nnodes, index, edges, newrank& , ierror) @@ -728,7 +757,7 @@ end subroutine MPI_Graph_map end interface -interface MPI_Graph_neighbors +interface subroutine MPI_Graph_neighbors(comm, rank, maxneighbors, neighbors, ierror) integer, intent(in) :: comm @@ -741,7 +770,7 @@ end subroutine MPI_Graph_neighbors end interface -interface MPI_Graph_neighbors_count +interface subroutine MPI_Graph_neighbors_count(comm, rank, nneighbors, ierror) integer, intent(in) :: comm @@ -753,7 +782,7 @@ end subroutine MPI_Graph_neighbors_count end interface -interface MPI_Graphdims_get +interface subroutine MPI_Graphdims_get(comm, nnodes, nedges, ierror) integer, intent(in) :: comm @@ -765,7 +794,7 @@ end subroutine MPI_Graphdims_get end interface -interface MPI_Grequest_complete +interface subroutine MPI_Grequest_complete(request, ierror) integer, intent(in) :: request @@ -775,7 +804,7 @@ end subroutine MPI_Grequest_complete end interface -interface MPI_Grequest_start +interface subroutine MPI_Grequest_start(query_fn, free_fn, cancel_fn, extra_state, request& , ierror) @@ -791,7 +820,7 @@ end subroutine MPI_Grequest_start end interface -interface MPI_Group_compare +interface subroutine MPI_Group_compare(group1, group2, result, ierror) integer, intent(in) :: group1 @@ -803,7 +832,7 @@ end subroutine MPI_Group_compare end interface -interface MPI_Group_difference +interface subroutine MPI_Group_difference(group1, group2, newgroup, ierror) integer, intent(in) :: group1 @@ -815,7 +844,7 @@ end subroutine MPI_Group_difference end interface -interface MPI_Group_excl +interface subroutine MPI_Group_excl(group, n, 
ranks, newgroup, ierror) integer, intent(in) :: group @@ -828,7 +857,7 @@ end subroutine MPI_Group_excl end interface -interface MPI_Group_free +interface subroutine MPI_Group_free(group, ierror) integer, intent(inout) :: group @@ -837,8 +866,18 @@ end subroutine MPI_Group_free end interface +interface MPI_Group_from_session_pset +subroutine MPI_Group_from_session_pset(session, pset_name, newgroup, ierror) + implicit none + integer, intent(in) :: session + character(len=*), intent(in) :: pset_name + integer, intent(out) :: newgroup + integer, intent(out) :: ierror +end subroutine MPI_Group_from_session_pset +end interface + -interface MPI_Group_incl +interface subroutine MPI_Group_incl(group, n, ranks, newgroup, ierror) integer, intent(in) :: group @@ -851,7 +890,7 @@ end subroutine MPI_Group_incl end interface -interface MPI_Group_intersection +interface subroutine MPI_Group_intersection(group1, group2, newgroup, ierror) integer, intent(in) :: group1 @@ -863,7 +902,7 @@ end subroutine MPI_Group_intersection end interface -interface MPI_Group_range_excl +interface subroutine MPI_Group_range_excl(group, n, ranges, newgroup, ierror) integer, intent(in) :: group @@ -876,7 +915,7 @@ end subroutine MPI_Group_range_excl end interface -interface MPI_Group_range_incl +interface subroutine MPI_Group_range_incl(group, n, ranges, newgroup, ierror) integer, intent(in) :: group @@ -889,7 +928,7 @@ end subroutine MPI_Group_range_incl end interface -interface MPI_Group_rank +interface subroutine MPI_Group_rank(group, rank, ierror) integer, intent(in) :: group @@ -900,7 +939,7 @@ end subroutine MPI_Group_rank end interface -interface MPI_Group_size +interface subroutine MPI_Group_size(group, size, ierror) integer, intent(in) :: group @@ -911,7 +950,7 @@ end subroutine MPI_Group_size end interface -interface MPI_Group_translate_ranks +interface subroutine MPI_Group_translate_ranks(group1, n, ranks1, group2, ranks2& , ierror) @@ -926,7 +965,7 @@ end subroutine 
MPI_Group_translate_ranks end interface -interface MPI_Group_union +interface subroutine MPI_Group_union(group1, group2, newgroup, ierror) integer, intent(in) :: group1 @@ -938,7 +977,7 @@ end subroutine MPI_Group_union end interface -interface MPI_Info_create +interface subroutine MPI_Info_create(info, ierror) integer, intent(out) :: info @@ -948,7 +987,7 @@ end subroutine MPI_Info_create end interface -interface MPI_Info_delete +interface subroutine MPI_Info_delete(info, key, ierror) integer, intent(in) :: info @@ -959,7 +998,7 @@ end subroutine MPI_Info_delete end interface -interface MPI_Info_dup +interface subroutine MPI_Info_dup(info, newinfo, ierror) integer, intent(in) :: info @@ -970,7 +1009,7 @@ end subroutine MPI_Info_dup end interface -interface MPI_Info_free +interface subroutine MPI_Info_free(info, ierror) integer, intent(inout) :: info @@ -980,7 +1019,7 @@ end subroutine MPI_Info_free end interface -interface MPI_Info_get +interface subroutine MPI_Info_get(info, key, valuelen, value, flag& , ierror) @@ -995,7 +1034,7 @@ end subroutine MPI_Info_get end interface -interface MPI_Info_get_nkeys +interface subroutine MPI_Info_get_nkeys(info, nkeys, ierror) integer, intent(in) :: info @@ -1006,7 +1045,7 @@ end subroutine MPI_Info_get_nkeys end interface -interface MPI_Info_get_nthkey +interface subroutine MPI_Info_get_nthkey(info, n, key, ierror) integer, intent(in) :: info @@ -1018,7 +1057,20 @@ end subroutine MPI_Info_get_nthkey end interface -interface MPI_Info_get_valuelen +interface + +subroutine MPI_Info_get_string(info, key, buflen, value, flag, ierror) + integer, intent(in) :: info + character(len=*), intent(in) :: key + integer, intent(inout) :: buflen + character(len=*), intent(out) :: value + logical, intent(out) :: flag + integer, intent(out) :: ierror +end subroutine MPI_Info_get_string + +end interface + +interface subroutine MPI_Info_get_valuelen(info, key, valuelen, flag, ierror) integer, intent(in) :: info @@ -1031,7 +1083,7 @@ end 
subroutine MPI_Info_get_valuelen end interface -interface MPI_Info_set +interface subroutine MPI_Info_set(info, key, value, ierror) integer, intent(in) :: info @@ -1043,7 +1095,7 @@ end subroutine MPI_Info_set end interface -interface MPI_Init +interface subroutine MPI_Init(ierror) integer, intent(out) :: ierror @@ -1052,7 +1104,7 @@ end subroutine MPI_Init end interface -interface MPI_Init_thread +interface subroutine MPI_Init_thread(required, provided, ierror) integer, intent(in) :: required @@ -1063,7 +1115,7 @@ end subroutine MPI_Init_thread end interface -interface MPI_Initialized +interface subroutine MPI_Initialized(flag, ierror) logical, intent(out) :: flag @@ -1073,7 +1125,7 @@ end subroutine MPI_Initialized end interface -interface MPI_Intercomm_create +interface subroutine MPI_Intercomm_create(local_comm, local_leader, bridge_comm, remote_leader, tag, & newintercomm, ierror) @@ -1088,8 +1140,24 @@ end subroutine MPI_Intercomm_create end interface +interface MPI_Intercomm_create_from_groups + +subroutine MPI_Intercomm_create_from_groups(local_group, local_leader, remote_group, remote_leader, & + stringtag, info, errhandler, newintercomm, ierror) + implicit none + integer, intent(in) :: local_group, remote_group + integer, intent(in):: local_leader, remote_leader + character(len=*), intent(in) :: stringtag + integer, intent(in) :: info + integer, intent(in) :: errhandler + integer, intent(out) :: newintercomm + integer, intent(out) :: ierror +end subroutine MPI_Intercomm_create_from_groups + +end interface + -interface MPI_Intercomm_merge +interface subroutine MPI_Intercomm_merge(intercomm, high, newintercomm, ierror) integer, intent(in) :: intercomm @@ -1101,7 +1169,7 @@ end subroutine MPI_Intercomm_merge end interface -interface MPI_Iprobe +interface subroutine MPI_Iprobe(source, tag, comm, flag, status& , ierror) @@ -1117,7 +1185,7 @@ end subroutine MPI_Iprobe end interface -interface MPI_Is_thread_main +interface subroutine MPI_Is_thread_main(flag, 
ierror) logical, intent(out) :: flag @@ -1127,7 +1195,7 @@ end subroutine MPI_Is_thread_main end interface -interface MPI_Op_commutative +interface subroutine MPI_Op_commutative(op, commute, ierror) integer, intent(in) :: op @@ -1138,7 +1206,7 @@ end subroutine MPI_Op_commutative end interface -interface MPI_Op_create +interface subroutine MPI_Op_create(function, commute, op, ierror) external :: function @@ -1150,7 +1218,7 @@ end subroutine MPI_Op_create end interface -interface MPI_Op_free +interface subroutine MPI_Op_free(op, ierror) integer, intent(inout) :: op @@ -1160,7 +1228,7 @@ end subroutine MPI_Op_free end interface -interface MPI_Pack_external_size +interface subroutine MPI_Pack_external_size(datarep, incount, datatype, size, ierror) include 'mpif-config.h' @@ -1174,7 +1242,7 @@ end subroutine MPI_Pack_external_size end interface -interface MPI_Pack_size +interface subroutine MPI_Pack_size(incount, datatype, comm, size, ierror) integer, intent(in) :: incount @@ -1187,7 +1255,7 @@ end subroutine MPI_Pack_size end interface -interface MPI_Pcontrol +interface subroutine MPI_Pcontrol(level) integer, intent(in) :: level @@ -1197,7 +1265,7 @@ end subroutine MPI_Pcontrol end interface -interface MPI_Probe +interface subroutine MPI_Probe(source, tag, comm, status, ierror) include 'mpif-config.h' @@ -1211,7 +1279,54 @@ end subroutine MPI_Probe end interface -interface MPI_Query_thread +interface + +subroutine MPI_Parrived(request, partition, flag, ierror) + integer, intent(in) :: request + integer, intent(in) :: partition + logical, intent(out) :: flag + integer, intent(out) :: ierror +end subroutine MPI_Parrived + +end interface + + +interface + +subroutine MPI_Pready(partition, request, ierror) + integer, intent(in) :: partition + integer, intent(in) :: request + integer, intent(out) :: ierror +end subroutine MPI_Pready + +end interface + + +interface + +subroutine MPI_Pready_list(length, array_of_partitions, request, ierror) + integer, intent(in) :: length + 
integer, dimension(*), intent(in) :: array_of_partitions + integer, intent(in) :: request + integer, intent(out) :: ierror +end subroutine MPI_Pready_list + +end interface + + +interface + +subroutine MPI_Pready_range(partition_low, partition_high, request, ierror) + integer, intent(in) :: partition_low + integer, intent(in) :: partition_high + integer, intent(in) :: request + integer, intent(out) :: ierror +end subroutine MPI_Pready_range + +end interface + + +interface subroutine MPI_Query_thread(provided, ierror) integer, intent(out) :: provided @@ -1221,7 +1336,7 @@ end subroutine MPI_Query_thread end interface -interface MPI_Register_datarep +interface subroutine MPI_Register_datarep(datarep, read_conversion_fn, write_conversion_fn, dtype_file_extent_fn, extra_state& , ierror) @@ -1237,7 +1352,7 @@ end subroutine MPI_Register_datarep end interface -interface MPI_Request_free +interface subroutine MPI_Request_free(request, ierror) integer, intent(inout) :: request @@ -1247,7 +1362,7 @@ end subroutine MPI_Request_free end interface -interface MPI_Request_get_status +interface subroutine MPI_Request_get_status(request, flag, status, ierror) include 'mpif-config.h' @@ -1259,8 +1374,72 @@ end subroutine MPI_Request_get_status end interface +interface MPI_Session_get_info +subroutine MPI_Session_get_info(session, info, ierror) + implicit none + integer, intent(in) :: session + integer, intent(out) :: info + integer, intent(out) :: ierror +end subroutine MPI_Session_get_info +end interface + +interface +subroutine MPI_Session_get_nth_pset(session, info, n, pset_len, pset_name, ierror) + implicit none + integer, intent(in) :: session + integer, intent(in) :: info + integer, intent(in) :: n + integer, intent(inout) :: pset_len + character(len=*), intent(out) :: pset_name + integer, intent(out) :: ierror +end subroutine MPI_Session_get_nth_pset +end interface -interface MPI_Start + +interface +subroutine MPI_Session_get_num_psets(session, info, npset_names, ierror) + 
implicit none + integer, intent(in) :: session + integer, intent(in) :: info + integer, intent(out) :: npset_names + integer, intent(out) :: ierror +end subroutine MPI_Session_get_num_psets +end interface + +interface +subroutine MPI_Session_get_pset_info(session, pset_name, info, ierror) + implicit none + integer, intent(in) :: session + character(len=*), intent(in) :: pset_name + integer, intent(out) :: info + integer, intent(out) :: ierror +end subroutine MPI_Session_get_pset_info +end interface + + +interface MPI_Session_init + +subroutine MPI_Session_init(info,errhandler,session,ierror) + implicit none + integer, intent(in) :: info + integer, intent(in) :: errhandler + integer, intent(out) :: session + integer, intent(out) :: ierror +end subroutine MPI_Session_init + +end interface MPI_Session_init + +interface MPI_Session_finalize + +subroutine MPI_Session_finalize(session,ierror) + implicit none + integer, intent(inout) :: session + integer, intent(out) :: ierror +end subroutine MPI_Session_finalize + +end interface MPI_Session_finalize + +interface subroutine MPI_Start(request, ierror) integer, intent(inout) :: request @@ -1270,7 +1449,7 @@ end subroutine MPI_Start end interface -interface MPI_Startall +interface subroutine MPI_Startall(count, array_of_requests, ierror) integer, intent(in) :: count @@ -1281,7 +1460,7 @@ end subroutine MPI_Startall end interface -interface MPI_Status_set_cancelled +interface subroutine MPI_Status_set_cancelled(status, flag, ierror) include 'mpif-config.h' @@ -1293,7 +1472,7 @@ end subroutine MPI_Status_set_cancelled end interface -interface MPI_Status_set_elements +interface subroutine MPI_Status_set_elements(status, datatype, count, ierror) include 'mpif-config.h' @@ -1306,7 +1485,7 @@ end subroutine MPI_Status_set_elements end interface -interface MPI_Test +interface subroutine MPI_Test(request, flag, status, ierror) include 'mpif-config.h' @@ -1319,7 +1498,7 @@ end subroutine MPI_Test end interface -interface 
MPI_Test_cancelled +interface subroutine MPI_Test_cancelled(status, flag, ierror) include 'mpif-config.h' @@ -1331,27 +1510,27 @@ end subroutine MPI_Test_cancelled end interface -interface MPI_Testall +interface subroutine MPI_Testall(count, array_of_requests, flag, array_of_statuses, ierror) include 'mpif-config.h' integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests logical, intent(out) :: flag - integer, dimension(MPI_STATUS_SIZE, count), intent(out) :: array_of_statuses + integer, dimension(MPI_STATUS_SIZE, *), intent(out) :: array_of_statuses integer, intent(out) :: ierror end subroutine MPI_Testall end interface -interface MPI_Testany +interface subroutine MPI_Testany(count, array_of_requests, index, flag, status& , ierror) include 'mpif-config.h' integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests integer, intent(out) :: index logical, intent(out) :: flag integer, dimension(MPI_STATUS_SIZE), intent(out) :: status @@ -1361,13 +1540,13 @@ end subroutine MPI_Testany end interface -interface MPI_Testsome +interface subroutine MPI_Testsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses& , ierror) include 'mpif-config.h' integer, intent(in) :: incount - integer, dimension(incount), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests integer, intent(out) :: outcount integer, dimension(*), intent(out) :: array_of_indices integer, dimension(MPI_STATUS_SIZE, *), intent(out) :: array_of_statuses @@ -1377,7 +1556,7 @@ end subroutine MPI_Testsome end interface -interface MPI_Topo_test +interface subroutine MPI_Topo_test(comm, status, ierror) integer, intent(in) :: comm @@ -1388,7 +1567,7 @@ end subroutine MPI_Topo_test end interface -interface MPI_Type_commit +interface subroutine 
MPI_Type_commit(datatype, ierror) integer, intent(inout) :: datatype @@ -1398,7 +1577,7 @@ end subroutine MPI_Type_commit end interface -interface MPI_Type_contiguous +interface subroutine MPI_Type_contiguous(count, oldtype, newtype, ierror) integer, intent(in) :: count @@ -1410,7 +1589,7 @@ end subroutine MPI_Type_contiguous end interface -interface MPI_Type_create_darray +interface subroutine MPI_Type_create_darray(size, rank, ndims, gsize_array, distrib_array, & darg_array, psize_array, order, oldtype, newtype, ierror) @@ -1430,7 +1609,7 @@ end subroutine MPI_Type_create_darray end interface -interface MPI_Type_create_f90_complex +interface subroutine MPI_Type_create_f90_complex(p, r, newtype, ierror) integer, intent(in) :: p @@ -1442,7 +1621,7 @@ end subroutine MPI_Type_create_f90_complex end interface -interface MPI_Type_create_f90_integer +interface subroutine MPI_Type_create_f90_integer(r, newtype, ierror) integer, intent(in) :: r @@ -1453,7 +1632,7 @@ end subroutine MPI_Type_create_f90_integer end interface -interface MPI_Type_create_f90_real +interface subroutine MPI_Type_create_f90_real(p, r, newtype, ierror) integer, intent(in) :: p @@ -1465,7 +1644,7 @@ end subroutine MPI_Type_create_f90_real end interface -interface MPI_Type_create_hindexed +interface subroutine MPI_Type_create_hindexed(count, array_of_blocklengths, array_of_displacements, oldtype, newtype& , ierror) @@ -1481,7 +1660,7 @@ end subroutine MPI_Type_create_hindexed end interface -interface MPI_Type_create_hvector +interface subroutine MPI_Type_create_hvector(count, blocklength, stride, oldtype, newtype& , ierror) @@ -1497,7 +1676,7 @@ end subroutine MPI_Type_create_hvector end interface -interface MPI_Type_create_indexed_block +interface subroutine MPI_Type_create_indexed_block(count, blocklength, array_of_displacements, oldtype, newtype& , ierror) @@ -1512,7 +1691,7 @@ end subroutine MPI_Type_create_indexed_block end interface -interface MPI_Type_create_keyval +interface subroutine 
MPI_Type_create_keyval(type_copy_attr_fn, type_delete_attr_fn, type_keyval, extra_state, ierror) include 'mpif-config.h' @@ -1526,7 +1705,7 @@ end subroutine MPI_Type_create_keyval end interface -interface MPI_Type_create_resized +interface subroutine MPI_Type_create_resized(oldtype, lb, extent, newtype, ierror) include 'mpif-config.h' @@ -1540,7 +1719,7 @@ end subroutine MPI_Type_create_resized end interface -interface MPI_Type_create_struct +interface subroutine MPI_Type_create_struct(count, array_of_block_lengths, array_of_displacements, array_of_types, newtype& , ierror) @@ -1556,7 +1735,7 @@ end subroutine MPI_Type_create_struct end interface -interface MPI_Type_create_subarray +interface subroutine MPI_Type_create_subarray(ndims, size_array, subsize_array, start_array, order, & oldtype, newtype, ierror) @@ -1573,7 +1752,7 @@ end subroutine MPI_Type_create_subarray end interface -interface MPI_Type_delete_attr +interface subroutine MPI_Type_delete_attr(datatype, type_keyval, ierror) integer, intent(in) :: datatype @@ -1584,7 +1763,7 @@ end subroutine MPI_Type_delete_attr end interface -interface MPI_Type_dup +interface subroutine MPI_Type_dup(oldtype, newtype, ierror) integer, intent(in) :: oldtype @@ -1595,7 +1774,7 @@ end subroutine MPI_Type_dup end interface -interface MPI_Type_free +interface subroutine MPI_Type_free(datatype, ierror) integer, intent(inout) :: datatype @@ -1605,7 +1784,7 @@ end subroutine MPI_Type_free end interface -interface MPI_Type_free_keyval +interface subroutine MPI_Type_free_keyval(type_keyval, ierror) integer, intent(inout) :: type_keyval @@ -1615,7 +1794,7 @@ end subroutine MPI_Type_free_keyval end interface -interface MPI_Type_get_attr +interface subroutine MPI_Type_get_attr(datatype, type_keyval, attribute_val, flag, ierror) include 'mpif-config.h' @@ -1629,7 +1808,7 @@ end subroutine MPI_Type_get_attr end interface -interface MPI_Type_get_contents +interface subroutine MPI_Type_get_contents(datatype, max_integers, 
max_addresses, max_datatypes, array_of_integers, & array_of_addresses, array_of_datatypes, ierror) @@ -1647,7 +1826,7 @@ end subroutine MPI_Type_get_contents end interface -interface MPI_Type_get_envelope +interface subroutine MPI_Type_get_envelope(datatype, num_integers, num_addresses, num_datatypes, combiner& , ierror) @@ -1662,7 +1841,7 @@ end subroutine MPI_Type_get_envelope end interface -interface MPI_Type_get_extent +interface subroutine MPI_Type_get_extent(datatype, lb, extent, ierror) include 'mpif-config.h' @@ -1675,7 +1854,7 @@ end subroutine MPI_Type_get_extent end interface -interface MPI_Type_get_extent_x +interface subroutine MPI_Type_get_extent_x(datatype, lb, extent, ierror) include 'mpif-config.h' @@ -1688,7 +1867,7 @@ end subroutine MPI_Type_get_extent_x end interface -interface MPI_Type_get_name +interface subroutine MPI_Type_get_name(datatype, type_name, resultlen, ierror) integer, intent(in) :: datatype @@ -1700,7 +1879,7 @@ end subroutine MPI_Type_get_name end interface -interface MPI_Type_get_true_extent +interface subroutine MPI_Type_get_true_extent(datatype, true_lb, true_extent, ierror) include 'mpif-config.h' @@ -1713,7 +1892,7 @@ end subroutine MPI_Type_get_true_extent end interface -interface MPI_Type_get_true_extent_x +interface subroutine MPI_Type_get_true_extent_x(datatype, true_lb, true_extent, ierror) include 'mpif-config.h' @@ -1726,7 +1905,7 @@ end subroutine MPI_Type_get_true_extent_x end interface -interface MPI_Type_indexed +interface subroutine MPI_Type_indexed(count, array_of_blocklengths, array_of_displacements, oldtype, newtype& , ierror) @@ -1739,7 +1918,7 @@ subroutine MPI_Type_indexed(count, array_of_blocklengths, array_of_displacements end subroutine MPI_Type_indexed end interface -interface MPI_Type_match_size +interface subroutine MPI_Type_match_size(typeclass, size, datatype, ierror) integer, intent(in) :: typeclass @@ -1751,7 +1930,7 @@ end subroutine MPI_Type_match_size end interface -interface MPI_Type_set_attr 
+interface subroutine MPI_Type_set_attr(datatype, type_keyval, attr_val, ierror) include 'mpif-config.h' @@ -1764,7 +1943,7 @@ end subroutine MPI_Type_set_attr end interface -interface MPI_Type_set_name +interface subroutine MPI_Type_set_name(datatype, type_name, ierror) integer, intent(in) :: datatype @@ -1775,7 +1954,7 @@ end subroutine MPI_Type_set_name end interface -interface MPI_Type_size +interface subroutine MPI_Type_size(datatype, size, ierror) integer, intent(in) :: datatype @@ -1786,7 +1965,7 @@ end subroutine MPI_Type_size end interface -interface MPI_Type_size_x +interface subroutine MPI_Type_size_x(datatype, size, ierror) include 'mpif-config.h' @@ -1798,7 +1977,7 @@ end subroutine MPI_Type_size_x end interface -interface MPI_Type_vector +interface subroutine MPI_Type_vector(count, blocklength, stride, oldtype, newtype& , ierror) @@ -1813,7 +1992,7 @@ end subroutine MPI_Type_vector end interface -interface MPI_Wait +interface subroutine MPI_Wait(request, status, ierror) include 'mpif-config.h' @@ -1825,12 +2004,12 @@ end subroutine MPI_Wait end interface -interface MPI_Waitall +interface subroutine MPI_Waitall(count, array_of_requests, array_of_statuses, ierror) include 'mpif-config.h' integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests integer, dimension(MPI_STATUS_SIZE, *), intent(out) :: array_of_statuses integer, intent(out) :: ierror end subroutine MPI_Waitall @@ -1838,12 +2017,12 @@ end subroutine MPI_Waitall end interface -interface MPI_Waitany +interface subroutine MPI_Waitany(count, array_of_requests, index, status, ierror) include 'mpif-config.h' integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests integer, intent(out) :: index integer, dimension(MPI_STATUS_SIZE), intent(out) :: status integer, intent(out) :: ierror @@ -1852,13 +2031,13 @@ end 
subroutine MPI_Waitany end interface -interface MPI_Waitsome +interface subroutine MPI_Waitsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses& , ierror) include 'mpif-config.h' integer, intent(in) :: incount - integer, dimension(incount), intent(inout) :: array_of_requests + integer, dimension(*), intent(inout) :: array_of_requests integer, intent(out) :: outcount integer, dimension(*), intent(out) :: array_of_indices integer, dimension(MPI_STATUS_SIZE, *), intent(out) :: array_of_statuses @@ -1868,7 +2047,7 @@ end subroutine MPI_Waitsome end interface -interface MPI_Win_call_errhandler +interface subroutine MPI_Win_call_errhandler(win, errorcode, ierror) integer, intent(in) :: win @@ -1879,7 +2058,7 @@ end subroutine MPI_Win_call_errhandler end interface -interface MPI_Win_complete +interface subroutine MPI_Win_complete(win, ierror) integer, intent(in) :: win @@ -1889,7 +2068,7 @@ end subroutine MPI_Win_complete end interface -interface MPI_Win_create_errhandler +interface subroutine MPI_Win_create_errhandler(function, errhandler, ierror) external :: function @@ -1900,7 +2079,7 @@ end subroutine MPI_Win_create_errhandler end interface -interface MPI_Win_create_keyval +interface subroutine MPI_Win_create_keyval(win_copy_attr_fn, win_delete_attr_fn, win_keyval, extra_state, ierror) include 'mpif-config.h' @@ -1914,7 +2093,7 @@ end subroutine MPI_Win_create_keyval end interface -interface MPI_Win_delete_attr +interface subroutine MPI_Win_delete_attr(win, win_keyval, ierror) integer, intent(in) :: win @@ -1925,7 +2104,7 @@ end subroutine MPI_Win_delete_attr end interface -interface MPI_Win_fence +interface subroutine MPI_Win_fence(assert, win, ierror) integer, intent(in) :: assert @@ -1936,7 +2115,7 @@ end subroutine MPI_Win_fence end interface -interface MPI_Win_free +interface subroutine MPI_Win_free(win, ierror) integer, intent(inout) :: win @@ -1946,7 +2125,7 @@ end subroutine MPI_Win_free end interface -interface MPI_Win_free_keyval 
+interface subroutine MPI_Win_free_keyval(win_keyval, ierror) integer, intent(inout) :: win_keyval @@ -1956,7 +2135,7 @@ end subroutine MPI_Win_free_keyval end interface -interface MPI_Win_get_attr +interface subroutine MPI_Win_get_attr(win, win_keyval, attribute_val, flag, ierror) include 'mpif-config.h' @@ -1970,7 +2149,7 @@ end subroutine MPI_Win_get_attr end interface -interface MPI_Win_get_errhandler +interface subroutine MPI_Win_get_errhandler(win, errhandler, ierror) integer, intent(in) :: win @@ -1981,7 +2160,7 @@ end subroutine MPI_Win_get_errhandler end interface -interface MPI_Win_get_group +interface subroutine MPI_Win_get_group(win, group, ierror) integer, intent(in) :: win @@ -1992,7 +2171,7 @@ end subroutine MPI_Win_get_group end interface -interface MPI_Win_get_name +interface subroutine MPI_Win_get_name(win, win_name, resultlen, ierror) integer, intent(in) :: win @@ -2004,7 +2183,7 @@ end subroutine MPI_Win_get_name end interface -interface MPI_Win_lock +interface subroutine MPI_Win_lock(lock_type, rank, assert, win, ierror) integer, intent(in) :: lock_type @@ -2017,7 +2196,7 @@ end subroutine MPI_Win_lock end interface -interface MPI_Win_post +interface subroutine MPI_Win_post(group, assert, win, ierror) integer, intent(in) :: group @@ -2029,7 +2208,7 @@ end subroutine MPI_Win_post end interface -interface MPI_Win_set_attr +interface subroutine MPI_Win_set_attr(win, win_keyval, attribute_val, ierror) include 'mpif-config.h' @@ -2042,7 +2221,7 @@ end subroutine MPI_Win_set_attr end interface -interface MPI_Win_set_errhandler +interface subroutine MPI_Win_set_errhandler(win, errhandler, ierror) integer, intent(in) :: win @@ -2053,7 +2232,7 @@ end subroutine MPI_Win_set_errhandler end interface -interface MPI_Win_set_name +interface subroutine MPI_Win_set_name(win, win_name, ierror) integer, intent(in) :: win @@ -2064,7 +2243,7 @@ end subroutine MPI_Win_set_name end interface -interface MPI_Win_start +interface subroutine MPI_Win_start(group, assert, 
win, ierror) integer, intent(in) :: group @@ -2076,7 +2255,7 @@ end subroutine MPI_Win_start end interface -interface MPI_Win_test +interface subroutine MPI_Win_test(win, flag, ierror) integer, intent(in) :: win @@ -2087,7 +2266,7 @@ end subroutine MPI_Win_test end interface -interface MPI_Win_unlock +interface subroutine MPI_Win_unlock(rank, win, ierror) integer, intent(in) :: rank @@ -2098,7 +2277,7 @@ end subroutine MPI_Win_unlock end interface -interface MPI_Win_wait +interface subroutine MPI_Win_wait(win, ierror) integer, intent(in) :: win @@ -2108,7 +2287,7 @@ end subroutine MPI_Win_wait end interface -interface MPI_Close_port +interface subroutine MPI_Close_port(port_name, ierror) character(len=*), intent(in) :: port_name @@ -2118,7 +2297,7 @@ end subroutine MPI_Close_port end interface -interface MPI_Lookup_name +interface subroutine MPI_Lookup_name(service_name, info, port_name, ierror) character(len=*), intent(in) :: service_name @@ -2130,7 +2309,7 @@ end subroutine MPI_Lookup_name end interface -interface MPI_Open_port +interface subroutine MPI_Open_port(info, port_name, ierror) integer, intent(in) :: info @@ -2141,7 +2320,7 @@ end subroutine MPI_Open_port end interface -interface MPI_Publish_name +interface subroutine MPI_Publish_name(service_name, info, port_name, ierror) character(len=*), intent(in) :: service_name @@ -2153,7 +2332,7 @@ end subroutine MPI_Publish_name end interface -interface MPI_Unpublish_name +interface subroutine MPI_Unpublish_name(service_name, info, port_name, ierror) character(len=*), intent(in) :: service_name @@ -2165,7 +2344,7 @@ end subroutine MPI_Unpublish_name end interface -interface MPI_Comm_disconnect +interface subroutine MPI_Comm_disconnect(comm, ierror) integer, intent(inout) :: comm @@ -2175,7 +2354,7 @@ end subroutine MPI_Comm_disconnect end interface -interface MPI_Comm_get_parent +interface subroutine MPI_Comm_get_parent(parent, ierror) integer, intent(out) :: parent @@ -2185,7 +2364,7 @@ end subroutine 
MPI_Comm_get_parent end interface -interface MPI_Comm_join +interface subroutine MPI_Comm_join(fd, intercomm, ierror) integer, intent(in) :: fd @@ -2196,7 +2375,7 @@ end subroutine MPI_Comm_join end interface -interface MPI_Comm_accept +interface subroutine MPI_Comm_accept(port_name, info, root, comm, newcomm& , ierror) @@ -2211,7 +2390,7 @@ end subroutine MPI_Comm_accept end interface -interface MPI_Comm_connect +interface subroutine MPI_Comm_connect(port_name, info, root, comm, newcomm& , ierror) @@ -2226,7 +2405,7 @@ end subroutine MPI_Comm_connect end interface -interface MPI_Comm_spawn +interface subroutine MPI_Comm_spawn(command, argv, maxprocs, info, root, & comm, intercomm, array_of_errcodes, ierror) @@ -2244,7 +2423,7 @@ end subroutine MPI_Comm_spawn end interface -interface MPI_Comm_spawn_multiple +interface subroutine MPI_Comm_spawn_multiple(count, array_of_commands, array_of_argv, array_of_maxprocs, array_of_info, & root, comm, intercomm, array_of_errcodes, ierror) @@ -2263,7 +2442,7 @@ end subroutine MPI_Comm_spawn_multiple end interface -interface MPI_Mprobe +interface subroutine MPI_Mprobe(source, tag, comm, message, status, ierror) include 'mpif-config.h' @@ -2278,7 +2457,7 @@ end subroutine MPI_Mprobe end interface -interface MPI_Improbe +interface subroutine MPI_Improbe(source, tag, comm, flag, message, status, ierror) include 'mpif-config.h' @@ -2294,7 +2473,7 @@ end subroutine MPI_Improbe end interface -interface MPI_Get_library_version +interface subroutine MPI_Get_library_version(version, resultlen, ierror) character(len=*), intent(out) :: version @@ -2305,7 +2484,7 @@ end subroutine MPI_Get_library_version end interface -interface MPI_Comm_split_type +interface subroutine MPI_Comm_split_type(comm, split_type, key, info, newcomm, ierror) integer, intent(in) :: comm @@ -2319,7 +2498,7 @@ end subroutine MPI_Comm_split_type end interface -interface MPI_Type_create_hindexed_block +interface subroutine MPI_Type_create_hindexed_block(count, 
blocklength, array_of_displacements, oldtype, newtype& , ierror) @@ -2335,7 +2514,7 @@ end subroutine MPI_Type_create_hindexed_block end interface -interface MPI_Dist_graph_create +interface subroutine MPI_Dist_graph_create(comm_old, n, sources, degrees, destinations, & weights, info, reorder, comm_dist_graph, ierror) @@ -2354,7 +2533,7 @@ end subroutine MPI_Dist_graph_create end interface -interface MPI_Dist_graph_create_adjacent +interface subroutine MPI_Dist_graph_create_adjacent(comm_old, indegree, sources, sourceweights, & outdegree, destinations, destweights, info, reorder, & @@ -2375,7 +2554,7 @@ end subroutine MPI_Dist_graph_create_adjacent end interface -interface MPI_Dist_graph_neighbors_count +interface subroutine MPI_Dist_graph_neighbors_count(comm, indegree, outdegree, weighted, ierror) integer, intent(in) :: comm @@ -2388,7 +2567,7 @@ end subroutine MPI_Dist_graph_neighbors_count end interface -interface MPI_Dist_graph_neighbors +interface subroutine MPI_Dist_graph_neighbors(comm, maxindegree, sources, sourceweights, & maxoutdegree, destinations, destweights, ierror) @@ -2405,7 +2584,7 @@ end subroutine MPI_Dist_graph_neighbors end interface -interface MPI_Win_flush +interface subroutine MPI_Win_flush(rank, win, ierror) integer, intent(in) :: rank @@ -2416,7 +2595,7 @@ end subroutine MPI_Win_flush end interface -interface MPI_Win_flush_all +interface subroutine MPI_Win_flush_all(win, ierror) integer, intent(in) :: win @@ -2426,7 +2605,7 @@ end subroutine MPI_Win_flush_all end interface -interface MPI_Win_flush_local +interface subroutine MPI_Win_flush_local(rank, win, ierror) integer, intent(in) :: rank @@ -2437,7 +2616,7 @@ end subroutine MPI_Win_flush_local end interface -interface MPI_Win_flush_local_all +interface subroutine MPI_Win_flush_local_all(win, ierror) integer, intent(in) :: win diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-removed-interfaces.h b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-removed-interfaces.h index 48ede9d15e8..cb44e329474 
100644 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-removed-interfaces.h +++ b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-removed-interfaces.h @@ -10,7 +10,7 @@ ! University of Stuttgart. All rights reserved. ! Copyright (c) 2004-2005 The Regents of the University of California. ! All rights reserved. -! Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2016 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2018 Los Alamos National Security, LLC. All rights @@ -22,7 +22,7 @@ ! $HEADER$ ! -interface MPI_Attr_delete +interface subroutine MPI_Attr_delete(comm, keyval, ierror) integer, intent(in) :: comm @@ -33,7 +33,7 @@ end subroutine MPI_Attr_delete end interface -interface MPI_Attr_get +interface subroutine MPI_Attr_get(comm, keyval, attribute_val, flag, ierror) integer, intent(in) :: comm @@ -46,7 +46,7 @@ end subroutine MPI_Attr_get end interface -interface MPI_Attr_put +interface subroutine MPI_Attr_put(comm, keyval, attribute_val, ierror) integer, intent(in) :: comm @@ -57,7 +57,7 @@ end subroutine MPI_Attr_put end interface -interface MPI_Errhandler_create +interface subroutine MPI_Errhandler_create(function, errhandler, ierror) external :: function @@ -67,7 +67,7 @@ end subroutine MPI_Errhandler_create end interface -interface MPI_Errhandler_get +interface subroutine MPI_Errhandler_get(comm, errhandler, ierror) integer, intent(in) :: comm @@ -78,7 +78,7 @@ end subroutine MPI_Errhandler_get end interface -interface MPI_Errhandler_set +interface subroutine MPI_Errhandler_set(comm, errhandler, ierror) integer, intent(in) :: comm @@ -88,7 +88,7 @@ end subroutine MPI_Errhandler_set end interface -interface MPI_Keyval_create +interface subroutine MPI_Keyval_create(copy_fn, delete_fn, keyval, extra_state, ierror) external :: copy_fn @@ -101,7 +101,7 @@ end subroutine MPI_Keyval_create end interface -interface 
MPI_Keyval_free +interface subroutine MPI_Keyval_free(keyval, ierror) integer, intent(inout) :: keyval @@ -110,7 +110,7 @@ end subroutine MPI_Keyval_free end interface -interface MPI_Type_extent +interface subroutine MPI_Type_extent(datatype, extent, ierror) integer, intent(in) :: datatype @@ -120,7 +120,7 @@ end subroutine MPI_Type_extent end interface -interface MPI_Type_hindexed +interface subroutine MPI_Type_hindexed(count, array_of_blocklengths, array_of_displacements, oldtype, newtype& , ierror) @@ -135,7 +135,7 @@ end subroutine MPI_Type_hindexed end interface -interface MPI_Type_hvector +interface subroutine MPI_Type_hvector(count, blocklength, stride, oldtype, newtype& , ierror) @@ -149,7 +149,7 @@ end subroutine MPI_Type_hvector end interface -interface MPI_Type_lb +interface subroutine MPI_Type_lb(datatype, lb, ierror) integer, intent(in) :: datatype @@ -159,7 +159,7 @@ end subroutine MPI_Type_lb end interface -interface MPI_Type_struct +interface subroutine MPI_Type_struct(count, array_of_blocklengths, array_of_displacements, array_of_types, newtype& , ierror) @@ -174,7 +174,7 @@ end subroutine MPI_Type_struct end interface -interface MPI_Type_ub +interface subroutine MPI_Type_ub(datatype, ub, ierror) integer, intent(in) :: datatype diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-status.h b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-status.h index 1f75bb46d31..126340c08a3 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-status.h +++ b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-status.h @@ -2,6 +2,7 @@ ! ! Copyright (c) 2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. +! Copyright (c) 2021 Cisco Systems, Inc. All rights reserved ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -9,7 +10,7 @@ ! 
$HEADER$ -interface MPI_Status_f082f +interface subroutine MPI_Status_f082f(f08_status, f_status, ierror) use mpi_types @@ -22,7 +23,7 @@ end subroutine MPI_Status_f082f end interface -interface MPI_Status_f2f08 +interface subroutine MPI_Status_f2f08(f_status, f08_status, ierror) use mpi_types diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi_comm_spawn_multiple_f90.f90 b/ompi/mpi/fortran/use-mpi-tkr/mpi_comm_spawn_multiple_f90.f90 deleted file mode 100644 index 64007d26898..00000000000 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi_comm_spawn_multiple_f90.f90 +++ /dev/null @@ -1,60 +0,0 @@ -! -*- fortran -*- -! -! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana -! University Research and Technology -! Corporation. All rights reserved. -! Copyright (c) 2004-2005 The University of Tennessee and The University -! of Tennessee Research Foundation. All rights -! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -! University of Stuttgart. All rights reserved. -! Copyright (c) 2004-2005 The Regents of the University of California. -! All rights reserved. -! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! 
- -subroutine MPI_Comm_spawn_multipleA(count, array_of_commands, array_of_argv, & - array_of_maxprocs, array_of_info, & - root, comm, intercomm, array_of_errcodes, ierror) - include 'mpif-config.h' - integer, intent(in) :: count - character(len=*), dimension(*), intent(in) :: array_of_commands - character(len=*), dimension(count,*), intent(in) :: array_of_argv - integer, dimension(*), intent(in) :: array_of_maxprocs - integer, dimension(*), intent(in) :: array_of_info - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(out) :: intercomm - integer, dimension(*), intent(out) :: array_of_errcodes - integer, intent(out) :: ierror - - call MPI_Comm_spawn_multiple(count, array_of_commands, array_of_argv, & - array_of_maxprocs, array_of_info, root, comm, intercomm, & - array_of_errcodes, ierror) -end subroutine MPI_Comm_spawn_multipleA - -subroutine MPI_Comm_spawn_multipleN(count, array_of_commands, array_of_argv, & - array_of_maxprocs, array_of_info, & - root, comm, intercomm, array_of_errcodes, ierror) - include 'mpif-config.h' - integer, intent(in) :: count - character(len=*), dimension(*), intent(in) :: array_of_commands - double precision, intent(in) :: array_of_argv - integer, dimension(*), intent(in) :: array_of_maxprocs - integer, dimension(*), intent(in) :: array_of_info - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(out) :: intercomm - integer, dimension(*), intent(out) :: array_of_errcodes - integer, intent(out) :: ierror - - call MPI_Comm_spawn_multiple(count, array_of_commands, array_of_argv, & - array_of_maxprocs, array_of_info, root, comm, intercomm, & - array_of_errcodes, ierror) -end subroutine MPI_Comm_spawn_multipleN - diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi_testall_f90.f90 b/ompi/mpi/fortran/use-mpi-tkr/mpi_testall_f90.f90 deleted file mode 100644 index c54478f95c6..00000000000 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi_testall_f90.f90 +++ /dev/null @@ -1,43 +0,0 @@ -! -*- fortran -*- -! -! 
Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana -! University Research and Technology -! Corporation. All rights reserved. -! Copyright (c) 2004-2005 The University of Tennessee and The University -! of Tennessee Research Foundation. All rights -! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -! University of Stuttgart. All rights reserved. -! Copyright (c) 2004-2005 The Regents of the University of California. -! All rights reserved. -! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -subroutine MPI_TestallS(count, array_of_requests, flag, array_of_statuses, ierror) - include 'mpif-config.h' - integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests - logical, intent(out) :: flag - integer, dimension(MPI_STATUS_SIZE, count), intent(out) :: array_of_statuses - integer, intent(out) :: ierror - - call MPI_Testall(count, array_of_requests, flag, array_of_statuses, ierror) -end subroutine MPI_TestallS - - -subroutine MPI_TestallI(count, array_of_requests, flag, array_of_statuses, ierror) - include 'mpif-config.h' - integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests - logical, intent(out) :: flag - double precision, intent(out) :: array_of_statuses - integer, intent(out) :: ierror - - call MPI_Testall(count, array_of_requests, flag, array_of_statuses, ierror) -end subroutine MPI_TestallI - diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi_testsome_f90.f90 b/ompi/mpi/fortran/use-mpi-tkr/mpi_testsome_f90.f90 deleted file mode 100644 index bd54a27fed4..00000000000 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi_testsome_f90.f90 +++ /dev/null @@ -1,47 +0,0 @@ -! -*- fortran -*- -! -! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana -! University Research and Technology -! Corporation. All rights reserved. -! 
Copyright (c) 2004-2005 The University of Tennessee and The University -! of Tennessee Research Foundation. All rights -! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -! University of Stuttgart. All rights reserved. -! Copyright (c) 2004-2005 The Regents of the University of California. -! All rights reserved. -! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -subroutine MPI_TestsomeS(incount, array_of_requests, outcount, array_of_indices, array_of_statuses& - , ierror) - include 'mpif-config.h' - integer, intent(in) :: incount - integer, dimension(incount), intent(inout) :: array_of_requests - integer, intent(out) :: outcount - integer, dimension(*), intent(out) :: array_of_indices - integer, dimension(MPI_STATUS_SIZE, incount), intent(out) :: array_of_statuses - integer, intent(out) :: ierror - - call MPI_Testsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses, ierror) -end subroutine MPI_TestsomeS - - -subroutine MPI_TestsomeI(incount, array_of_requests, outcount, array_of_indices, array_of_statuses& - , ierror) - include 'mpif-config.h' - integer, intent(in) :: incount - integer, dimension(incount), intent(inout) :: array_of_requests - integer, intent(out) :: outcount - integer, dimension(*), intent(out) :: array_of_indices - double precision, intent(out) :: array_of_statuses - integer, intent(out) :: ierror - - call MPI_Testsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses, ierror) -end subroutine MPI_TestsomeI - diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi_waitall_f90.f90 b/ompi/mpi/fortran/use-mpi-tkr/mpi_waitall_f90.f90 deleted file mode 100644 index 7a093092ae8..00000000000 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi_waitall_f90.f90 +++ /dev/null @@ -1,39 +0,0 @@ -! -*- fortran -*- -! -! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana -! 
University Research and Technology -! Corporation. All rights reserved. -! Copyright (c) 2004-2005 The University of Tennessee and The University -! of Tennessee Research Foundation. All rights -! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -! University of Stuttgart. All rights reserved. -! Copyright (c) 2004-2005 The Regents of the University of California. -! All rights reserved. -! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -subroutine MPI_WaitallS(count, array_of_requests, array_of_statuses, ierror) - include 'mpif-config.h' - integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests - integer, dimension(MPI_STATUS_SIZE, count), intent(out) :: array_of_statuses - integer, intent(out) :: ierror - call MPI_Waitall(count, array_of_requests, array_of_statuses, ierror) -end subroutine MPI_WaitallS - - -subroutine MPI_WaitallI(count, array_of_requests, array_of_statuses, ierror) - include 'mpif-config.h' - integer, intent(in) :: count - integer, dimension(count), intent(inout) :: array_of_requests - double precision, intent(out) :: array_of_statuses - integer, intent(out) :: ierror - call MPI_Waitall(count, array_of_requests, array_of_statuses, ierror) -end subroutine MPI_WaitallI - diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi_waitsome_f90.f90 b/ompi/mpi/fortran/use-mpi-tkr/mpi_waitsome_f90.f90 deleted file mode 100644 index cecd5ce4e60..00000000000 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi_waitsome_f90.f90 +++ /dev/null @@ -1,47 +0,0 @@ -! -*- fortran -*- -! -! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana -! University Research and Technology -! Corporation. All rights reserved. -! Copyright (c) 2004-2005 The University of Tennessee and The University -! of Tennessee Research Foundation. All rights -! reserved. -! 
Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -! University of Stuttgart. All rights reserved. -! Copyright (c) 2004-2005 The Regents of the University of California. -! All rights reserved. -! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -subroutine MPI_WaitsomeS(incount, array_of_requests, outcount, array_of_indices, array_of_statuses& - , ierror) - include 'mpif-config.h' - integer, intent(in) :: incount - integer, dimension(incount), intent(inout) :: array_of_requests - integer, intent(out) :: outcount - integer, dimension(*), intent(out) :: array_of_indices - integer, dimension(MPI_STATUS_SIZE, incount), intent(out) :: array_of_statuses - integer, intent(out) :: ierror - - call MPI_Waitsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses, ierror) -end subroutine MPI_WaitsomeS - - -subroutine MPI_WaitsomeI(incount, array_of_requests, outcount, array_of_indices, array_of_statuses& - , ierror) - include 'mpif-config.h' - integer, intent(in) :: incount - integer, dimension(incount), intent(inout) :: array_of_requests - integer, intent(out) :: outcount - integer, dimension(*), intent(out) :: array_of_indices - double precision, intent(out) :: array_of_statuses - integer, intent(out) :: ierror - - call MPI_Waitsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses, ierror) -end subroutine MPI_WaitsomeI - diff --git a/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-interfaces.h b/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-interfaces.h index e8e5d6d9582..dd10025ce74 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-interfaces.h +++ b/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-interfaces.h @@ -7,6 +7,8 @@ ! Additional copyrights may follow ! ! $HEADER$ +! 
+ #define MPI_Wtick PMPI_Wtick #define MPI_Wtime PMPI_Wtime @@ -30,6 +32,7 @@ #define MPI_Comm_call_errhandler PMPI_Comm_call_errhandler #define MPI_Comm_compare PMPI_Comm_compare #define MPI_Comm_create PMPI_Comm_create +#define MPI_Comm_create_from_group PMPI_Comm_create_from_group #define MPI_Comm_create_group PMPI_Comm_create_group #define MPI_Comm_create_errhandler PMPI_Comm_create_errhandler #define MPI_Comm_create_keyval PMPI_Comm_create_keyval @@ -37,6 +40,7 @@ #define MPI_Comm_dup PMPI_Comm_dup #define MPI_Comm_dup_with_info PMPI_Comm_dup_with_info #define MPI_Comm_idup PMPI_Comm_idup +#define MPI_Comm_idup_with_info PMPI_Comm_idup_with_info #define MPI_Comm_free PMPI_Comm_free #define MPI_Comm_free_keyval PMPI_Comm_free_keyval #define MPI_Comm_get_info PMPI_Comm_get_info @@ -77,6 +81,7 @@ #define MPI_Group_difference PMPI_Group_difference #define MPI_Group_excl PMPI_Group_excl #define MPI_Group_free PMPI_Group_free +#define MPI_Group_from_session_pset PMPI_Group_from_session_pset #define MPI_Group_incl PMPI_Group_incl #define MPI_Group_intersection PMPI_Group_intersection #define MPI_Group_range_excl PMPI_Group_range_excl @@ -92,12 +97,14 @@ #define MPI_Info_get PMPI_Info_get #define MPI_Info_get_nkeys PMPI_Info_get_nkeys #define MPI_Info_get_nthkey PMPI_Info_get_nthkey +#define MPI_Info_get_string PMPI_Info_get_string #define MPI_Info_get_valuelen PMPI_Info_get_valuelen #define MPI_Info_set PMPI_Info_set #define MPI_Init PMPI_Init #define MPI_Init_thread PMPI_Init_thread #define MPI_Initialized PMPI_Initialized #define MPI_Intercomm_create PMPI_Intercomm_create +#define MPI_Intercomm_create_from_groups PMPI_Intercomm_create_from_groups #define MPI_Intercomm_merge PMPI_Intercomm_merge #define MPI_Iprobe PMPI_Iprobe #define MPI_Is_thread_main PMPI_Is_thread_main @@ -108,10 +115,20 @@ #define MPI_Pack_size PMPI_Pack_size #define MPI_Pcontrol PMPI_Pcontrol #define MPI_Probe PMPI_Probe +#define MPI_Parrived PMPI_Parrived +#define MPI_Pready PMPI_Pready 
+#define MPI_Pready_list PMPI_Pready_list +#define MPI_Pready_range PMPI_Pready_range #define MPI_Query_thread PMPI_Query_thread #define MPI_Register_datarep PMPI_Register_datarep #define MPI_Request_free PMPI_Request_free #define MPI_Request_get_status PMPI_Request_get_status +#define MPI_Session_get_info PMPI_Session_get_info +#define MPI_Session_get_nth_pset PMPI_Session_get_nth_pset +#define MPI_Session_get_num_psets PMPI_Session_get_num_psets +#define MPI_Session_get_pset_info PMPI_Session_get_pset_info +#define MPI_Session_init PMPI_Session_init +#define MPI_Session_finalize PMPI_Session_finalize #define MPI_Start PMPI_Start #define MPI_Startall PMPI_Startall #define MPI_Status_f2f08 PMPI_Status_f2f08 diff --git a/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-status.h b/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-status.h index d20bd24829e..a34472700d8 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-status.h +++ b/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-status.h @@ -2,6 +2,7 @@ ! ! Copyright (c) 2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. +! Copyright (c) 2021 Cisco Systems, Inc. All rights reserved ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -9,7 +10,7 @@ ! $HEADER$ -interface PMPI_Status_f082f +interface subroutine PMPI_Status_f082f(f08_status, f_status, ierror) use mpi_types @@ -22,7 +23,7 @@ end subroutine PMPI_Status_f082f end interface -interface PMPI_Status_f2f08 +interface subroutine PMPI_Status_f2f08(f_status, f08_status, ierror) use mpi_types diff --git a/ompi/mpi/fortran/use-mpi/mpi-types.F90.in b/ompi/mpi/fortran/use-mpi/mpi-types.F90.in index 3b2ac85e2e8..7d462a4242b 100644 --- a/ompi/mpi/fortran/use-mpi/mpi-types.F90.in +++ b/ompi/mpi/fortran/use-mpi/mpi-types.F90.in @@ -2,6 +2,9 @@ ! ! Copyright (c) 2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. +! Copyright (c) 2022 Cisco Systems, Inc. All rights reserved +! 
Copyright (c) 2022 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -16,9 +19,12 @@ #define OMPI_PRIVATE #endif +! These types appear in both the "mpi" module and the "mpi_f08" module +! (yes, the MPI spec requires that the TYPE(MPI_Blah) types all show +! up in both modules). module mpi_types -type, BIND(C) :: MPI_Status + type, BIND(C) :: MPI_Status integer :: MPI_SOURCE integer :: MPI_TAG integer :: MPI_ERROR @@ -36,4 +42,184 @@ type, BIND(C) :: MPI_Status integer OMPI_PRIVATE :: internal(@OMPI_FORTRAN_STATUS_SIZE@ - 3) end type MPI_Status + type, BIND(C) :: MPI_Comm + integer :: MPI_VAL + end type MPI_Comm + + type, BIND(C) :: MPI_Datatype + integer :: MPI_VAL + end type MPI_Datatype + + type, BIND(C) :: MPI_Errhandler + integer :: MPI_VAL + end type MPI_Errhandler + + type, BIND(C) :: MPI_File + integer :: MPI_VAL + end type MPI_File + + type, BIND(C) :: MPI_Group + integer :: MPI_VAL + end type MPI_Group + + type, BIND(C) :: MPI_Info + integer :: MPI_VAL + end type MPI_Info + + type, BIND(C) :: MPI_Message + integer :: MPI_VAL + end type MPI_Message + + type, BIND(C) :: MPI_Op + integer :: MPI_VAL + end type MPI_Op + + type, BIND(C) :: MPI_Request + integer :: MPI_VAL + end type MPI_Request + + type, BIND(C) :: MPI_Session + integer :: MPI_VAL + end type MPI_Session + + type, BIND(C) :: MPI_Win + integer :: MPI_VAL + end type MPI_Win + + ! Interfaces for operators with handles. + ! There are no comparisons for MPI_Status because it is not a handle. + ! This covers both .EQ. and == + interface operator (.EQ.) + module procedure ompi_comm_op_eq + module procedure ompi_datatype_op_eq + module procedure ompi_errhandler_op_eq + module procedure ompi_file_op_eq + module procedure ompi_group_op_eq + module procedure ompi_info_op_eq + module procedure ompi_message_op_eq + module procedure ompi_op_op_eq + module procedure ompi_request_op_eq + module procedure ompi_win_op_eq + end interface operator (.EQ.) 
+ + ! This covers both .NE. and /= + interface operator (.NE.) + module procedure ompi_comm_op_ne + module procedure ompi_datatype_op_ne + module procedure ompi_errhandler_op_ne + module procedure ompi_file_op_ne + module procedure ompi_group_op_ne + module procedure ompi_info_op_ne + module procedure ompi_message_op_ne + module procedure ompi_op_op_ne + module procedure ompi_request_op_ne + module procedure ompi_win_op_ne + end interface operator (.NE.) + +contains + + ! .EQ. operator + !----------------- + logical function ompi_comm_op_eq(a, b) + type(MPI_Comm), intent(in) :: a, b + ompi_comm_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) + end function ompi_comm_op_eq + + logical function ompi_datatype_op_eq(a, b) + type(MPI_Datatype), intent(in) :: a, b + ompi_datatype_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) + end function ompi_datatype_op_eq + + logical function ompi_errhandler_op_eq(a, b) + type(MPI_Errhandler), intent(in) :: a, b + ompi_errhandler_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) + end function ompi_errhandler_op_eq + + logical function ompi_file_op_eq(a, b) + type(MPI_File), intent(in) :: a, b + ompi_file_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) + end function ompi_file_op_eq + + logical function ompi_group_op_eq(a, b) + type(MPI_Group), intent(in) :: a, b + ompi_group_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) + end function ompi_group_op_eq + + logical function ompi_info_op_eq(a, b) + type(MPI_Info), intent(in) :: a, b + ompi_info_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) + end function ompi_info_op_eq + + logical function ompi_message_op_eq(a, b) + type(MPI_Message), intent(in) :: a, b + ompi_message_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) + end function ompi_message_op_eq + + logical function ompi_op_op_eq(a, b) + type(MPI_Op), intent(in) :: a, b + ompi_op_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) + end function ompi_op_op_eq + + logical function ompi_request_op_eq(a, b) + type(MPI_Request), intent(in) :: a, b + ompi_request_op_eq = (a%MPI_VAL .EQ. 
b%MPI_VAL) + end function ompi_request_op_eq + + logical function ompi_win_op_eq(a, b) + type(MPI_Win), intent(in) :: a, b + ompi_win_op_eq = (a%MPI_VAL .EQ. b%MPI_VAL) + end function ompi_win_op_eq + + ! .NE. operator + !----------------- + logical function ompi_comm_op_ne(a, b) + type(MPI_Comm), intent(in) :: a, b + ompi_comm_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) + end function ompi_comm_op_ne + + logical function ompi_datatype_op_ne(a, b) + type(MPI_Datatype), intent(in) :: a, b + ompi_datatype_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) + end function ompi_datatype_op_ne + + logical function ompi_errhandler_op_ne(a, b) + type(MPI_Errhandler), intent(in) :: a, b + ompi_errhandler_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) + end function ompi_errhandler_op_ne + + logical function ompi_file_op_ne(a, b) + type(MPI_File), intent(in) :: a, b + ompi_file_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) + end function ompi_file_op_ne + + logical function ompi_group_op_ne(a, b) + type(MPI_Group), intent(in) :: a, b + ompi_group_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) + end function ompi_group_op_ne + + logical function ompi_info_op_ne(a, b) + type(MPI_Info), intent(in) :: a, b + ompi_info_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) + end function ompi_info_op_ne + + logical function ompi_message_op_ne(a, b) + type(MPI_Message), intent(in) :: a, b + ompi_message_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) + end function ompi_message_op_ne + + logical function ompi_op_op_ne(a, b) + type(MPI_Op), intent(in) :: a, b + ompi_op_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) + end function ompi_op_op_ne + + logical function ompi_request_op_ne(a, b) + type(MPI_Request), intent(in) :: a, b + ompi_request_op_ne = (a%MPI_VAL .NE. b%MPI_VAL) + end function ompi_request_op_ne + + logical function ompi_win_op_ne(a, b) + type(MPI_Win), intent(in) :: a, b + ompi_win_op_ne = (a%MPI_VAL .NE. 
b%MPI_VAL) + end function ompi_win_op_ne + end module diff --git a/ompi/mpi/man/man3/MPI_Allgather.3in b/ompi/mpi/man/man3/MPI_Allgather.3in index 5a0bc50375e..b4747e5a16e 100644 --- a/ompi/mpi/man/man3/MPI_Allgather.3in +++ b/ompi/mpi/man/man3/MPI_Allgather.3in @@ -7,7 +7,7 @@ .\" $COPYRIGHT$ .TH MPI_Allgather 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Allgather, MPI_Iallgather\fP \- Gathers data from all processes and distributes it to all processes +\fBMPI_Allgather, MPI_Iallgather, MPI_Allgather_init\fP \- Gathers data from all processes and distributes it to all processes .SH SYNTAX .ft R @@ -22,6 +22,10 @@ int MPI_Iallgather(const void\fI *sendbuf\fP, int \fI sendcount\fP, MPI_Datatype\fI sendtype\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, MPI_Datatype\fI recvtype\fP, MPI_Comm\fI comm\fP, MPI_Request \fI*request\fP) +int MPI_Allgather_init(const void\fI *sendbuf\fP, int \fI sendcount\fP, + MPI_Datatype\fI sendtype\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, + MPI_Datatype\fI recvtype\fP, MPI_Comm\fI comm\fP, MPI_Info\fI info\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -39,6 +43,12 @@ MPI_IALLGATHER(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, COMM\fP INTEGER \fIREQUEST, IERROR\fP +MPI_ALLGATHER_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, + RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF\fP(*)\fI, RECVBUF\fP (*) + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, COMM\fP + INTEGER \fIINFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -62,6 +72,16 @@ MPI_Iallgather(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \f TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Allgather_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, \fIrecvtype\fP, + \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), 
INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP .fi .SH INPUT PARAMETERS .ft R @@ -86,6 +106,9 @@ Datatype of receive buffer elements (handle). .TP 1i comm Communicator (handle). +.TP 1i +info +Info (handle, persistent only). .SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Allgather_init.3in b/ompi/mpi/man/man3/MPI_Allgather_init.3in new file mode 100644 index 00000000000..f7b03f37700 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Allgather_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Allgather.3 diff --git a/ompi/mpi/man/man3/MPI_Allgatherv.3in b/ompi/mpi/man/man3/MPI_Allgatherv.3in index 8ebf6fae27b..837cf40be31 100644 --- a/ompi/mpi/man/man3/MPI_Allgatherv.3in +++ b/ompi/mpi/man/man3/MPI_Allgatherv.3in @@ -7,7 +7,7 @@ .\" $COPYRIGHT$ .TH MPI_Allgatherv 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Allgatherv, MPI_Iallgatherv\fP \- Gathers data from all processes and delivers it to all. Each process may contribute a different amount of data. +\fBMPI_Allgatherv, MPI_Iallgatherv, MPI_Allgatherv_init\fP \- Gathers data from all processes and delivers it to all. Each process may contribute a different amount of data. 
.SH SYNTAX .ft R @@ -23,6 +23,11 @@ int MPI_Iallgatherv(const void\fI *sendbuf\fP, int\fI sendcount\fP, const int\fI displs[]\fP, MPI_Datatype\fI recvtype\fP, MPI_Comm\fI comm\fP, MPI_Request \fI*request\fP) +int MPI_Allgatherv_init(const void\fI *sendbuf\fP, int\fI sendcount\fP, + MPI_Datatype\fI sendtype\fP, void\fI *recvbuf\fP, const int\fI recvcounts[]\fP, + const int\fI displs[]\fP, MPI_Datatype\fI recvtype\fP, MPI_Comm\fI comm\fP, + MPI_Info\fI info\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -34,12 +39,18 @@ MPI_ALLGATHERV(\fISENDBUF\fP,\fI SENDCOUNT\fP, \fISENDTYPE\fP,\fI RECVBUF\fP, INTEGER \fISENDCOUNT\fP,\fI SENDTYPE\fP, \fIRECVCOUNT\fP(*) INTEGER \fIDISPLS\fP(*),\fI RECVTYPE\fP,\fI COMM\fP,\fI IERROR\fP -MPI_IALLGATHERV(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, - RECVCOUNT, DISPLS, RECVTYPE, COMM, REQUEST, IERROR\fP) +MPI_IALLGATHERV(\fISENDBUF\fP,\fI SENDCOUNT\fP, \fISENDTYPE\fP,\fI RECVBUF\fP, + \fIRECVCOUNT\fP,\fI DISPLS\fP, \fIRECVTYPE\fP,\fI COMM\fP, \fI REQUEST\fP, \fI IERROR\fP) \fISENDBUF\fP(*)\fI, RECVBUF\fP(*) INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT\fP(*), INTEGER \fIDISPLS\fP(*)\fI, RECVTYPE, COMM, REQUEST, IERROR\fP +MPI_ALLGATHERV_INIT(\fISENDBUF\fP,\fI SENDCOUNT\fP, \fISENDTYPE\fP,\fI RECVBUF\fP, + \fIRECVCOUNT\fP,\fI DISPLS\fP, \fIRECVTYPE\fP,\fI COMM\fP, \fI INFO\fP, \fI REQUEST\fP, \fI IERROR\fP) + \fISENDBUF\fP(*)\fI, RECVBUF\fP(*) + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT\fP(*), + INTEGER \fIDISPLS\fP(*)\fI, RECVTYPE, COMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -64,6 +75,18 @@ MPI_Iallgatherv(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \ TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Allgatherv_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcounts\fP, \fIdispls\fP, + \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: 
\fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIrecvcounts(*)\fP, \fIdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -88,6 +111,9 @@ Datatype of receive buffer elements (handle). .TP 1i comm Communicator (handle). +.TP 1i +info +Info (handle, persistent only). .sp .SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Allgatherv_init.3in b/ompi/mpi/man/man3/MPI_Allgatherv_init.3in new file mode 100644 index 00000000000..8fc7b812b1a --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Allgatherv_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Allgatherv.3 diff --git a/ompi/mpi/man/man3/MPI_Allreduce.3in b/ompi/mpi/man/man3/MPI_Allreduce.3in index 380b6f4c26e..d4cfa2a4d9e 100644 --- a/ompi/mpi/man/man3/MPI_Allreduce.3in +++ b/ompi/mpi/man/man3/MPI_Allreduce.3in @@ -7,7 +7,7 @@ .\" $COPYRIGHT$ .TH MPI_Allreduce 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Allreduce, MPI_Iallreduce\fP \- Combines values from all processes and distributes the result back to all processes. +\fBMPI_Allreduce, MPI_Iallreduce, MPI_Allreduce_init\fP \- Combines values from all processes and distributes the result back to all processes. 
.SH SYNTAX .ft R @@ -21,6 +21,10 @@ int MPI_Iallreduce(const void \fI*sendbuf\fP, void \fI*recvbuf\fP, int\fI count\ MPI_Datatype\fI datatype\fP, MPI_Op\fI op\fP, MPI_Comm\fI comm\fP, MPI_Request \fI*request\fP) +int MPI_Allreduce_init(const void \fI*sendbuf\fP, void \fI*recvbuf\fP, int\fI count\fP, + MPI_Datatype\fI datatype\fP, MPI_Op\fI op\fP, MPI_Comm\fI comm\fP, + MPI_Info \fIinfo\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -34,6 +38,10 @@ MPI_IALLREDUCE(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, REQUEST, IERROR\f \fISENDBUF\fP(*)\fI, RECVBUF\fP(*) INTEGER \fICOUNT, DATATYPE, OP, COMM, REQUEST, IERROR\fP +MPI_ALLREDUCE_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF\fP(*)\fI, RECVBUF\fP(*) + INTEGER \fICOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -58,6 +66,18 @@ MPI_Iallreduce(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\ TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Allreduce_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, + \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -76,6 +96,9 @@ Operation (handle). .TP 1i comm Communicator (handle). +.TP 1i +info +Info (handle, persistent only). 
.SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Allreduce_init.3in b/ompi/mpi/man/man3/MPI_Allreduce_init.3in new file mode 100644 index 00000000000..9c97358ebe6 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Allreduce_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Allreduce.3 diff --git a/ompi/mpi/man/man3/MPI_Alltoall.3in b/ompi/mpi/man/man3/MPI_Alltoall.3in index 2a9babbd99b..bc980a30650 100644 --- a/ompi/mpi/man/man3/MPI_Alltoall.3in +++ b/ompi/mpi/man/man3/MPI_Alltoall.3in @@ -8,7 +8,7 @@ .TH MPI_Alltoall 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Alltoall, MPI_Ialltoall\fP \- All processes send data to all processes +\fBMPI_Alltoall, MPI_Ialltoall, MPI_Alltoall_init\fP \- All processes send data to all processes .SH SYNTAX .ft R @@ -24,6 +24,10 @@ int MPI_Ialltoall(const void *\fIsendbuf\fP, int \fIsendcount\fP, MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, int \fIrecvcount\fP, MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) +int MPI_Alltoall_init(const void *\fIsendbuf\fP, int \fIsendcount\fP, + MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, int \fIrecvcount\fP, + MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -43,6 +47,13 @@ MPI_IALLTOALL(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE\fP INTEGER \fICOMM, REQUEST, IERROR\fP +MPI_ALLTOALL_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, + RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) + + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -68,6 +79,18 @@ MPI_Ialltoall(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fI TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Alltoall_init(\fIsendbuf\fP, \fIsendcount\fP, 
\fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, \fIrecvtype\fP, + \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -89,6 +112,9 @@ Datatype of receive buffer elements (handle). .TP 1.2i comm Communicator over which data is to be exchanged (handle). +.TP 1.2i +info +Info (handle, persistent only) .SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Alltoall_init.3in b/ompi/mpi/man/man3/MPI_Alltoall_init.3in new file mode 100644 index 00000000000..591c20bb28d --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Alltoall_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Alltoall.3 diff --git a/ompi/mpi/man/man3/MPI_Alltoallv.3in b/ompi/mpi/man/man3/MPI_Alltoallv.3in index c3d70d71188..a5fc02713fd 100644 --- a/ompi/mpi/man/man3/MPI_Alltoallv.3in +++ b/ompi/mpi/man/man3/MPI_Alltoallv.3in @@ -8,7 +8,7 @@ .TH MPI_Alltoallv 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Alltoallv, MPI_Ialltoallv\fP \- All processes send different amount of data to, and receive different amount of data from, all processes +\fBMPI_Alltoallv, MPI_Ialltoallv, MPI_Alltoallv_init\fP \- All processes send different amount of data to, and receive different amount of data from, all processes .SH SYNTAX .ft R @@ -26,6 +26,12 @@ int MPI_Ialltoallv(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], const int \fIrdispls\fP[], MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) +int MPI_Alltoallv_init(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], + const int \fIsdispls\fP[], 
MPI_Datatype \fIsendtype\fP, + void *\fIrecvbuf\fP, const int\fI recvcounts\fP[], + const int \fIrdispls\fP[], MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, + MPI_Info \fIinfo\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -47,6 +53,14 @@ MPI_IALLTOALLV(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPE\fP INTEGER \fICOMM, REQUEST, IERROR\fP +MPI_ALLTOALLV_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, + RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) + + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPE\fP + INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPE\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -74,6 +88,19 @@ MPI_Ialltoallv(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtype\fP, \ TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Alltoallv_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcounts\fP, + \fIrdispls\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIsdispls(*),\fP + \fIrecvcounts(*)\fP, \fIrdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -107,6 +134,9 @@ Datatype of receive buffer elements. .TP 1.2i comm Communicator over which data is to be exchanged. 
+.TP 1.2i +info +Info (handle, persistent only) .SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Alltoallv_init.3in b/ompi/mpi/man/man3/MPI_Alltoallv_init.3in new file mode 100644 index 00000000000..6cc7026e897 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Alltoallv_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Alltoallv.3 diff --git a/ompi/mpi/man/man3/MPI_Alltoallw.3in b/ompi/mpi/man/man3/MPI_Alltoallw.3in index 73a16c73041..4407f10c96a 100644 --- a/ompi/mpi/man/man3/MPI_Alltoallw.3in +++ b/ompi/mpi/man/man3/MPI_Alltoallw.3in @@ -8,7 +8,7 @@ .TH MPI_Alltoallw 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Alltoallw, MPI_Ialltoallw\fP \- All processes send data of different types to, and receive data of different types from, all processes +\fBMPI_Alltoallw, MPI_Ialltoallw, MPI_Alltoallw_init\fP \- All processes send data of different types to, and receive data of different types from, all processes .SH SYNTAX .ft R @@ -27,6 +27,12 @@ int MPI_Ialltoallw(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) +int MPI_Alltoallw_init(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], + const int \fIsdispls\fP[], const MPI_Datatype \fIsendtypes\fP[], + void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const int \fIrdispls\fP[], + const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, + MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -48,6 +54,14 @@ MPI_IALLTOALLW(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPES(*)\fP INTEGER \fICOMM, REQUEST, IERROR\fP +MPI_ALLTOALLW_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, + RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPES, COMM, INFO, REQUEST, IERROR\fP) + + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPES(*)\fP + INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPES(*)\fP + INTEGER \fICOMM, INFO, REQUEST, 
IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -77,6 +91,20 @@ MPI_Ialltoallw(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtypes\fP, TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Alltoallw_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtypes\fP, \fIrecvbuf\fP, + \fIrecvcounts\fP, \fIrdispls\fP, \fIrecvtypes\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIsdispls(*),\fP + \fIrecvcounts(*)\fP, \fIrdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: \fIsendtypes(*),\fP + \fIrecvtypes(*)\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -111,6 +139,9 @@ receiving data from rank j. .TP 1.2i comm Communicator over which data is to be exchanged. 
+.TP 1.2i +info +Info (handle, persistent only) .SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Alltoallw_init.3in b/ompi/mpi/man/man3/MPI_Alltoallw_init.3in new file mode 100644 index 00000000000..0cca872ba48 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Alltoallw_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Alltoallw.3 diff --git a/ompi/mpi/man/man3/MPI_Barrier.3.md b/ompi/mpi/man/man3/MPI_Barrier.3.md index 63ebbbccc4b..f12f0b73510 100644 --- a/ompi/mpi/man/man3/MPI_Barrier.3.md +++ b/ompi/mpi/man/man3/MPI_Barrier.3.md @@ -9,6 +9,7 @@ MPI_Barrier, MPI_Ibarrier - Synchronization between MPI processes in a group #include <mpi.h> int MPI_Barrier(MPI_Comm) int MPI_Ibarrier(MPI_Comm comm, MPI_Request *request) +int MPI_Barrier_init(MPI_Comm comm, MPI_Info info, MPI_Request *request) ``` ## Fortran Syntax ```fortran @@ -18,6 +19,8 @@ MPI_BARRIER(COMM, IERROR) INTEGER COMM, IERROR MPI_IBARRIER(COMM, REQUEST, IERROR) INTEGER COMM, REQUEST, IERROR +MPI_BARRIER_INIT(COMM, INFO, REQUEST, IERROR) + INTEGER COMM, INFO, REQUEST, IERROR ``` ## Fortran 2008 Syntax ```fortran @@ -29,9 +32,15 @@ MPI_Ibarrier(comm, request, ierror) TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Request), INTENT (OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror +MPI_Barrier_init(comm, info, request, ierror) + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT (OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror ``` # Input Parameter * `comm` : Communicator (handle). +* `info` : Info (handle, persistent only). # Output Parameters * `request` : Request (handle, non-blocking only). * `IERROR` : Fortran only: Error status (integer). 
diff --git a/ompi/mpi/man/man3/MPI_Barrier_init.3in b/ompi/mpi/man/man3/MPI_Barrier_init.3in new file mode 100644 index 00000000000..17e1bd261a8 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Barrier_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Barrier.3 diff --git a/ompi/mpi/man/man3/MPI_Bcast_init.3in b/ompi/mpi/man/man3/MPI_Bcast_init.3in new file mode 100644 index 00000000000..c0a86bebc97 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Bcast_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Bcast.3 diff --git a/ompi/mpi/man/man3/MPI_Comm_create_from_group.3.md b/ompi/mpi/man/man3/MPI_Comm_create_from_group.3.md new file mode 100644 index 00000000000..52739ef9f20 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Comm_create_from_group.3.md @@ -0,0 +1,89 @@ +# Name + +`MPI_Comm_create_from_group` - Creates a new communicator from a group and stringtag + +# Syntax + +## C Syntax + +```c +#include <mpi.h> + +int MPI_Comm_create_from_group(MPI_Group group, const char *stringtag, MPI_Info info, MPI_Errhandler errhandler, MPI_Comm *newcomm) +``` + +## Fortran Syntax + +```fortran +USE MPI +! or the older form: INCLUDE 'mpif.h' + +MPI_COMM_CREATE_FROM_GROUP(GROUP, STRINGTAG, INFO, ERRHANDLER, NEWCOMM, IERROR) + INTEGER GROUP, INFO, ERRHANDLER, NEWCOMM, IERROR + CHARACTER*(*) STRINGTAG +``` + +## Fortran 2008 Syntax + +```fortran +USE mpi_f08 + +MPI_Comm_create_from_group(group, stringtag, info, errhandler, newcomm, ierror) + TYPE(MPI_Group), INTENT(IN) :: group + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newcomm + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +``` + +# Input Parameters + +* `group` : Group (handle) +* `stringtag` : Unique identifier for this operation (string) +* `info` : info object (handle) +* `errhandler` : error handler to be attached to the new intra-communicator (handle) + +# Output Parameters + +* `newcomm` : New communicator (handle). 
+* `IERROR` : Fortran only: Error status (integer). + +# Description + +`MPI_Comm_create_from_group` is similar to `MPI_Comm_create_group`, except +that the set of MPI processes involved in the creation of the new intra-communicator +is specified by a group argument, rather than the group associated with a pre-existing communicator. +If a non-empty group is specified, then all MPI processes in that group must call +the function and each of these MPI processes must provide the same arguments, including +a `group` that contains the same members with the same ordering, and identical `stringtag` +value. In the event that `MPI_GROUP_EMPTY` is supplied as the group argument, then the +call is a local operation and `MPI_COMM_NULL` is returned as `newcomm`. The `stringtag` argument +is analogous to the `tag` used for `MPI_Comm_create_group`. If multiple threads at +a given MPI process perform concurrent `MPI_Comm_create_from_group` operations, +the user must distinguish these operations by providing different `stringtag` arguments. The +`stringtag` shall not exceed MPI_MAX_STRINGTAG_LEN characters in length. For C, this includes +space for a null terminating character. + +# Notes + +The `errhandler` argument specifies an error handler to be attached to the new intracommunicator. +The `info` argument provides hints and assertions, possibly MPI implementation dependent, which +indicate desired characteristics and guide communicator creation. MPI_MAX_STRINGTAG_LEN shall have a value +of at least 63. + + +# Errors + +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +`MPI_Comm_set_errhandler`; the predefined error handler `MPI_ERRORS_RETURN` +may be used to cause error values to be returned. 
Note that MPI does not +guarantee that an MPI program can continue past an error. + +# See Also + +[`MPI_Comm_create_group`(3)](MPI_Comm_create_group.html) diff --git a/ompi/mpi/man/man3/MPI_Comm_idup_with_info.3in b/ompi/mpi/man/man3/MPI_Comm_idup_with_info.3in new file mode 100644 index 00000000000..13690bf4200 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Comm_idup_with_info.3in @@ -0,0 +1,91 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2021 Triad National Security, LLC. All rights reserved. +.\" Copyright 2013 Los Alamos National Security, LLC. All rights reserved. +.\" Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2006-2008 Sun Microsystems, Inc. +.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright (c) 2021 Triad National Security, LLC. All rights reserved. +.\" $COPYRIGHT$ +.TH MPI_Comm_idup_with_info 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Comm_idup_with_info \fP \- Start the nonblocking duplication of an existing communicator with all its cached information. + +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include <mpi.h> +int MPI_Comm_idup_with_info(MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, MPI_Comm\fI *newcomm\fP, MPI_Request\fI *request\fP) + +.fi +.SH Fortran Syntax +.nf +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_COMM_IDUP_WITH_INFO(\fICOMM, INFO, NEWCOMM, REQUEST, IERROR\fP) + INTEGER \fICOMM, INFO, NEWCOMM, REQUEST, IERROR\fP + +.fi +.SH Fortran 2008 Syntax +.nf +USE mpi_f08 +MPI_Comm_idup_with_info(\fIcomm\fP, \fIinfo\fP, \fInewcomm\fP, \fIrequest\fP, \fIierror\fP) + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Comm), INTENT(OUT) :: \fInewcomm\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +comm +Communicator (handle). +info +Info object (handle). 
+ +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +newcomm +Copy of comm (handle). 
+.ft R +.TP 1i +request +Communication request (handle). +.ft R +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +MPI_Comm_idup_with_info starts the nonblocking duplication of an existing communicator comm with associated key +values. For each key value, the respective copy callback function determines the attribute value associated with this key in the new communicator; one particular action that a copy callback may take is to delete the attribute from the new communicator. Returns in newcomm a new communicator with the same group, any copied cached information, but a new context (see Section 5.7.1 of the MPI-1 Standard, "Functionality"). The communicator returned in \fInewcomm\fP will not be available until the request is complete. The hints provided by the supplied \fIinfo\fP argument are associated with the output communicator. +.sp +The completion of a communicator duplication request can be determined by calling any of MPI_Wait, MPI_Waitany, MPI_Test, or MPI_Testany with the request returned by this function. + +.SH NOTES +This operation is used to provide a parallel +library call with a duplicate communication space that has the same properties as the original communicator. This includes any attributes (see below) and topologies (see Chapter 6, "Process Topologies," in the MPI-1 Standard). This call is valid even if there are pending point-to-point communications involving the communicator comm. A typical call might involve an MPI_Comm_idup_with_info at the beginning of the parallel call, and an MPI_Comm_free of that duplicated communicator at the end of the call. Other models of communicator management are also possible. +.sp +This call applies to both intra- and intercommunicators. + +Note that it is not defined by the MPI standard what happens if the +attribute copy callback invokes other MPI functions. 
In Open MPI, it +is not valid for attribute copy callbacks (or any of their children) +to add or delete attributes on the same object on which the attribute +copy callback is being invoked. + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. + + +.SH SEE ALSO +MPI_Comm_dup +MPI_Comm_idup +MPI_Comm_dup_with_info diff --git a/ompi/mpi/man/man3/MPI_Exscan.3in b/ompi/mpi/man/man3/MPI_Exscan.3in index 8741eff6435..c550f8625ea 100644 --- a/ompi/mpi/man/man3/MPI_Exscan.3in +++ b/ompi/mpi/man/man3/MPI_Exscan.3in @@ -23,6 +23,10 @@ int MPI_Iexscan(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Op \fIop\fP, MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) +int MPI_Exscan_init(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, int \fIcount\fP, + MPI_Datatype \fIdatatype\fP, MPI_Op \fIop\fP, MPI_Comm \fIcomm\fP, + MPI_Info \fIinfo\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -36,6 +40,10 @@ MPI_IEXSCAN(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, REQUEST, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP INTEGER \fICOUNT, DATATYPE, OP, COMM, REQUEST, IERROR\fP +MPI_EXSCAN_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fICOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -59,6 +67,17 @@ MPI_Iexscan(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP 
INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Exscan_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Exscan_init.3in b/ompi/mpi/man/man3/MPI_Exscan_init.3in new file mode 100644 index 00000000000..c2ff4cf3254 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Exscan_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Exscan.3 diff --git a/ompi/mpi/man/man3/MPI_Gather.3.md b/ompi/mpi/man/man3/MPI_Gather.3.md index 5d58221317b..67091047c53 100644 --- a/ompi/mpi/man/man3/MPI_Gather.3.md +++ b/ompi/mpi/man/man3/MPI_Gather.3.md @@ -1,6 +1,6 @@ # Name -`MPI_Gather`, `MPI_Igather` - Gathers values from a group of processes. +`MPI_Gather`, `MPI_Igather`, `MPI_Gather_init` - Gathers values from a group of processes. 
# Synopsis @@ -16,6 +16,10 @@ int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request) + +int MPI_Gather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, + MPI_Comm comm, MPI_Info info, MPI_Request *request) ``` ## Fortran Syntax @@ -35,6 +39,12 @@ MPI_IGATHER(SENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, SENDBUF(*), RECVBUF(*) INTEGER SENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, ROOT INTEGER COMM, REQUEST, IERROR + +MPI_GATHER_INIT(SENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, + RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR) + SENDBUF(*), RECVBUF(*) + INTEGER SENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, ROOT + INTEGER COMM, INFO, REQUEST, IERROR ``` ## Fortran 2008 Syntax @@ -60,6 +70,17 @@ MPI_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror + +MPI_Gather_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, + root, comm, info, request, ierror) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: sendbuf + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, recvcount, root + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror ``` @@ -73,6 +94,7 @@ at root). * `recvtype` : Datatype of recvbuffer elements (handle, significant only at root). * `root` : Rank of receiving process (integer). * `comm` : Communicator (handle). +* `info` : Info (handle, persistent only). 
# Output Parameters diff --git a/ompi/mpi/man/man3/MPI_Gather_init.3in b/ompi/mpi/man/man3/MPI_Gather_init.3in new file mode 100644 index 00000000000..d15bc2d25cf --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Gather_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Gather.3 diff --git a/ompi/mpi/man/man3/MPI_Gatherv.3.md b/ompi/mpi/man/man3/MPI_Gatherv.3.md index 2a52322ef84..e9925817b4d 100644 --- a/ompi/mpi/man/man3/MPI_Gatherv.3.md +++ b/ompi/mpi/man/man3/MPI_Gatherv.3.md @@ -1,6 +1,6 @@ # Name -`MPI_Gatherv`, `MPI_Igatherv` - Gathers varying amounts of data from all +`MPI_Gatherv`, `MPI_Igatherv`, `MPI_Gatherv_init` - Gathers varying amounts of data from all processes to the root process # Syntax @@ -17,6 +17,10 @@ int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request) + +int MPI_Gatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request) ``` ## Fortran Syntax @@ -36,6 +40,12 @@ MPI_IGATHERV(SENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNTS, SENDBUF(*), RECVBUF(*) INTEGER SENDCOUNT, SENDTYPE, RECVCOUNTS(*), DISPLS(*) INTEGER RECVTYPE, ROOT, COMM, REQUEST, IERROR + +MPI_GATHERV_INIT(SENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNTS, + DISPLS, RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR) + SENDBUF(*), RECVBUF(*) + INTEGER SENDCOUNT, SENDTYPE, RECVCOUNTS(*), DISPLS(*) + INTEGER RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR ``` ## Fortran 2008 Syntax @@ -62,6 +72,18 @@ MPI_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, TYPE(MPI_Comm), INTENT(IN) :: comm TYPE(MPI_Request), INTENT(OUT) :: request INTEGER, OPTIONAL, INTENT(OUT) :: ierror + +MPI_Gatherv_init(sendbuf, sendcount, sendtype, 
recvbuf, recvcounts, displs, + recvtype, root, comm, info, request, ierror) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: sendbuf + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: recvbuf + INTEGER, INTENT(IN) :: sendcount, root + INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) + TYPE(MPI_Datatype), INTENT(IN) :: sendtype, recvtype + TYPE(MPI_Comm), INTENT(IN) :: comm + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror ``` # Input Parameters @@ -79,6 +101,7 @@ from process i (significant only at root). (handle). * `root` : Rank of receiving process (integer). * `comm` : Communicator (handle). +* `info` : Info (handle, persistent only). diff --git a/ompi/mpi/man/man3/MPI_Gatherv_init.3in b/ompi/mpi/man/man3/MPI_Gatherv_init.3in new file mode 100644 index 00000000000..3202cdbbd85 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Gatherv_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Gatherv.3 diff --git a/ompi/mpi/man/man3/MPI_Group_from_session_pset.3.md b/ompi/mpi/man/man3/MPI_Group_from_session_pset.3.md new file mode 100644 index 00000000000..486d7cfcbb4 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Group_from_session_pset.3.md @@ -0,0 +1,75 @@ +# Name + +`MPI_Group_from_session_pset` - Creates a group using a provided session handle and process set. + +# Syntax + +## C Syntax + +```c +#include + +int MPI_Group_from_session_pset(MPI_Session session, const char *pset_name, MPI_Group *newgroup) +``` + +## Fortran Syntax + +```fortran +USE MPI +! 
or the older form: INCLUDE 'mpif.h' + +MPI_GROUP_FROM_SESSION_PSET(SESSION, PSET_NAME, NEWGROUP, IERROR) + INTEGER SESSION, NEWGROUP, IERROR + CHARACTER*(*) PSET_NAME +``` + +## Fortran 2008 Syntax + +```fortran +USE mpi_f08 + +MPI_Group_from_session_pset(session, pset_name, newgroup, ierror) + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Group), INTENT(OUT) :: newgroup + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +``` + +# Input Parameters + +* `session` : Session (handle). +* `pset_name` : name of process set to use to create the new group (string) + +# Output Parameters + +* `newgroup` : New group derived from supplied session and process set (handle). +* `IERROR` : Fortran only: Error status (integer). + +# Description + +The function `MPI_Group_from_session_pset` creates a group `newgroup` using the +provided `session` handle and `process set`. The process set name must be one returned from +an invocation of `MPI_Session_get_nth_pset` using the supplied `session` handle. If the +`pset_name` does not exist, MPI_GROUP_NULL will be returned in the `newgroup` argument. + +# Note + +As with other group constructors, `MPI_Group_from_session_pset` is a local function. + +# Errors + +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. + +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +`MPI_Session_set_errhandler`; the predefined error handler `MPI_ERRORS_RETURN` +may be used to cause error values to be returned. Note that MPI does not +guarantee that an MPI program can continue past an error. 
+ +# See Also + +[`MPI_Session_init`(3)](MPI_Session_init.html) +[`MPI_Session_get_nth_pset`(3)](MPI_Session_get_nth_pset.html) +[`MPI_Group_free`(3)](MPI_Group_free.html) diff --git a/ompi/mpi/man/man3/MPI_Info_get_string.3in b/ompi/mpi/man/man3/MPI_Info_get_string.3in new file mode 100644 index 00000000000..4d848efb418 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Info_get_string.3in @@ -0,0 +1,102 @@ +.\" -*- nroff -*- +.\" Copyright 2013 Los Alamos National Security, LLC. All rights reserved. +.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2006-2008 Sun Microsystems, Inc. +.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright (c) 2020 Google, LLC. All rights reserved. +.\" $COPYRIGHT$ +.TH MPI_Info_get_string 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Info_get_string\fP \- Retrieves the value associated with a key in an info object. + +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include <mpi.h> +int MPI_Info_get_string(MPI_Info \fIinfo\fP, const char \fI*key\fP, int *\fIbuflen\fP, char \fI*value\fP, int *\fIflag\fP) + +.fi +.SH Fortran Syntax +.nf +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_INFO_GET_STRING(\fIINFO, KEY, BUFLEN, VALUE, FLAG, IERROR\fP) + INTEGER \fIINFO, BUFLEN, IERROR\fP + CHARACTER*(*) \fIKEY, VALUE\fP + LOGICAL \fIFLAG\fP + +.fi +.SH Fortran 2008 Syntax +.nf +USE mpi_f08 +MPI_Info_get_string(\fIinfo\fP, \fIkey\fP, \fIbuflen\fP, \fIvalue\fP, \fIflag\fP, \fIierror\fP) + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + CHARACTER(LEN=*), INTENT(IN) :: \fIkey\fP + INTEGER, INTENT(INOUT) :: \fIbuflen\fP + CHARACTER(LEN=valuelen), INTENT(OUT) :: \fIvalue\fP + LOGICAL, INTENT(OUT) :: \fIflag\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +info +Info object (handle). +.ft R +.TP 1i +key +Key (string). + +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +buflen +On entry, length of value arg. On return, set to required size to hold value string (integer).
+.ft R +.TP 1i +value +Value (string). +.ft R +.TP 1i +flag +Returns true if key defined, false if not (boolean). +.ft R +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +MPI_Info_get_string retrieves the value associated with \fIkey\fP from \fIinfo\fP, if any. If such a key exists in info, it sets \fIflag\fP to true and returns the value in \fIvalue\fP, otherwise it sets +flag to false and leaves value unchanged. \fIbuflen\fP on input is the size of the provided buffer, for the output of buflen it is the size of the buffer needed to store the value string. +If the buflen passed into the function is less than the actual size needed to store the value string (including null terminator in C), the value is truncated. On return, +the value of \fIbuflen\fP will be set to the required buffer size to hold the value string. If buflen is set to 0, value is not changed. In C, \fIbuflen\fP includes the required space for the +null terminator. In C, this function returns a null terminated string in all cases where the \fIbuflen\fP input value is greater than 0. + +If \fIkey\fP is larger than MPI_MAX_INFO_KEY, the call is erroneous. + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
+ +.SH SEE ALSO +.ft R +MPI_Info_create +.br +MPI_Info_delete +.br +MPI_Info_dup +.br +MPI_Info_free +.br +MPI_Info_get_nkeys +.br +MPI_Info_get_nthkey +.br +MPI_Info_set +.br + diff --git a/ompi/mpi/man/man3/MPI_Intercomm_create_from_groups.3.md b/ompi/mpi/man/man3/MPI_Intercomm_create_from_groups.3.md new file mode 100644 index 00000000000..6cd96541653 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Intercomm_create_from_groups.3.md @@ -0,0 +1,92 @@ +# Name + +`MPI_Intercomm_create_from_groups` - Creates a new inter-communicator from a local and remote group and stringtag + +# Syntax + +## C Syntax + +```c +#include <mpi.h> + +int MPI_Intercomm_create_from_groups(MPI_Group local_group, int local_leader, MPI_Group remote_group, int remote_leader, const char *stringtag, MPI_Info info, MPI_Errhandler errhandler, MPI_Comm *newintercomm) +``` + +## Fortran Syntax + +```fortran +USE MPI +! or the older form: INCLUDE 'mpif.h' + +MPI_INTERCOMM_CREATE_FROM_GROUPS(LOCAL_GROUP, LOCAL_LEADER, REMOTE_GROUP, REMOTE_LEADER, STRINGTAG, INFO, ERRHANDLER, NEWINTERCOMM, IERROR) + INTEGER LOCAL_GROUP, LOCAL_LEADER, REMOTE_GROUP, REMOTE_LEADER, INFO, ERRHANDLER, NEWINTERCOMM, IERROR + CHARACTER*(*) STRINGTAG +``` + +## Fortran 2008 Syntax + +```fortran +USE mpi_f08 + +MPI_Intercomm_create_from_groups(local_group, local_leader, remote_group, remote_leader, stringtag, info, errhandler, newintercomm, ierror) + TYPE(MPI_Group), INTENT(IN) :: local_group, remote_group + INTEGER, INTENT(IN) :: local_leader, remote_leader + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newintercomm + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +``` + +# Input Parameters + +* `local_group` : Local group (handle) +* `local_leader` : rank of local group leader in local_group (integer) +* `remote_group` : Remote group (handle) +* `remote_leader` : rank of remote leader in remote_group, significant only at
local_leader (integer) +* `stringtag` : Unique identifier for this operation (string) +* `info` : info object (handle) +* `errhandler` : error handler to be attached to the new inter-communicator (handle) + +# Output Parameters + +* `newintercomm` : New inter-communicator (handle). +* `IERROR` : Fortran only: Error status (integer). + +# Description + +`MPI_Intercomm_create_from_groups` creates an inter-communicator. Unlike `MPI_Intercomm_create`, this function +uses as input previously defined, disjoint local and remote groups. The calling MPI +process must be a member of the local group. The call is collective over the union of +the local and remote groups. All involved MPI processes shall provide an identical value +for the `stringtag` argument. Within each group, all MPI processes shall provide identical +`local_group`, `local_leader` arguments. Wildcards are not permitted for the +`remote_leader` or `local_leader` arguments. The `stringtag` argument serves the same purpose +as the `stringtag` used in the `MPI_Comm_create_from_group` function; it differentiates +concurrent calls in a multithreaded environment. The `stringtag` shall not exceed +`MPI_MAX_STRINGTAG_LEN` characters in length. For C, this includes space for a null terminating +character. In the event that MPI_GROUP_EMPTY is supplied as the `local_group` or `remote_group` or both, then the +call is a local operation and MPI_COMM_NULL is returned as the `newintercomm`. + +# Notes + +The `errhandler` argument specifies an error handler to be attached to the new inter-communicator. +The `info` argument provides hints and assertions, possibly MPI implementation dependent, which +indicate desired characteristics and guide communicator creation. MPI_MAX_STRINGTAG_LEN shall have a value +of at least 63. + + +# Errors + +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument.
+Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +`MPI_Comm_set_errhandler`; the predefined error handler `MPI_ERRORS_RETURN` +may be used to cause error values to be returned. Note that MPI does not +guarantee that an MPI program can continue past an error. + +# See Also + +[`MPI_Comm_create_from_group`(3)](MPI_Comm_create_from_group.html) diff --git a/ompi/mpi/man/man3/MPI_Isendrecv.3in b/ompi/mpi/man/man3/MPI_Isendrecv.3in new file mode 100644 index 00000000000..27188af6922 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Isendrecv.3in @@ -0,0 +1,118 @@ +.\" -*- nroff -*- +.\" Copyright 2013 Los Alamos National Security, LLC. All rights reserved. +.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2006-2008 Sun Microsystems, Inc. +.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright (c) 2020 Google, LLC. All rights reserved. +.\" $COPYRIGHT$ +.TH MPI_Isendrecv 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Isendrecv\fP \- Sends and receives a message. + +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include +int MPI_Isendrecv(const void *\fIsendbuf\fP, int\fI sendcount\fP, MPI_Datatype\fI sendtype\fP, + int\fI dest\fP, int\fI sendtag\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, + MPI_Datatype\fI recvtype\fP, int\fI source\fP, int\fI recvtag\fP, + MPI_Comm\fI comm\fP, MPI_Request\fI *request\fP) + +.fi +.SH Fortran Syntax +.nf +USE MPI +! 
or the older form: INCLUDE 'mpif.h' +MPI_ISENDRECV(\fISENDBUF, SENDCOUNT, SENDTYPE, DEST, SENDTAG, + RECVBUF, RECVCOUNT, RECVTYPE, SOURCE, RECVTAG, COMM, + REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNT, SENDTYPE, DEST, SENDTAG\fP + INTEGER \fIRECVCOUNT, RECVTYPE, SOURCE, RECVTAG, COMM\fP + INTEGER \fIREQUEST, IERROR\fP + +.fi +.SH Fortran 2008 Syntax +.nf +USE mpi_f08 +MPI_Isendrecv(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIdest\fP, \fIsendtag\fP, \fIrecvbuf\fP, + \fIrecvcount\fP, \fIrecvtype\fP, \fIsource\fP, \fIrecvtag\fP, \fIcomm\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN) :: \fIsendbuf\fP + TYPE(*), DIMENSION(..) :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIdest\fP, \fIsendtag\fP, \fIrecvcount\fP, \fIsource,\fP + \fIrecvtag\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Request) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +sendbuf +Initial address of send buffer (choice). +.TP 1i +sendcount +Number of elements to send (integer). +.TP 1i +sendtype +Type of elements in send buffer (handle). +.TP 1i +dest +Rank of destination (integer). +.TP 1i +sendtag +Send tag (integer). +.TP 1i +recvcount +Maximum number of elements to receive (integer). +.TP 1i +recvtype +Type of elements in receive buffer (handle). +.TP 1i +source +Rank of source (integer). +.TP 1i +recvtag +Receive tag (integer). +.TP 1i +comm +Communicator (handle). + +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +recvbuf +Initial address of receive buffer (choice). +.TP 1i +request +Communication request (handle). +.ft R +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +The non-blocking send-receive operations combine in one call the sending of a message to one destination and the receiving of another message, from another process. 
The two (source and destination) are possibly the same. This operation is useful for executing a shift operation across a chain of processes. The send-receive operation can be used in conjunction with the functions described in the "Process Topologies" chapter of the MPI Standard in order to perform shifts on various logical topologies. +.sp +A message sent by a send-receive operation can be received by a regular receive operation or probed by a probe operation; a send-receive operation can receive a message sent by a regular send operation. +.sp +MPI_Isendrecv executes a non-blocking send and receive operation. Both send and receive use the same communicator, but possibly different tags. The send buffer and receive buffers must be disjoint, and may have different lengths and datatypes. +.sp +A non-blocking send-receive request can be determined to be completed by calling the MPI_Wait, MPI_Waitany, MPI_Test, or MPI_Testany with the request returned by this function. + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. + +.SH SEE ALSO +.ft R +.sp +MPI_Isendrecv_replace , MPI_Sendrecv, MPI_Sendrecv_replace + + diff --git a/ompi/mpi/man/man3/MPI_Isendrecv_replace.3in b/ompi/mpi/man/man3/MPI_Isendrecv_replace.3in new file mode 100644 index 00000000000..727d8acbfd5 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Isendrecv_replace.3in @@ -0,0 +1,107 @@ +.\" -*- nroff -*- +.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2006-2008 Sun Microsystems, Inc. 
+.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright (c) 2020 Google, LLC. All rights reserved. +.\" $COPYRIGHT$ +.TH MPI_Isendrecv_replace 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Isendrecv_replace\fP \- Sends and receives a message using a single buffer. + +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include +int MPI_Isendrecv_replace(void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\fP, + int\fI dest\fP, int\fI sendtag\fP, int\fI source\fP, int\fI recvtag\fP, MPI_Comm\fI comm\fP, + MPI_Request\fI *request\fP) + +.fi +.SH Fortran Syntax +.nf +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_ISENDRECV_REPLACE(\fIBUF, COUNT, DATATYPE, DEST, SENDTAG, SOURCE, + RECVTAG, COMM, REQUEST, IERROR\fP) + \fIBUF\fP(*) + INTEGER \fICOUNT, DATATYPE, DEST, SENDTAG\fP + INTEGER \fISOURCE, RECVTAG, COMM\fP + INTEGER \fIREQUEST, IERROR\fP + +.fi +.SH Fortran 2008 Syntax +.nf +USE mpi_f08 +MPI_Isendrecv_replace(\fIbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIdest\fP, \fIsendtag\fP, \fIsource\fP, \fIrecvtag\fP, + \fIcomm\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..) :: \fIbuf\fP + INTEGER, INTENT(IN) :: \fIcount\fP, \fIdest\fP, \fIsendtag\fP, \fIsource\fP, \fIrecvtag\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Request) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +.fi +.SH INPUT/OUTPUT PARAMETER +.ft R +.TP 1i +buf +Initial address of send and receive buffer (choice). + +.SH INPUT PARAMETERS +.ft R +.TP 1i +count +Number of elements in send and receive buffer (integer). +.TP 1i +datatype +Type of elements to send and receive (handle). +.TP 1i +dest +Rank of destination (integer). +.TP 1i +sendtag +Send message tag (integer). +.TP 1i +source +Rank of source (integer). +.TP 1i +recvtag +Receive message tag (integer). +.TP 1i +comm +Communicator (handle). + +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +request +Communication request (handle). 
+.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +The non-blocking send-receive operations combine in one call the sending of a message to one destination and the receiving of another message, from another process. The two (source and destination) are possibly the same. A send-receive operation is useful for executing a shift operation across a chain of processes. The send-receive operation can be used in conjunction with the functions described in the "Process Topologies" chapter of the MPI Standard in order to perform shifts on various logical topologies. Also, a send-receive operation is useful for implementing remote procedure calls. +.sp +A message sent by a send-receive operation can be received by a regular receive operation or probed by a probe operation; a send-receive operation can receive a message sent by a regular send operation. +.sp +MPI_Isendrecv_replace executes a non-blocking send and receive. The same buffer is used both for the send and for the receive, so that the message sent is replaced by the message received. +.sp +A non-blocking send-receive request can be determined to be completed by calling the MPI_Wait, MPI_Waitany, MPI_Test, or MPI_Testany with the request returned by this function. + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
+ +.SH SEE ALSO +.ft R +.sp +MPI_Isendrecv, MPI_Sendrecv, MPI_Sendrecv_replace + + + diff --git a/ompi/mpi/man/man3/MPI_Neighbor_allgather.3in b/ompi/mpi/man/man3/MPI_Neighbor_allgather.3in index e152d747ce8..56eab70bd30 100644 --- a/ompi/mpi/man/man3/MPI_Neighbor_allgather.3in +++ b/ompi/mpi/man/man3/MPI_Neighbor_allgather.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Neighbor_allgather 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Neighbor_allgather, MPI_Ineighbor_allgather\fP \- Gathers and distributes data from and to all neighbors +\fBMPI_Neighbor_allgather, MPI_Ineighbor_allgather, MPI_Neighbor_allgather_init\fP \- Gathers and distributes data from and to all neighbors .SH SYNTAX .ft R @@ -21,6 +21,10 @@ int MPI_Ineighbor_allgather(const void\fI *sendbuf\fP, int \fI sendcount\fP, MPI_Datatype\fI sendtype\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, MPI_Datatype\fI recvtype\fP, MPI_Comm\fI comm\fP, MPI_Request \fIreq\fP) +int MPI_Neighbor_allgather_init(const void\fI *sendbuf\fP, int \fI sendcount\fP, + MPI_Datatype\fI sendtype\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, + MPI_Datatype\fI recvtype\fP, MPI_Comm\fI comm\fP, MPI_Info\fI info\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -38,6 +42,12 @@ MPI_INEIGHBOR_ALLGATHER(\fISENDBUF\fP,\fI SENDCOUNT\fP,\fI SENDTYPE\fP,\fI RECVB INTEGER \fISENDCOUNT\fP,\fI SENDTYPE\fP,\fI RECVCOUNT\fP,\fI RECVTYPE\fP,\fI COMM\fP, INTEGER \fIREQUEST, IERROR\fP +MPI_NEIGHBOR_ALLGATHER_INIT(\fISENDBUF\fP,\fI SENDCOUNT\fP,\fI SENDTYPE\fP,\fI RECVBUF\fP,\fI RECVCOUNT\fP,\fI + RECVTYPE\fP,\fI COMM\fP,\fI INFO\fP,\fI REQUEST\fP,\fI IERROR\fP) + \fISENDBUF\fP (*), \fIRECVBUF\fP (*) + INTEGER \fISENDCOUNT\fP,\fI SENDTYPE\fP,\fI RECVCOUNT\fP,\fI RECVTYPE\fP,\fI COMM\fP, + INTEGER \fIINFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -61,6 +71,17 @@ MPI_Ineighbor_allgather(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvb TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL,
INTENT(OUT) :: \fIierror\fP +MPI_Neighbor_allgather_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, + \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Neighbor_allgather_init.3in b/ompi/mpi/man/man3/MPI_Neighbor_allgather_init.3in new file mode 100644 index 00000000000..f0569265c29 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Neighbor_allgather_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Neighbor_allgather.3 diff --git a/ompi/mpi/man/man3/MPI_Neighbor_allgatherv.3in b/ompi/mpi/man/man3/MPI_Neighbor_allgatherv.3in index fdc3e02d157..29807404f40 100644 --- a/ompi/mpi/man/man3/MPI_Neighbor_allgatherv.3in +++ b/ompi/mpi/man/man3/MPI_Neighbor_allgatherv.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Neighbor_allgatherv 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Neighbor_allgatherv, MPI_Ineighbor_allgatherv\fP \- Gathers and distributes data from and to all neighbors. Each process may contribute a different amount of data. +\fBMPI_Neighbor_allgatherv, MPI_Ineighbor_allgatherv, MPI_Neighbor_allgatherv_init\fP \- Gathers and distributes data from and to all neighbors. Each process may contribute a different amount of data. 
.SH SYNTAX .ft R @@ -22,6 +22,11 @@ int MPI_Ineighbor_allgatherv(const void\fI *sendbuf\fP, int\fI sendcount\fP, const int\fI displs[]\fP, MPI_Datatype\fI recvtype\fP, MPI_Comm\fI comm\fP, MPI_Request \fI*request\fP) +int MPI_Neighbor_allgatherv_init(const void\fI *sendbuf\fP, int\fI sendcount\fP, + MPI_Datatype\fI sendtype\fP, void\fI *recvbuf\fP, const int\fI recvcounts[]\fP, + const int\fI displs[]\fP, MPI_Datatype\fI recvtype\fP, MPI_Comm\fI comm\fP, + MPI_Info \fIinfo\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -39,6 +44,12 @@ MPI_INEIGHBOR_ALLGATHERV(\fISENDBUF\fP,\fI SENDCOUNT\fP, \fISENDTYPE\fP,\fI RECV INTEGER \fISENDCOUNT\fP,\fI SENDTYPE\fP, \fIRECVCOUNT\fP(*), INTEGER \fIDISPLS\fP(*),\fI RECVTYPE\fP,\fI COMM\fP,\fIREQUEST\fP,\fI IERROR\fP +MPI_NEIGHBOR_ALLGATHERV_INIT(\fISENDBUF\fP,\fI SENDCOUNT\fP, \fISENDTYPE\fP,\fI RECVBUF\fP, + \fIRECVCOUNT\fP,\fI DISPLS\fP, \fIRECVTYPE\fP,\fI COMM\fP,\fI INFO\fP,\fI REQUEST\fP,\fI IERROR\fP) + \fISENDBUF\fP(*), \fIRECVBUF\fP(*) + INTEGER \fISENDCOUNT\fP,\fI SENDTYPE\fP, \fIRECVCOUNT\fP(*), + INTEGER \fIDISPLS\fP(*),\fI RECVTYPE\fP,\fI COMM\fP,\fIINFO\fP,\fIREQUEST\fP,\fI IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -63,6 +74,18 @@ MPI_Ineighbor_allgatherv(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecv TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Neighbor_allgatherv_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcounts\fP, + \fIdispls\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIrecvcounts(*)\fP, \fIdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + 
TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -87,6 +110,9 @@ Datatype of receive buffer elements (handle). .TP 1i comm Communicator (handle). +.TP 1i +info +Info (handle, persistent only). .sp .SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Neighbor_allgatherv_init.3in b/ompi/mpi/man/man3/MPI_Neighbor_allgatherv_init.3in new file mode 100644 index 00000000000..b8ce05e6462 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Neighbor_allgatherv_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Neighbor_allgatherv.3 diff --git a/ompi/mpi/man/man3/MPI_Neighbor_alltoall.3in b/ompi/mpi/man/man3/MPI_Neighbor_alltoall.3in index 2f867a82ec3..69959ec0ea4 100644 --- a/ompi/mpi/man/man3/MPI_Neighbor_alltoall.3in +++ b/ompi/mpi/man/man3/MPI_Neighbor_alltoall.3in @@ -7,7 +7,7 @@ .TH MPI_Neighbor_alltoall 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Neighbor_alltoall, MPI_Ineighbor_alltoall\fP \- All processes send data to neighboring processes in a virtual topology communicator +\fBMPI_Neighbor_alltoall, MPI_Ineighbor_alltoall, MPI_Neighbor_alltoall_init\fP \- All processes send data to neighboring processes in a virtual topology communicator .SH SYNTAX .ft R @@ -23,6 +23,10 @@ int MPI_Ineighbor_alltoall(const void *\fIsendbuf\fP, int \fIsendcount\fP, MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, int \fIrecvcount\fP, MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) +int MPI_Neighbor_alltoall_init(const void *\fIsendbuf\fP, int \fIsendcount\fP, + MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, int \fIrecvcount\fP, + MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -42,6 +46,13 @@ MPI_INEIGHBOR_ALLTOALL(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE\fP INTEGER \fICOMM, REQUEST, IERROR\fP 
+MPI_NEIGHBOR_ALLTOALL_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, + RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) + + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -67,6 +78,18 @@ MPI_Ineighbor_alltoall(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbu TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Neighbor_alltoall_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, + \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -88,6 +111,9 @@ Datatype of receive buffer elements (handle). .TP 1.2i comm Communicator over which data is to be exchanged (handle). +.TP 1.2i +info +Info (handle, persistent only). 
.SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Neighbor_alltoall_init.3in b/ompi/mpi/man/man3/MPI_Neighbor_alltoall_init.3in new file mode 100644 index 00000000000..56f630ecd06 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Neighbor_alltoall_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Neighbor_alltoall.3 diff --git a/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in b/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in index aaf678813ab..3a488b547e0 100644 --- a/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in +++ b/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in @@ -7,7 +7,7 @@ .TH MPI_Neighbor_alltoallv 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Neighbor_alltoallv, MPI_Ineighbor_alltoallv\fP \- All processes send different amounts of data to, and receive different amounts of data from, all neighbors +\fBMPI_Neighbor_alltoallv, MPI_Ineighbor_alltoallv, MPI_Neighbor_alltoallv_init\fP \- All processes send different amounts of data to, and receive different amounts of data from, all neighbors .SH SYNTAX .ft R @@ -25,6 +25,12 @@ int MPI_Ineighbor_alltoallv(const void *\fIsendbuf\fP, const int \fIsendcounts\f const int \fIrdispls\fP[], MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) +int MPI_Neighbor_alltoallv_init(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], + const int \fIsdispls\fP[], MPI_Datatype \fIsendtype\fP, + void *\fIrecvbuf\fP, const int\fI recvcounts\fP[], + const int \fIrdispls\fP[], MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, + MPI_Info \fIinfo\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -46,6 +52,14 @@ MPI_INEIGHBOR_ALLTOALLV(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPE\fP INTEGER \fICOMM, REQUEST, IERROR\fP +MPI_NEIGHBOR_ALLTOALLV_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, + RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) + + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNTS(*), SDISPLS(*), 
SENDTYPE\fP + INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPE\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -73,6 +87,19 @@ MPI_Ineighbor_alltoallv(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendt TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Neighbor_alltoallv_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtype\fP, \fIrecvbuf\fP, + \fIrecvcounts\fP, \fIrdispls\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIsdispls(*),\fP + \fIrecvcounts(*)\fP, \fIrdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -106,6 +133,9 @@ Datatype of receive buffer elements. .TP 1.2i comm Communicator over which data is to be exchanged. +.TP 1.2i +info +Info (handle, persistent only). 
.SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Neighbor_alltoallv_init.3in b/ompi/mpi/man/man3/MPI_Neighbor_alltoallv_init.3in new file mode 100644 index 00000000000..13b4e89a23c --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Neighbor_alltoallv_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Neighbor_alltoallv.3 diff --git a/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in b/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in index e71731b9951..ed21676afe3 100644 --- a/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in +++ b/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in @@ -7,7 +7,7 @@ .TH MPI_Neighbor_alltoallw 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Neighbor_alltoallw, MPI_Ineighbor_alltoallw\fP \- All processes send data of different types to, and receive data of different types from, all processes +\fBMPI_Neighbor_alltoallw, MPI_Ineighbor_alltoallw, MPI_Neighbor_alltoallw_init\fP \- All processes send data of different types to, and receive data of different types from, all processes .SH SYNTAX .ft R @@ -25,6 +25,11 @@ int MPI_Ineighbor_alltoallw(const void *\fIsendbuf\fP, const int \fIsendcounts\f void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const MPI_Aint \fIrdispls\fP[], const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) +int MPI_Neighbor_alltoallw_init(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], + const MPI_Aint \fIsdispls\fP[], const MPI_Datatype \fIsendtypes\fP[], + void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const MPI_Aint \fIrdispls\fP[], + const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -48,6 +53,15 @@ MPI_INEIGHBOR_ALLTOALLW(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, INTEGER(KIND=MPI_ADDRESS_KIND) \fISDISPLS(*), RDISPLS(*)\fP INTEGER \fICOMM, REQUEST, IERROR\fP +MPI_NEIGHBOR_ALLTOALLW_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, + RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPES, COMM, INFO, 
REQUEST, IERROR\fP) + + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNTS(*), SENDTYPES(*)\fP + INTEGER \fIRECVCOUNTS(*), RECVTYPES(*)\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fISDISPLS(*), RDISPLS(*)\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -77,6 +91,21 @@ MPI_Ineighbor_alltoallw(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendt TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Neighbor_alltoallw_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtypes\fP, \fIrecvbuf\fP, + \fIrecvcounts\fP, \fIrdispls\fP, \fIrecvtypes\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIrecvcounts(*)\fP + INTEGER(KIND=MPI_ADDRESS_KIND), INTENT(IN), ASYNCHRONOUS :: + \fIsdispls(*)\fP, \fIrdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: \fIsendtypes(*),\fP + \fIrecvtypes(*)\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -111,6 +140,9 @@ receiving data from neighbor j. .TP 1.2i comm Communicator over which data is to be exchanged. +.TP 1.2i +info +Info (handle, persistent only). 
.SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Neighbor_alltoallw_init.3in b/ompi/mpi/man/man3/MPI_Neighbor_alltoallw_init.3in new file mode 100644 index 00000000000..299138b3a37 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Neighbor_alltoallw_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Neighbor_alltoallw.3 diff --git a/ompi/mpi/man/man3/MPI_Reduce.3in b/ompi/mpi/man/man3/MPI_Reduce.3in index 808c92d9a6c..8af09fe4a93 100644 --- a/ompi/mpi/man/man3/MPI_Reduce.3in +++ b/ompi/mpi/man/man3/MPI_Reduce.3in @@ -7,7 +7,7 @@ .\" $COPYRIGHT$ .TH MPI_Reduce 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Reduce, MPI_Ireduce\fP \- Reduces values on all processes within a group. +\fBMPI_Reduce, MPI_Ireduce, MPI_Reduce_init\fP \- Reduces values on all processes within a group. .SH SYNTAX .ft R @@ -22,6 +22,11 @@ int MPI_Ireduce(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\fP, MPI_Op\fI op\fP, int\fI root\fP, MPI_Comm\fI comm\fP, MPI_Request \fI*request\fP) + +int MPI_Reduce_init(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, int\fI count\fP, + MPI_Datatype\fI datatype\fP, MPI_Op\fI op\fP, int\fI root\fP, + MPI_Comm\fI comm\fP, MPI_Info \fIinfo\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -37,6 +42,11 @@ MPI_IREDUCE(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, ROOT, COMM, \fISENDBUF(*), RECVBUF(*)\fP INTEGER \fICOUNT, DATATYPE, OP, ROOT, COMM, REQUEST, IERROR\fP +MPI_REDUCE_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, ROOT, COMM, + INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fICOUNT, DATATYPE, OP, ROOT, COMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -61,6 +71,19 @@ MPI_Ireduce(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPI_Reduce_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIroot\fP, 
\fIcomm\fP, \fIinfo\fP, \fIrequest\fP, + \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIcount\fP, \fIroot\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -82,6 +105,9 @@ Rank of root process (integer). .TP 1i comm Communicator (handle). +.TP 1i +info +Info (handle, persistent). .SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Reduce_init.3in b/ompi/mpi/man/man3/MPI_Reduce_init.3in new file mode 100644 index 00000000000..f8c65fb7223 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Reduce_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Reduce.3 diff --git a/ompi/mpi/man/man3/MPI_Reduce_scatter.3in b/ompi/mpi/man/man3/MPI_Reduce_scatter.3in index 288c0b69e12..01fa0d9f8f7 100644 --- a/ompi/mpi/man/man3/MPI_Reduce_scatter.3in +++ b/ompi/mpi/man/man3/MPI_Reduce_scatter.3in @@ -7,7 +7,7 @@ .\" $COPYRIGHT$ .TH MPI_Reduce_scatter 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Reduce_scatter, MPI_Ireduce_scatter\fP \- Combines values and scatters the results. +\fBMPI_Reduce_scatter, MPI_Ireduce_scatter, MPI_Reduce_scatter_init\fP \- Combines values and scatters the results. 
.SH SYNTAX .ft R @@ -20,6 +20,10 @@ int MPI_Reduce_scatter(const void *\fIsendbuf\fP, void\fI *recvbuf\fP, const int int MPI_Ireduce_scatter(const void *\fIsendbuf\fP, void\fI *recvbuf\fP, const int\fI recvcounts\fP[], MPI_Datatype\fI datatype\fP, MPI_Op\fI op\fP, MPI_Comm\fI comm\fP, MPI_Request \fI*request\fP) + +int MPI_Reduce_scatter_init(const void *\fIsendbuf\fP, void\fI *recvbuf\fP, const int\fI recvcounts\fP[], + MPI_Datatype\fI datatype\fP, MPI_Op\fI op\fP, MPI_Comm\fI comm\fP, MPI_Info \fIinfo\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -35,6 +39,11 @@ MPI_IREDUCE_SCATTER(\fISENDBUF, RECVBUF, RECVCOUNTS, DATATYPE, OP, \fISENDBUF(*), RECVBUF(*)\fP INTEGER \fIRECVCOUNTS(*), DATATYPE, OP, COMM, REQUEST, IERROR \fP +MPI_REDUCE_SCATTER_INIT(\fISENDBUF, RECVBUF, RECVCOUNTS, DATATYPE, OP, + COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fIRECVCOUNTS(*), DATATYPE, OP, COMM, INFO, REQUEST, IERROR \fP + .fi .SH Fortran 2008 Syntax .nf @@ -60,6 +69,18 @@ MPI_Ireduce_scatter(\fIsendbuf\fP, \fIrecvbuf\fP, \fIrecvcounts\fP, \fIdatatype\ TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Reduce_scatter_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIrecvcounts\fP, \fIdatatype\fP, \fIop\fP, \fIcomm\fP, + \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIrecvcounts(*)\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -79,6 +100,9 @@ Operation (handle). .TP 1i comm Communicator (handle). +.TP 1i +info +Info (handle, persistent). 
.SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Reduce_scatter_block.3in b/ompi/mpi/man/man3/MPI_Reduce_scatter_block.3in index e2d7a10facb..e6e3f3e11f7 100644 --- a/ompi/mpi/man/man3/MPI_Reduce_scatter_block.3in +++ b/ompi/mpi/man/man3/MPI_Reduce_scatter_block.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Reduce_scatter_block 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Reduce_scatter_block, MPI_Ireduce_scatter_block\fP \- Combines values and scatters the results in blocks. +\fBMPI_Reduce_scatter_block, MPI_Ireduce_scatter_block, MPI_Reduce_scatter_block_init\fP \- Combines values and scatters the results in blocks. .SH SYNTAX .ft R @@ -19,6 +19,10 @@ int MPI_Reduce_scatter_block(const void *\fIsendbuf\fP, void\fI *recvbuf\fP, int int MPI_Ireduce_scatter_block(const void *\fIsendbuf\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, MPI_Datatype\fI datatype\fP, MPI_Op\fI op\fP, MPI_Comm\fI comm\fP, MPI_Request \fI*request\fP) + +int MPI_Reduce_scatter_block_init(const void *\fIsendbuf\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, + MPI_Datatype\fI datatype\fP, MPI_Op\fI op\fP, MPI_Comm\fI comm\fP, MPI_Info\fI info\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -34,6 +38,12 @@ MPI_IREDUCE_SCATTER_BLOCK(\fISENDBUF, RECVBUF, RECVCOUNT, DATATYPE, OP, \fISENDBUF(*), RECVBUF(*)\fP INTEGER \fIRECVCOUNT, DATATYPE, OP, COMM, REQUEST, IERROR \fP + +MPI_REDUCE_SCATTER_BLOCK_INIT(\fISENDBUF, RECVBUF, RECVCOUNT, DATATYPE, OP, + COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fIRECVCOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR \fP + .fi .SH Fortran 2008 Syntax .nf @@ -59,6 +69,18 @@ MPI_Ireduce_scatter_block(\fIsendbuf\fP, \fIrecvbuf\fP, \fIrecvcount\fP, \fIdata TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Reduce_scatter_block_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIrecvcount\fP, \fIdatatype\fP, \fIop\fP, \fIcomm\fP, + \fIinfo\fP, \fIrequest\fP, 
\fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIrecvcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -77,6 +99,9 @@ Operation (handle). .TP 1i comm Communicator (handle). +.TP 1i +info +Info (handle, persistent only). .SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Reduce_scatter_block_init.3in b/ompi/mpi/man/man3/MPI_Reduce_scatter_block_init.3in new file mode 100644 index 00000000000..f649a6c443d --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Reduce_scatter_block_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Reduce_scatter_block.3 diff --git a/ompi/mpi/man/man3/MPI_Reduce_scatter_init.3in b/ompi/mpi/man/man3/MPI_Reduce_scatter_init.3in new file mode 100644 index 00000000000..4f03aec6068 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Reduce_scatter_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Reduce_scatter.3 diff --git a/ompi/mpi/man/man3/MPI_Scan.3in b/ompi/mpi/man/man3/MPI_Scan.3in index 5ccb8ae51a9..e212fe63fce 100644 --- a/ompi/mpi/man/man3/MPI_Scan.3in +++ b/ompi/mpi/man/man3/MPI_Scan.3in @@ -9,7 +9,7 @@ .TH MPI_Scan 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Scan, MPI_Iscan\fP \- Computes an inclusive scan (partial reduction) +\fBMPI_Scan, MPI_Iscan, MPI_Scan_init\fP \- Computes an inclusive scan (partial reduction) .SH SYNTAX .ft R @@ -24,6 +24,10 @@ int MPI_Iscan(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Op \fIop\fP, MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) +int MPI_Scan_init(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, int \fIcount\fP, + MPI_Datatype \fIdatatype\fP, MPI_Op \fIop\fP, MPI_Comm 
\fIcomm\fP, + MPI_Info \fIinfo\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -37,6 +41,10 @@ MPI_ISCAN(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, REQUEST, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP INTEGER \fICOUNT, DATATYPE, OP, COMM, REQUEST, IERROR\fP +MPI_SCAN_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fICOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -60,6 +68,17 @@ MPI_Iscan(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \ TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Scan_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -78,6 +97,9 @@ Operation (handle). .TP 1i comm Communicator (handle). 
+.TP 1i +info +Info (handle, persistent only) .SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Scan_init.3in b/ompi/mpi/man/man3/MPI_Scan_init.3in new file mode 100644 index 00000000000..42cdcd65e92 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Scan_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Scan.3 diff --git a/ompi/mpi/man/man3/MPI_Scatter.3in b/ompi/mpi/man/man3/MPI_Scatter.3in index 1a97867c17c..427fc23e015 100644 --- a/ompi/mpi/man/man3/MPI_Scatter.3in +++ b/ompi/mpi/man/man3/MPI_Scatter.3in @@ -7,7 +7,7 @@ .\" $COPYRIGHT$ .TH MPI_Scatter 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Scatter, MPI_Iscatter\fP \- Sends data from one task to all tasks in a group. +\fBMPI_Scatter, MPI_Iscatter, MPI_Scatter_init\fP \- Sends data from one task to all tasks in a group. .SH SYNTAX .ft R @@ -22,6 +22,10 @@ int MPI_Iscatter(const void *\fIsendbuf\fP, int\fI sendcount\fP, MPI_Datatype\fI void\fI *recvbuf\fP, int\fI recvcount\fP, MPI_Datatype\fI recvtype\fP, int\fI root\fP, MPI_Comm\fI comm\fP, MPI_Request \fI*request\fP) +int MPI_Scatter_init(const void *\fIsendbuf\fP, int\fI sendcount\fP, MPI_Datatype\fI sendtype\fP, + void\fI *recvbuf\fP, int\fI recvcount\fP, MPI_Datatype\fI recvtype\fP, int\fI root\fP, + MPI_Comm\fI comm\fP, MPI_Info\fI info\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -39,6 +43,12 @@ MPI_ISCATTER(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, ROOT\fP INTEGER \fICOMM, REQUEST, IERROR\fP +MPI_SCATTER_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, + RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, ROOT\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -62,6 +72,17 @@ MPI_Iscatter(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIr TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, 
INTENT(OUT) :: \fIierror\fP +MPI_Scatter_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, \fIrecvtype\fP, + \fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP, \fIroot\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -87,6 +108,9 @@ Rank of sending process (integer). .TP 1i comm Communicator (handle). +.TP 1i +info +Info (handle, persistent). .SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Scatter_init.3in b/ompi/mpi/man/man3/MPI_Scatter_init.3in new file mode 100644 index 00000000000..05572bc5ca1 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Scatter_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Scatter.3 diff --git a/ompi/mpi/man/man3/MPI_Scatterv.3in b/ompi/mpi/man/man3/MPI_Scatterv.3in index fc8f46d2ec1..81cd31d8527 100644 --- a/ompi/mpi/man/man3/MPI_Scatterv.3in +++ b/ompi/mpi/man/man3/MPI_Scatterv.3in @@ -8,7 +8,7 @@ .\" $COPYRIGHT$ .TH MPI_Scatterv 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Scatterv, MPI_Iscatterv\fP \- Scatters a buffer in parts to all tasks in a group. +\fBMPI_Scatterv, MPI_Iscatterv, MPI_Scatterv_init\fP \- Scatters a buffer in parts to all tasks in a group. 
.SH SYNTAX .ft R @@ -23,6 +23,10 @@ int MPI_Iscatterv(const void *\fIsendbuf\fP, const int\fI sendcounts[]\fP, const MPI_Datatype\fI sendtype\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, MPI_Datatype\fI recvtype\fP, int\fI root\fP, MPI_Comm\fI comm\fP, MPI_Request \fI*request\fP) +int MPI_Scatterv_init(const void *\fIsendbuf\fP, const int\fI sendcounts[]\fP, const int\fI displs[]\fP, + MPI_Datatype\fI sendtype\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, + MPI_Datatype\fI recvtype\fP, int\fI root\fP, MPI_Comm\fI comm\fP, MPI_Info\fI info\fP, MPI_Request \fI*request\fP) + .fi .SH Fortran Syntax .nf @@ -40,6 +44,12 @@ MPI_ISCATTERV(\fISENDBUF, SENDCOUNTS, DISPLS, SENDTYPE, RECVBUF, INTEGER \fISENDCOUNTS(*), DISPLS(*), SENDTYPE\fP INTEGER \fIRECVCOUNT, RECVTYPE, ROOT, COMM, REQUEST, IERROR\fP +MPI_SCATTERV_INIT(\fISENDBUF, SENDCOUNTS, DISPLS, SENDTYPE, RECVBUF, + RECVCOUNT, RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNTS(*), DISPLS(*), SENDTYPE\fP + INTEGER \fIRECVCOUNT, RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP + .fi .SH Fortran 2008 Syntax .nf @@ -64,6 +74,18 @@ MPI_Iscatterv(\fIsendbuf\fP, \fIsendcounts\fP, \fIdispls\fP, \fIsendtype\fP, \fI TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP +MPI_Scatterv_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIdispls\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, + \fIrecvtype\fP, \fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIdispls(*)\fP + INTEGER, INTENT(IN) :: \fIrecvcount\fP, \fIroot\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, 
INTENT(OUT) :: \fIierror\fP + .fi .SH INPUT PARAMETERS .ft R @@ -93,6 +115,9 @@ Rank of sending process (integer). .TP 1i comm Communicator (handle). +.TP 1i +info +Info (handle, persistent only). .SH OUTPUT PARAMETERS .ft R diff --git a/ompi/mpi/man/man3/MPI_Scatterv_init.3in b/ompi/mpi/man/man3/MPI_Scatterv_init.3in new file mode 100644 index 00000000000..86a7f302ccf --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Scatterv_init.3in @@ -0,0 +1 @@ +.so man3/MPI_Scatterv.3 diff --git a/ompi/mpi/man/man3/MPI_Session_create_errhandler.3.md b/ompi/mpi/man/man3/MPI_Session_create_errhandler.3.md new file mode 100644 index 00000000000..b2d74a3ad11 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Session_create_errhandler.3.md @@ -0,0 +1,76 @@ +# Name + +`MPI_Session_create_errhandler` - Creates an error handler that can be +attached to sessions + +# Syntax + +## C Syntax + +```c +#include <mpi.h> + +int MPI_Session_create_errhandler(MPI_Session_errhandler_function *function, + MPI_Errhandler *errhandler) +``` + +## Fortran Syntax + +```fortran +USE MPI +! or the older form: INCLUDE 'mpif.h' + +MPI_SESSION_CREATE_ERRHANDLER(FUNCTION, ERRHANDLER, IERROR) + EXTERNAL FUNCTION + INTEGER ERRHANDLER, IERROR +``` + +## Fortran 2008 Syntax + +```fortran +USE mpi_f08 + +MPI_Session_create_errhandler(session_errhandler_fn, errhandler, ierror) + PROCEDURE(MPI_Session_errhandler_function) :: session_errhandler_fn + TYPE(MPI_Errhandler), INTENT(OUT) :: errhandler + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +``` + +# Input Parameter + +* `function` : User-defined error handling procedure (function). + +# Output Parameters + +* `errhandler` : MPI error handler (handle). +* `IERROR` : Fortran only: Error status (integer). + +# Description + +`MPI_Session_create_errhandler` creates an error handler that can be attached +to sessions. This `function` is identical to `MPI_Errhandler_create`, +the use of which is deprecated. 
+In C, the user routine should be a `function` of type +`MPI_Session_errhandler_function`, which is defined as +```c +typedef void MPI_Session_errhandler_function(MPI_Session *, int *, ...); +``` +The first argument is the session in use. The second is the error +code to be returned by the MPI routine that raised the error. This +typedef replaces `MPI_Handler_function`, the use of which is deprecated. +In Fortran, the user routine should be of this form: +```fortran +SUBROUTINE SESSION_ERRHANDLER_FUNCTION(SESSION, ERROR_CODE, ...) + INTEGER SESSION, ERROR_CODE +``` + +# Errors + +Almost all MPI routines return an error value; C routines as the value +of the `function` and Fortran routines in the last argument. +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O `function` errors. The error handler may be changed with +`MPI_Session_set_errhandler`; the predefined error handler `MPI_ERRORS_RETURN` +may be used to cause error values to be returned. Note that MPI does not +guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Session_f2c.3.md b/ompi/mpi/man/man3/MPI_Session_f2c.3.md new file mode 100644 index 00000000000..2deaa5915ac --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Session_f2c.3.md @@ -0,0 +1,43 @@ +# NAME + +MPI_Session_c2f, MPI_Session_f2c - Translates a C session handle into a Fortran INTEGER-style session handle, or vice versa. 
+ +# SYNTAX + +## C Syntax + +```c +#include <mpi.h> + +int MPI_Session_f2c(const MPI_Fint *f_session, MPI_Session *c_session) +int MPI_Session_c2f(const MPI_Session *c_session, MPI_Fint *f_session) +``` + +# PARAMETERS + +* `f_session`: `mpi`-style `INTEGER` MPI session object +* `c_session`: C-style MPI session object + +# DESCRIPTION + +These two procedures are provided in C to convert from a Fortran +session (which is an array of integers) to a C session (which is a +structure), and vice versa. 
The conversion occurs on all the +information in `session`, including that which is hidden. That is, +no session information is lost in the conversion. + +When using `MPI_Session_f2c()`, if `f_session` is a valid Fortran +session, then `MPI_Session_f2c()` returns in `c_session` a +valid C session with the same content. If `f_session` is the Fortran +value of `MPI_SESSION_NULL`, or if +`f_session` is not a valid Fortran session, then the call is erroneous. + +When using `MPI_Session_c2f()`, the opposite conversion is applied. If +`c_session` is `MPI_SESSION_NULL`, or if +`c_session` is not a valid C session, then the call is erroneous. + +# NOTES + +These functions are only available in C; they are not available in any +of the Fortran MPI interfaces. + diff --git a/ompi/mpi/man/man3/MPI_Session_finalize.3.md b/ompi/mpi/man/man3/MPI_Session_finalize.3.md new file mode 100644 index 00000000000..c5d4d6d8219 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Session_finalize.3.md @@ -0,0 +1,78 @@ +# Name + +`MPI_Session_finalize` - releases all MPI state associated with a session + +# Syntax + +## C Syntax + +```c +#include + +int MPI_Session_finalize(MPI_Session *session) +``` + +## Fortran Syntax + +```fortran +USE MPI +! or the older form: INCLUDE 'mpif.h' + +MPI_SESSION_FINALIZE(SESSION, IERROR) + INTEGER SESSION, IERROR +``` + +## Fortran 2008 Syntax + +```fortran +USE mpi_f08 + +MPI_Session_finalize(session, ierror) + TYPE(MPI_Session), INTENT(IN) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +``` + +# Input Parameters + +* `session` : session to be finalized (handle) + +# Output Parameters + +* `IERROR` : Fortran only: Error status (integer). + +# Description + +`MPI_Session_finalize` releases all MPI state associated with the supplied `session`. Every instantiated +session must be finalized using `MPI_Session_finalize`. The handle `session` is set to +MPI_SESSION_NULL by the call. 
+ +# Notes + +Before an MPI process invokes `MPI_Session_finalize`, the process must perform +all MPI calls needed to complete its involvement in MPI communications: it must locally +complete all MPI operations that it initiated and it must execute matching calls needed to +complete MPI communications initiated by other processes. The call to `MPI_Session_finalize` does not free objects created by MPI calls; these +objects are freed using `MPI_XXX_FREE` calls. `MPI_Session_finalize` may be synchronizing on any or all of the groups associated +with communicators, windows, or files derived from the session and not disconnected, freed, +or closed, respectively, before the call to the `MPI_Session_finalize` procedure. +`MPI_Session_finalize` behaves as if all such synchronizations occur concurrently. As +`MPI_Comm_free` may mark a communicator for freeing later, `MPI_Session_finalize` +may be synchronizing on the group associated with a communicator that is only freed (with +`MPI_Comm_free`) rather than disconnected (with `MPI_Comm_disconnect`). + + +# Errors + +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +`MPI_Session_set_errhandler`; the predefined error handler `MPI_ERRORS_RETURN` +may be used to cause error values to be returned. Note that MPI does not +guarantee that an MPI program can continue past an error.
+ +# See Also + +[`MPI_Session_init`(3)](MPI_Session_init.html) +[`MPI_Comm_disconnect`(3)](MPI_Comm_disconnect.html) diff --git a/ompi/mpi/man/man3/MPI_Session_get_info.3.md b/ompi/mpi/man/man3/MPI_Session_get_info.3.md new file mode 100644 index 00000000000..e3fd2659fd3 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Session_get_info.3.md @@ -0,0 +1,71 @@ +# Name + +`MPI_Session_get_info` - Returns an info object containing the hints of an MPI Session + +# Syntax + +## C Syntax + +```c +#include <mpi.h> + +int MPI_Session_get_info(MPI_Session session, MPI_Info *info_used) +``` + +## Fortran Syntax + +```fortran +USE MPI +! or the older form: INCLUDE 'mpif.h' + +MPI_SESSION_GET_INFO(SESSION, INFO_USED, IERROR) + INTEGER SESSION, INFO_USED, IERROR +``` + +## Fortran 2008 Syntax + +```fortran +USE mpi_f08 + +MPI_Session_get_info(session, info_used) + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(OUT) :: info_used +``` + +# Input Parameters + +* `session` : session (handle) + +# Output Parameters + +* `info_used`: info object (handle) +* `IERROR` : Fortran only: Error status (integer). + +# Description + +`MPI_Session_get_info` returns a new info object containing the hints of the MPI +Session associated with `session`. The current setting of all hints related to this MPI Session +is returned in `info_used`. An MPI implementation is required to return all hints that are +supported by the implementation and have default values specified; any user-supplied hints +that were not ignored by the implementation; and any additional hints that were set by +the implementation. If no such hints exist, a handle to a newly created info object is +returned that contains no key/value pair. + +# Notes + +The user is responsible for freeing `info_used` via `MPI_Info_free`. + +# Errors + +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. +Before the error value is returned, the current MPI error handler is +called.
By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +`MPI_Session_set_errhandler`; the predefined error handler `MPI_ERRORS_RETURN` +may be used to cause error values to be returned. Note that MPI does not +guarantee that an MPI program can continue past an error. + +# See Also + +[`MPI_Session_init`(3)](MPI_Session_init.html) diff --git a/ompi/mpi/man/man3/MPI_Session_get_nth_pset.3.md b/ompi/mpi/man/man3/MPI_Session_get_nth_pset.3.md new file mode 100644 index 00000000000..f608930f7d6 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Session_get_nth_pset.3.md @@ -0,0 +1,86 @@ +# Name + +`MPI_Session_get_nth_pset` - Query runtime for name of the nth process set + +# Syntax + +## C Syntax + +```c +#include + +int MPI_Session_get_nth_pset(MPI_Session session, MPI_Info info, int n, int *pset_len, char *pset_name) +``` + +## Fortran Syntax + +```fortran +USE MPI +! or the older form: INCLUDE 'mpif.h' + +MPI_SESSION_GET_NTH_PSET(SESSION, INFO, N, PSET_LEN, PSET_NAME, IERROR) + INTEGER SESSION, INFO, N, PSET_LEN, IERROR + CHARACTER*(*) PSET_NAME +``` + +## Fortran 2008 Syntax + +```fortran +USE mpi_f08 + +MPI_Session_get_nth_pset(session, info, n, pset_len, pset_name, ierror) + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, INTENT(IN) :: n + INTEGER, INTENT(INOUT) :: pset_len + CHARACTER(LEN=*), INTENT(OUT) :: pset_name + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +``` + +# Input Parameters + +* `session` : session (handle) +* `info`: info object (handle) +* `n`: index of the desired process set name (integer) + +## Input/Output Parameter + +* `pset_len`: length of the pset_name argument (integer) + +# Output Parameters + +* `pset_name` : name of the nth process set (string) +* `IERROR` : Fortran only: Error status (integer). + +# Description + +`MPI_Session_get_nth_pset` returns the name of the nth process set in the supplied `pset_name` buffer. 
+`pset_len` is the size of the buffer needed to store the nth process set name. If the `pset_len` +passed into the function is less than the actual buffer size needed for the process set name, +then the string value returned in `pset_name` is truncated. If `pset_len` is set to 0, `pset_name` is +not changed. On return, the value of `pset_len` will be set to the required buffer size to hold +the process set name. In C, `pset_len` includes the required space for the null terminator. In +C, this function returns a null terminated string in all cases where the `pset_len` input value +is greater than 0. + +# Notes + +Process set names have an implementation-defined maximum length of +`MPI_MAX_PSET_NAME_LEN` characters. `MPI_MAX_PSET_NAME_LEN` shall have a value of +at least 63. + +# Errors + +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +`MPI_Session_set_errhandler`; the predefined error handler `MPI_ERRORS_RETURN` +may be used to cause error values to be returned. Note that MPI does not +guarantee that an MPI program can continue past an error. 
+ +# See Also + +[`MPI_Session_init`(3)](MPI_Session_init.html) +[`MPI_Session_get_num_psets`(3)](MPI_Session_get_num_psets.html) diff --git a/ompi/mpi/man/man3/MPI_Session_get_num_psets.3.md b/ompi/mpi/man/man3/MPI_Session_get_num_psets.3.md new file mode 100644 index 00000000000..3c61cb4d2dc --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Session_get_num_psets.3.md @@ -0,0 +1,77 @@ +# Name + +`MPI_Session_get_num_psets` - Query runtime for number of available process sets + +# Syntax + +## C Syntax + +```c +#include <mpi.h> + +int MPI_Session_get_num_psets(MPI_Session session, MPI_Info info, int *npset_names) +``` + +## Fortran Syntax + +```fortran +USE MPI +! or the older form: INCLUDE 'mpif.h' + +MPI_SESSION_GET_NUM_PSETS(SESSION, INFO, NPSET_NAMES, IERROR) + INTEGER SESSION, INFO, NPSET_NAMES, IERROR +``` + +## Fortran 2008 Syntax + +```fortran +USE mpi_f08 + +MPI_Session_get_num_psets(session, info, npset_names, ierror) + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, INTENT(OUT) :: npset_names + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +``` + +# Input Parameters + +* `session` : session (handle) +* `info`: info object (handle) + +# Output Parameters + +* `npset_names` : number of available process sets (non-negative integer) +* `IERROR` : Fortran only: Error status (integer). + +# Description + +`MPI_Session_get_num_psets` is used to query the runtime for the number of available process sets in +which the calling MPI process is a member. An MPI implementation is allowed to increase +the number of available process sets during the execution of an MPI application when new +process sets become available. However, MPI implementations are not allowed to change +the index of a particular process set name, or to change the name of the process set at a +particular index, or to delete a process set name once it has been added.
+ +# Notes + +When a process set becomes invalid, for example, when some processes become unreachable due to failures +in the communication system, subsequent usage of the process set name may raise an +error. For example, creating an `MPI_Group` from such a process set might succeed because it +is a local operation, but creating an `MPI_Comm` from that group and attempting collective +communication may raise an error. + +# Errors + +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +`MPI_Session_set_errhandler`; the predefined error handler `MPI_ERRORS_RETURN` +may be used to cause error values to be returned. Note that MPI does not +guarantee that an MPI program can continue past an error. + +# See Also + +[`MPI_Session_init`(3)](MPI_Session_init.html) diff --git a/ompi/mpi/man/man3/MPI_Session_get_pset_info.3.md b/ompi/mpi/man/man3/MPI_Session_get_pset_info.3.md new file mode 100644 index 00000000000..038c70486bd --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Session_get_pset_info.3.md @@ -0,0 +1,72 @@ +# Name + +`MPI_Session_get_pset_info` - Returns an info object containing properties of a specific process set + +# Syntax + +## C Syntax + +```c +#include + +int MPI_Session_get_pset_info(MPI_Session session, const char *pset_name, MPI_Info *info) +``` + +## Fortran Syntax + +```fortran +USE MPI +! 
or the older form: INCLUDE 'mpif.h' + +MPI_SESSION_GET_PSET_INFO(SESSION, PSET_NAME, INFO, IERROR) + INTEGER SESSION, INFO, IERROR + CHARACTER*(*) PSET_NAME +``` + +## Fortran 2008 Syntax + +```fortran +USE mpi_f08 + +MPI_Session_get_pset_info(session, pset_name, info, ierror) + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +``` + +# Input Parameters + +* `session` : session (handle) +* `pset_name` : name of process set (string) + +# Output Parameters + +* `info`: info object (handle) +* `IERROR` : Fortran only: Error status (integer). + +# Description + +`MPI_Session_get_pset_info` is used to query properties of a specific process set. The returned info +object can be queried with existing MPI info object query functions. One key/value pair +must be defined, "mpi_size". The value of the "mpi_size" key specifies the number of MPI +processes in the process set. + +# Notes + +The user is responsible for freeing the returned info object via `MPI_Info_free`. + +# Errors + +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +`MPI_Session_set_errhandler`; the predefined error handler `MPI_ERRORS_RETURN` +may be used to cause error values to be returned. Note that MPI does not +guarantee that an MPI program can continue past an error.
+ +# See Also + +[`MPI_Session_init`(3)](MPI_Session_init.html) diff --git a/ompi/mpi/man/man3/MPI_Session_init.3.md b/ompi/mpi/man/man3/MPI_Session_init.3.md new file mode 100644 index 00000000000..5d3577954e8 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Session_init.3.md @@ -0,0 +1,76 @@ +# Name + +`MPI_Session_init` - Creates a new session handle + +# Syntax + +## C Syntax + +```c +#include + +int MPI_Session_init(MPI_Info info, MPI_Errhandler errhandler, MPI_Session *session) +``` + +## Fortran Syntax + +```fortran +USE MPI +! or the older form: INCLUDE 'mpif.h' + +MPI_SESSION_INIT(INFO, ERRHANDLER, SESSION, IERROR) + INTEGER INFO, ERRHANDLER, SESSION, IERROR +``` + +## Fortran 2008 Syntax + +```fortran +USE mpi_f08 + +MPI_Session_init(info, errhandler, session, ierror) + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Session), INTENT(OUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +``` + +# Input Parameters + +* `info` : info object (handle) +* `errhandler` : error handler to be attached to the returned session (handle) + +# Output Parameters + +* `session` : New session (handle). +* `IERROR` : Fortran only: Error status (integer). + +# Description + +`MPI_Session_init` is used to instantiate an MPI Session. The returned session handle +can be used to query the runtime system about characteristics of the job within which the process is running, as well as other system resources. +An application can make multiple calls to `MPI_Session_init` and the related `MPI_Session_finalize` routine. + +# Notes + +The info argument is used to request MPI functionality requirements and possible MPI +implementation specific capabilities. + +The `errhandler` argument specifies an error handler to invoke in the event that the +Session instantiation call encounters an error. + +# Errors + +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. 
+Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The predefined error handler `MPI_ERRORS_RETURN` +may be used to cause error values to be returned. Note that MPI does not +guarantee that an MPI program can continue past an error. + +# See Also + +[`MPI_Session_get_num_psets`(3)](MPI_Session_get_num_psets.html) +[`MPI_Session_get_nth_pset`(3)](MPI_Session_get_nth_pset.html) +[`MPI_Session_group_from_pset`(3)](MPI_Session_group_from_pset.html) +[`MPI_Session_finalize`(3)](MPI_Session_finalize.html) diff --git a/ompi/mpi/man/man3/Makefile.am b/ompi/mpi/man/man3/Makefile.am index 351247f51bb..324c4032866 100644 --- a/ompi/mpi/man/man3/Makefile.am +++ b/ompi/mpi/man/man3/Makefile.am @@ -4,6 +4,8 @@ # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights reserved. # Copyright (c) 2020 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2021 Triad National Security, LLC. All rights +# reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -45,6 +47,7 @@ MD_FILES = \ MPI_Comm_connect.3.md \ MPI_Comm_create.3.md \ MPI_Comm_create_errhandler.3.md \ + MPI_Comm_create_from_group.3.md \ MPI_Comm_create_group.3.md \ MPI_Comm_create_keyval.3.md \ MPI_Comm_delete_attr.3.md \ @@ -84,8 +87,19 @@ MD_FILES = \ MPI_Group_difference.3.md \ MPI_Group_excl.3.md \ MPI_Group_free.3.md \ + MPI_Group_from_session_pset.3.md \ MPI_Group_incl.3.md \ - MPI_Group_intersection.3.md + MPI_Group_intersection.3.md \ + MPI_Intercomm_create_from_groups.3.md \ + MPI_Session_create_errhandler.3.md \ + MPI_Session_f2c.3.md \ + MPI_Session_finalize.3.md \ + MPI_Session_get_info.3.md \ + MPI_Session_get_num_psets.3.md \ + MPI_Session_get_nth_pset.3.md \ + MPI_Session_get_pset_info.3.md \ + MPI_Session_init.3.md + TEMPLATE_FILES = \ MPI_Abort.3in \ @@ -98,24 +112,33 @@ TEMPLATE_FILES = \ MPI_Aint_diff.3in \ MPI_Allgather.3in \ MPI_Iallgather.3in \ + MPI_Allgather_init.3in \ MPI_Allgatherv.3in \ MPI_Iallgatherv.3in \ + MPI_Allgatherv_init.3in \ MPI_Alloc_mem.3in \ MPI_Allreduce.3in \ MPI_Iallreduce.3in \ + MPI_Allreduce_init.3in \ MPI_Alltoall.3in \ MPI_Ialltoall.3in \ + MPI_Alltoall_init.3in \ MPI_Alltoallv.3in \ MPI_Ialltoallv.3in \ + MPI_Alltoallv_init.3in \ MPI_Alltoallw.3in \ MPI_Ialltoallw.3in \ + MPI_Alltoallw_init.3in \ MPI_Attr_delete.3in \ MPI_Attr_get.3in \ MPI_Attr_put.3in \ MPI_Ibarrier.3in \ + MPI_Barrier_init.3in \ MPI_Ibcast.3in \ + MPI_Bcast_init.3in \ MPI_Comm_c2f.3in \ MPI_Comm_idup.3in \ + MPI_Comm_idup_with_info.3in \ MPI_Comm_f2c.3in \ MPI_Comm_free.3in \ MPI_Comm_free_keyval.3in \ @@ -153,6 +176,7 @@ TEMPLATE_FILES = \ MPI_Error_string.3in \ MPI_Exscan.3in \ MPI_Iexscan.3in \ + MPI_Exscan_init.3in \ MPI_Fetch_and_op.3in \ MPI_File_c2f.3in \ MPI_File_call_errhandler.3in \ @@ -212,7 +236,9 @@ TEMPLATE_FILES = \ MPI_File_write_at_all_begin.3in \ MPI_File_write_at_all_end.3in \ MPI_Igather.3in \ + MPI_Gather_init.3in \ MPI_Igatherv.3in \ + MPI_Gatherv_init.3in \ 
MPI_Get_elements_x.3in \ MPI_Graph_neighbors.3in \ MPI_Group_c2f.3in \ @@ -236,6 +262,7 @@ TEMPLATE_FILES = \ MPI_Info_get.3in \ MPI_Info_get_nkeys.3in \ MPI_Info_get_nthkey.3in \ + MPI_Info_get_string.3in \ MPI_Info_get_valuelen.3in \ MPI_Info_set.3in \ MPI_Init.3in \ @@ -247,6 +274,8 @@ TEMPLATE_FILES = \ MPI_Irecv.3in \ MPI_Irsend.3in \ MPI_Isend.3in \ + MPI_Isendrecv.3in \ + MPI_Isendrecv_replace.3in \ MPI_Issend.3in \ MPI_Is_thread_main.3in \ MPI_Keyval_create.3in \ @@ -258,14 +287,19 @@ TEMPLATE_FILES = \ MPI_Mrecv.3in \ MPI_Neighbor_allgather.3in \ MPI_Ineighbor_allgather.3in \ + MPI_Neighbor_allgather_init.3in \ MPI_Neighbor_allgatherv.3in \ MPI_Ineighbor_allgatherv.3in \ + MPI_Neighbor_allgatherv_init.3in \ MPI_Neighbor_alltoall.3in \ MPI_Ineighbor_alltoall.3in \ + MPI_Neighbor_alltoall_init.3in \ MPI_Neighbor_alltoallv.3in \ MPI_Ineighbor_alltoallv.3in \ + MPI_Neighbor_alltoallv_init.3in \ MPI_Neighbor_alltoallw.3in \ MPI_Ineighbor_alltoallw.3in \ + MPI_Neighbor_alltoallw_init.3in \ MPI_Op_c2f.3in \ MPI_Op_commutative.3in \ MPI_Op_create.3in \ @@ -292,11 +326,14 @@ TEMPLATE_FILES = \ MPI_Recv_init.3in \ MPI_Reduce.3in \ MPI_Ireduce.3in \ + MPI_Reduce_init.3in \ MPI_Reduce_local.3in \ MPI_Reduce_scatter.3in \ MPI_Ireduce_scatter.3in \ + MPI_Reduce_scatter_init.3in \ MPI_Reduce_scatter_block.3in \ MPI_Ireduce_scatter_block.3in \ + MPI_Reduce_scatter_block_init.3in \ MPI_Register_datarep.3in \ MPI_Request_c2f.3in \ MPI_Request_f2c.3in \ @@ -309,10 +346,13 @@ TEMPLATE_FILES = \ MPI_Rsend_init.3in \ MPI_Scan.3in \ MPI_Iscan.3in \ + MPI_Scan_init.3in \ MPI_Scatter.3in \ MPI_Iscatter.3in \ + MPI_Scatter_init.3in \ MPI_Scatterv.3in \ MPI_Iscatterv.3in \ + MPI_Scatterv_init.3in \ MPI_Send.3in \ MPI_Send_init.3in \ MPI_Sendrecv.3in \ diff --git a/ompi/mpi/tool/Makefile.am b/ompi/mpi/tool/Makefile.am index c749cb3eb5a..814ae91eebd 100644 --- a/ompi/mpi/tool/Makefile.am +++ b/ompi/mpi/tool/Makefile.am @@ -6,6 +6,8 @@ # University of Stuttgart. All rights reserved. 
# Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -13,36 +15,74 @@ # $HEADER$ # -SUBDIRS = profile - -AM_CPPFLAGS = -DOMPI_PROFILING_DEFINES=0 - -noinst_LTLIBRARIES = libmpi_mpit_common.la +# The purpose of the profiling layer is to allow intercept libraries +# which override the MPI_ namespace symbols. We potentially compile +# every MPI function twice. We always build the profiling layer, +# because the symbols that are always implemented as functions are the +# PMPI_ namespace symbols. We sometimes also build the non-profiling +# layer, if weak symbols can't be used to alias the MPI_ namespace +# into the PMPI_ namespace. +noinst_LTLIBRARIES = libmpi_mpit.la libmpi_mpit_profile.la if BUILD_MPI_BINDINGS_LAYER -noinst_LTLIBRARIES += libmpi_mpit.la +noinst_LTLIBRARIES += libmpi_mpit_noprofile.la endif headers = mpit-internal.h -libmpi_mpit_common_la_SOURCES = mpit_common.c - -libmpi_mpit_la_SOURCES = init_thread.c finalize.c cvar_get_num.c \ - cvar_get_info.c cvar_read.c cvar_write.c \ - cvar_handle_alloc.c cvar_handle_free.c \ - category_get_num.c category_get_info.c \ - category_get_cvars.c category_get_pvars.c \ - category_get_categories.c category_changed.c \ - pvar_get_info.c pvar_get_num.c pvar_handle_alloc.c \ - pvar_handle_free.c pvar_read.c pvar_readreset.c \ - pvar_reset.c pvar_session_create.c pvar_session_free.c \ - pvar_start.c pvar_stop.c pvar_write.c \ - enum_get_info.c enum_get_item.c cvar_get_index.c \ - pvar_get_index.c category_get_index.c +# mpit_common.c contains no public functions, so it has no profiling +# implications and is always built.
+libmpi_mpit_la_SOURCES = \ + mpit_common.c +libmpi_mpit_la_LIBADD = libmpi_mpit_profile.la +if BUILD_MPI_BINDINGS_LAYER +libmpi_mpit_la_LIBADD += libmpi_mpit_noprofile.la +endif # Conditionally install the header files - if WANT_INSTALL_HEADERS ompidir = $(ompiincludedir)/$(subdir) ompi_HEADERS = $(headers) endif + +# +# List of all C files that have profile versions +# +interface_profile_sources = \ + category_changed.c \ + category_get_categories.c \ + category_get_cvars.c \ + category_get_info.c \ + category_get_index.c \ + category_get_num.c \ + category_get_pvars.c \ + cvar_get_info.c \ + cvar_get_index.c \ + cvar_get_num.c \ + cvar_handle_alloc.c \ + cvar_handle_free.c \ + cvar_read.c \ + cvar_write.c \ + enum_get_info.c \ + enum_get_item.c \ + finalize.c \ + init_thread.c \ + pvar_get_info.c \ + pvar_get_index.c \ + pvar_get_num.c \ + pvar_handle_alloc.c \ + pvar_handle_free.c \ + pvar_read.c \ + pvar_readreset.c \ + pvar_reset.c \ + pvar_session_create.c \ + pvar_session_free.c \ + pvar_start.c \ + pvar_stop.c \ + pvar_write.c + +libmpi_mpit_profile_la_SOURCES = $(interface_profile_sources) +libmpi_mpit_profile_la_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=1 + +libmpi_mpit_noprofile_la_SOURCES = $(interface_profile_sources) +libmpi_mpit_noprofile_la_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=0 diff --git a/ompi/mpi/tool/category_changed.c b/ompi/mpi/tool/category_changed.c index aed854ba669..3b3dce70036 100644 --- a/ompi/mpi/tool/category_changed.c +++ b/ompi/mpi/tool/category_changed.c @@ -4,6 +4,8 @@ * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -11,17 +13,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_category_changed = PMPI_T_category_changed #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_category_changed PMPI_T_category_changed #endif - int MPI_T_category_changed(int *stamp) { if (!mpit_is_initialized ()) { diff --git a/ompi/mpi/tool/category_get_categories.c b/ompi/mpi/tool/category_get_categories.c index 4f898b9c9c3..813dcc34be1 100644 --- a/ompi/mpi/tool/category_get_categories.c +++ b/ompi/mpi/tool/category_get_categories.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_category_get_categories = PMPI_T_category_get_categories #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_category_get_categories PMPI_T_category_get_categories #endif - int MPI_T_category_get_categories(int cat_index, int len, int indices[]) { const mca_base_var_group_t *group; diff --git a/ompi/mpi/tool/category_get_cvars.c b/ompi/mpi/tool/category_get_cvars.c index 430ba7db201..bc361f6ba9b 100644 --- a/ompi/mpi/tool/category_get_cvars.c +++ b/ompi/mpi/tool/category_get_cvars.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. 
or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_category_get_cvars = PMPI_T_category_get_cvars #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_category_get_cvars PMPI_T_category_get_cvars #endif - int MPI_T_category_get_cvars(int cat_index, int len, int indices[]) { const mca_base_var_group_t *group; diff --git a/ompi/mpi/tool/category_get_index.c b/ompi/mpi/tool/category_get_index.c index df4e1946042..996c5538f35 100644 --- a/ompi/mpi/tool/category_get_index.c +++ b/ompi/mpi/tool/category_get_index.c @@ -7,7 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -15,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_category_get_index = PMPI_T_category_get_index #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_category_get_index PMPI_T_category_get_index #endif - int MPI_T_category_get_index (const char *name, int *category_index) { int ret; diff --git a/ompi/mpi/tool/category_get_info.c b/ompi/mpi/tool/category_get_info.c index 1ee833509be..4906c796125 100644 --- a/ompi/mpi/tool/category_get_info.c +++ b/ompi/mpi/tool/category_get_info.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
+ * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_category_get_info = PMPI_T_category_get_info #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_category_get_info PMPI_T_category_get_info #endif - int MPI_T_category_get_info(int cat_index, char *name, int *name_len, char *desc, int *desc_len, int *num_cvars, int *num_pvars, int *num_categories) diff --git a/ompi/mpi/tool/category_get_num.c b/ompi/mpi/tool/category_get_num.c index f7b19beea20..4da64da8156 100644 --- a/ompi/mpi/tool/category_get_num.c +++ b/ompi/mpi/tool/category_get_num.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_category_get_num = PMPI_T_category_get_num #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_category_get_num PMPI_T_category_get_num #endif - int MPI_T_category_get_num (int *num_cat) { if (!mpit_is_initialized ()) { diff --git a/ompi/mpi/tool/category_get_pvars.c b/ompi/mpi/tool/category_get_pvars.c index 90eedb6c86b..c4371762fb5 100644 --- a/ompi/mpi/tool/category_get_pvars.c +++ b/ompi/mpi/tool/category_get_pvars.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_category_get_pvars = PMPI_T_category_get_pvars #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_category_get_pvars PMPI_T_category_get_pvars #endif - int MPI_T_category_get_pvars(int cat_index, int len, int indices[]) { const mca_base_var_group_t *group; diff --git a/ompi/mpi/tool/cvar_get_index.c b/ompi/mpi/tool/cvar_get_index.c index 2de238efa2e..ee0672f0c9b 100644 --- a/ompi/mpi/tool/cvar_get_index.c +++ b/ompi/mpi/tool/cvar_get_index.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_cvar_get_index = PMPI_T_cvar_get_index #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_cvar_get_index PMPI_T_cvar_get_index #endif - int MPI_T_cvar_get_index (const char *name, int *cvar_index) { int ret; diff --git a/ompi/mpi/tool/cvar_get_info.c b/ompi/mpi/tool/cvar_get_info.c index 8b5e7fa0d14..c3f94787587 100644 --- a/ompi/mpi/tool/cvar_get_info.c +++ b/ompi/mpi/tool/cvar_get_info.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_cvar_get_info = PMPI_T_cvar_get_info #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_cvar_get_info PMPI_T_cvar_get_info #endif - int MPI_T_cvar_get_info(int cvar_index, char *name, int *name_len, int *verbosity, MPI_Datatype *datatype, MPI_T_enum *enumtype, char *desc, int *desc_len, int *bind, int *scope) diff --git a/ompi/mpi/tool/cvar_get_num.c b/ompi/mpi/tool/cvar_get_num.c index 0b852fc32be..32ba1201cdc 100644 --- a/ompi/mpi/tool/cvar_get_num.c +++ b/ompi/mpi/tool/cvar_get_num.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_cvar_get_num = PMPI_T_cvar_get_num #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_cvar_get_num PMPI_T_cvar_get_num #endif - int MPI_T_cvar_get_num (int *num_cvar) { if (!mpit_is_initialized ()) { return MPI_T_ERR_NOT_INITIALIZED; diff --git a/ompi/mpi/tool/cvar_handle_alloc.c b/ompi/mpi/tool/cvar_handle_alloc.c index 0a0ecec297a..77514825a05 100644 --- a/ompi/mpi/tool/cvar_handle_alloc.c +++ b/ompi/mpi/tool/cvar_handle_alloc.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_cvar_handle_alloc = PMPI_T_cvar_handle_alloc #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_cvar_handle_alloc PMPI_T_cvar_handle_alloc #endif - int MPI_T_cvar_handle_alloc (int cvar_index, void *obj_handle, MPI_T_cvar_handle *handle, int *count) { diff --git a/ompi/mpi/tool/cvar_handle_free.c b/ompi/mpi/tool/cvar_handle_free.c index 4d64762c8b5..7ffa6c4c30a 100644 --- a/ompi/mpi/tool/cvar_handle_free.c +++ b/ompi/mpi/tool/cvar_handle_free.c @@ -3,6 +3,8 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. 
All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -10,17 +12,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_cvar_handle_free = PMPI_T_cvar_handle_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_cvar_handle_free PMPI_T_cvar_handle_free #endif - int MPI_T_cvar_handle_free (MPI_T_cvar_handle *handle) { if (!mpit_is_initialized ()) { diff --git a/ompi/mpi/tool/cvar_read.c b/ompi/mpi/tool/cvar_read.c index ef53e736b93..b46db2c99c4 100644 --- a/ompi/mpi/tool/cvar_read.c +++ b/ompi/mpi/tool/cvar_read.c @@ -8,6 +8,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -15,17 +17,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_cvar_read = PMPI_T_cvar_read #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_cvar_read PMPI_T_cvar_read #endif - int MPI_T_cvar_read (MPI_T_cvar_handle handle, void *buf) { const mca_base_var_storage_t *value = NULL; diff --git a/ompi/mpi/tool/cvar_write.c b/ompi/mpi/tool/cvar_write.c index 23731ffb601..5dadb1e48e5 100644 --- a/ompi/mpi/tool/cvar_write.c +++ b/ompi/mpi/tool/cvar_write.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_cvar_write = PMPI_T_cvar_write #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_cvar_write PMPI_T_cvar_write #endif - int MPI_T_cvar_write (MPI_T_cvar_handle handle, const void *buf) { int rc = MPI_SUCCESS; diff --git a/ompi/mpi/tool/enum_get_info.c b/ompi/mpi/tool/enum_get_info.c index 441197e389f..59de6eebe80 100644 --- a/ompi/mpi/tool/enum_get_info.c +++ b/ompi/mpi/tool/enum_get_info.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_enum_get_info = PMPI_T_enum_get_info #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_enum_get_info PMPI_T_enum_get_info #endif - int MPI_T_enum_get_info(MPI_T_enum enumtype, int *num, char *name, int *name_len) { int rc = MPI_SUCCESS; diff --git a/ompi/mpi/tool/enum_get_item.c b/ompi/mpi/tool/enum_get_item.c index 0ddcd34f9b4..0a6e23c806e 100644 --- a/ompi/mpi/tool/enum_get_item.c +++ b/ompi/mpi/tool/enum_get_item.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_enum_get_item = PMPI_T_enum_get_item #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_enum_get_item PMPI_T_enum_get_item #endif - int MPI_T_enum_get_item(MPI_T_enum enumtype, int index, int *value, char *name, int *name_len) { diff --git a/ompi/mpi/tool/finalize.c b/ompi/mpi/tool/finalize.c index dce8a40d0bf..359468b494d 100644 --- a/ompi/mpi/tool/finalize.c +++ b/ompi/mpi/tool/finalize.c @@ -4,6 +4,8 @@ * reserved. * Copyright (c) 2014-2018 Cisco Systems, Inc. All rights reserved * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -11,6 +13,7 @@ * $HEADER$ */ +#include "ompi_config.h" #include "ompi/mpi/tool/mpit-internal.h" @@ -18,15 +21,13 @@ #include "opal/include/opal/sys/atomic.h" #include "opal/runtime/opal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_finalize = PMPI_T_finalize #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_finalize PMPI_T_finalize #endif - int MPI_T_finalize (void) { ompi_mpit_lock (); diff --git a/ompi/mpi/tool/init_thread.c b/ompi/mpi/tool/init_thread.c index 958985c3584..96e4b2d9024 100644 --- a/ompi/mpi/tool/init_thread.c +++ b/ompi/mpi/tool/init_thread.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -14,18 +16,19 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" #include "ompi/runtime/ompi_info_support.h" #include "opal/include/opal/sys/atomic.h" #include "opal/runtime/opal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_init_thread = PMPI_T_init_thread #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_init_thread PMPI_T_init_thread #endif extern opal_mutex_t ompi_mpit_big_lock; diff --git a/ompi/mpi/tool/profile/Makefile.am b/ompi/mpi/tool/profile/Makefile.am deleted file mode 100644 index d530d1c450a..00000000000 --- a/ompi/mpi/tool/profile/Makefile.am +++ /dev/null @@ -1,106 +0,0 @@ -# -*- makefile -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011 Sandia National Laboratories. All rights reserved. -# Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -# -# OMPI_PROFILING_DEFINES flag s enabled when we want our MPI_* symbols -# to be replaced by PMPI_*. 
In other words, this flag decides -# whether "profile/defines.h" is included or not. "profile/defines.h" -# replaces all MPI_* symbols with PMPI_* symbols. In this directory -# we definately need it to be 1. -# -AM_CPPFLAGS = -DOMPI_PROFILING_DEFINES=1 - -# -# This build needs to go through only if profiling is required. -# Further, this build HAS to go through if profiling is required. -# - -noinst_LTLIBRARIES = libmpi_pmpit.la - -headers = defines.h - -nodist_libmpi_pmpit_la_SOURCES = \ - pcategory_changed.c \ - pcategory_get_categories.c \ - pcategory_get_cvars.c \ - pcategory_get_info.c \ - pcategory_get_index.c \ - pcategory_get_num.c \ - pcategory_get_pvars.c \ - pcvar_get_info.c \ - pcvar_get_index.c \ - pcvar_get_num.c \ - pcvar_handle_alloc.c \ - pcvar_handle_free.c \ - pcvar_read.c \ - pcvar_write.c \ - penum_get_info.c \ - penum_get_item.c \ - pfinalize.c \ - pinit_thread.c \ - ppvar_get_info.c \ - ppvar_get_index.c \ - ppvar_get_num.c \ - ppvar_handle_alloc.c \ - ppvar_handle_free.c \ - ppvar_read.c \ - ppvar_readreset.c \ - ppvar_reset.c \ - ppvar_session_create.c \ - ppvar_session_free.c \ - ppvar_start.c \ - ppvar_stop.c \ - ppvar_write.c - -# -# Sym link in the sources from the real MPI directory -# -$(nodist_libmpi_pmpit_la_SOURCES): - $(OMPI_V_LN_S) if test ! 
-r $@ ; then \ - pname=`echo $@ | cut -b '2-'` ; \ - $(LN_S) $(top_srcdir)/ompi/mpi/tool/$$pname $@ ; \ - fi - -# Conditionally install the header files - -if WANT_INSTALL_HEADERS -ompidir = $(ompiincludedir)/$(subdir) -ompi_HEADERS = $(headers) -endif - -# These files were created by targets above - -MAINTAINERCLEANFILES = $(nodist_libmpi_pmpit_la_SOURCES) - -# Don't want these targets in here - -tags-recursive: -tags: -TAGS: -GTAGS: -ID: diff --git a/ompi/mpi/tool/profile/defines.h b/ompi/mpi/tool/profile/defines.h deleted file mode 100644 index 44ef3a56eca..00000000000 --- a/ompi/mpi/tool/profile/defines.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OMPIT_PROFILE_DEFINES_H -#define OMPIT_PROFILE_DEFINES_H -/* - * This file is included in the top directory only if - * profiling is required. 
Once profiling is required, - * this file will replace all MPI_* symbols with - * PMPI_* symbols - */ -#define MPI_T_category_changed PMPI_T_category_changed -#define MPI_T_category_get_categories PMPI_T_category_get_categories -#define MPI_T_category_get_cvars PMPI_T_category_get_cvars -#define MPI_T_category_get_info PMPI_T_category_get_info -#define MPI_T_category_get_index PMPI_T_category_get_index -#define MPI_T_category_get_num PMPI_T_category_get_num -#define MPI_T_category_get_pvars PMPI_T_category_get_pvars -#define MPI_T_cvar_get_info PMPI_T_cvar_get_info -#define MPI_T_cvar_get_index PMPI_T_cvar_get_index -#define MPI_T_cvar_get_num PMPI_T_cvar_get_num -#define MPI_T_cvar_handle_alloc PMPI_T_cvar_handle_alloc -#define MPI_T_cvar_handle_free PMPI_T_cvar_handle_free -#define MPI_T_cvar_read PMPI_T_cvar_read -#define MPI_T_cvar_write PMPI_T_cvar_write -#define MPI_T_enum_get_info PMPI_T_enum_get_info -#define MPI_T_enum_get_item PMPI_T_enum_get_item -#define MPI_T_finalize PMPI_T_finalize -#define MPI_T_init_thread PMPI_T_init_thread -#define MPI_T_pvar_get_info PMPI_T_pvar_get_info -#define MPI_T_pvar_get_index PMPI_T_pvar_get_index -#define MPI_T_pvar_get_num PMPI_T_pvar_get_num -#define MPI_T_pvar_handle_alloc PMPI_T_pvar_handle_alloc -#define MPI_T_pvar_handle_free PMPI_T_pvar_handle_free -#define MPI_T_pvar_read PMPI_T_pvar_read -#define MPI_T_pvar_readreset PMPI_T_pvar_readreset -#define MPI_T_pvar_reset PMPI_T_pvar_reset -#define MPI_T_pvar_session_create PMPI_T_pvar_session_create -#define MPI_T_pvar_session_free PMPI_T_pvar_session_free -#define MPI_T_pvar_start PMPI_T_pvar_start -#define MPI_T_pvar_stop PMPI_T_pvar_stop -#define MPI_T_pvar_write PMPI_T_pvar_write -#endif /* OMPIT_C_PROFILE_DEFINES_H */ diff --git a/ompi/mpi/tool/pvar_get_index.c b/ompi/mpi/tool/pvar_get_index.c index 6280439b973..4f7832c4b7c 100644 --- a/ompi/mpi/tool/pvar_get_index.c +++ b/ompi/mpi/tool/pvar_get_index.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of 
Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_get_index = PMPI_T_pvar_get_index #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_get_index PMPI_T_pvar_get_index #endif - int MPI_T_pvar_get_index (const char *name, int var_class, int *pvar_index) { int ret; diff --git a/ompi/mpi/tool/pvar_get_info.c b/ompi/mpi/tool/pvar_get_info.c index a83984c85e3..01ad175e2d6 100644 --- a/ompi/mpi/tool/pvar_get_info.c +++ b/ompi/mpi/tool/pvar_get_info.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_get_info = PMPI_T_pvar_get_info #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_get_info PMPI_T_pvar_get_info #endif - int MPI_T_pvar_get_info(int pvar_index, char *name, int *name_len, int *verbosity, int *var_class, MPI_Datatype *datatype, MPI_T_enum *enumtype, char *desc, int *desc_len, int *bind, diff --git a/ompi/mpi/tool/pvar_get_num.c b/ompi/mpi/tool/pvar_get_num.c index 190c8e2bc13..1e3fa0b1fb3 100644 --- a/ompi/mpi/tool/pvar_get_num.c +++ b/ompi/mpi/tool/pvar_get_num.c @@ -6,6 +6,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,17 +15,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_get_num = PMPI_T_pvar_get_num #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_get_num PMPI_T_pvar_get_num #endif - int MPI_T_pvar_get_num(int *num_pvar) { if (!mpit_is_initialized ()) { diff --git a/ompi/mpi/tool/pvar_handle_alloc.c b/ompi/mpi/tool/pvar_handle_alloc.c index 83eb650475f..0400025a11c 100644 --- a/ompi/mpi/tool/pvar_handle_alloc.c +++ b/ompi/mpi/tool/pvar_handle_alloc.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. 
All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_handle_alloc = PMPI_T_pvar_handle_alloc #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_handle_alloc PMPI_T_pvar_handle_alloc #endif - int MPI_T_pvar_handle_alloc(MPI_T_pvar_session session, int pvar_index, void *obj_handle, MPI_T_pvar_handle *handle, int *count) { diff --git a/ompi/mpi/tool/pvar_handle_free.c b/ompi/mpi/tool/pvar_handle_free.c index 154d1c270f3..81ffa0707a9 100644 --- a/ompi/mpi/tool/pvar_handle_free.c +++ b/ompi/mpi/tool/pvar_handle_free.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_handle_free = PMPI_T_pvar_handle_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_handle_free PMPI_T_pvar_handle_free #endif - int MPI_T_pvar_handle_free(MPI_T_pvar_session session, MPI_T_pvar_handle *handle) { int ret = MPI_SUCCESS; diff --git a/ompi/mpi/tool/pvar_read.c b/ompi/mpi/tool/pvar_read.c index 8314c9d4291..0c4e53653e0 100644 --- a/ompi/mpi/tool/pvar_read.c +++ b/ompi/mpi/tool/pvar_read.c @@ -4,6 +4,8 @@ * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. 
or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -11,17 +13,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_read = PMPI_T_pvar_read #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_read PMPI_T_pvar_read #endif - int MPI_T_pvar_read(MPI_T_pvar_session session, MPI_T_pvar_handle handle, void* buf) { diff --git a/ompi/mpi/tool/pvar_readreset.c b/ompi/mpi/tool/pvar_readreset.c index 59581bceba3..1567c4b1aef 100644 --- a/ompi/mpi/tool/pvar_readreset.c +++ b/ompi/mpi/tool/pvar_readreset.c @@ -3,6 +3,8 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -10,17 +12,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_readreset = PMPI_T_pvar_readreset #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_readreset PMPI_T_pvar_readreset #endif - int MPI_T_pvar_readreset(MPI_T_pvar_session session, MPI_T_pvar_handle handle, void *buf) { diff --git a/ompi/mpi/tool/pvar_reset.c b/ompi/mpi/tool/pvar_reset.c index 80e0bdeded5..a6f59d5405d 100644 --- a/ompi/mpi/tool/pvar_reset.c +++ b/ompi/mpi/tool/pvar_reset.c @@ -4,6 +4,8 @@ * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -11,17 +13,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_reset = PMPI_T_pvar_reset #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_reset PMPI_T_pvar_reset #endif - int MPI_T_pvar_reset(MPI_T_pvar_session session, MPI_T_pvar_handle handle) { int ret = MPI_SUCCESS; diff --git a/ompi/mpi/tool/pvar_session_create.c b/ompi/mpi/tool/pvar_session_create.c index 9e0e91cf0a4..6c153bebb8f 100644 --- a/ompi/mpi/tool/pvar_session_create.c +++ b/ompi/mpi/tool/pvar_session_create.c @@ -7,6 +7,8 @@ * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,17 +16,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_session_create = PMPI_T_pvar_session_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_session_create PMPI_T_pvar_session_create #endif - int MPI_T_pvar_session_create(MPI_T_pvar_session *session) { int ret = MPI_SUCCESS; diff --git a/ompi/mpi/tool/pvar_session_free.c b/ompi/mpi/tool/pvar_session_free.c index 98ec4a98a94..459489d6a8b 100644 --- a/ompi/mpi/tool/pvar_session_free.c +++ b/ompi/mpi/tool/pvar_session_free.c @@ -3,6 +3,8 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. 
All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -10,17 +12,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_session_free = PMPI_T_pvar_session_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_session_free PMPI_T_pvar_session_free #endif - int MPI_T_pvar_session_free(MPI_T_pvar_session *session) { int ret = MPI_SUCCESS; diff --git a/ompi/mpi/tool/pvar_start.c b/ompi/mpi/tool/pvar_start.c index d2fce3fa2a6..ecf99372c08 100644 --- a/ompi/mpi/tool/pvar_start.c +++ b/ompi/mpi/tool/pvar_start.c @@ -4,6 +4,8 @@ * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -11,17 +13,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_start = PMPI_T_pvar_start #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_start PMPI_T_pvar_start #endif - static int pvar_handle_start (mca_base_pvar_handle_t *handle) { if (OPAL_SUCCESS != mca_base_pvar_handle_start (handle)) { diff --git a/ompi/mpi/tool/pvar_stop.c b/ompi/mpi/tool/pvar_stop.c index 8923bbbf7b6..349e6f2d0ec 100644 --- a/ompi/mpi/tool/pvar_stop.c +++ b/ompi/mpi/tool/pvar_stop.c @@ -4,6 +4,8 @@ * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -11,17 +13,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_stop = PMPI_T_pvar_stop #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_stop PMPI_T_pvar_stop #endif - static int pvar_handle_stop (mca_base_pvar_handle_t *handle) { if (OPAL_SUCCESS != mca_base_pvar_handle_stop (handle)) { diff --git a/ompi/mpi/tool/pvar_write.c b/ompi/mpi/tool/pvar_write.c index 5bd17213600..4bd3f8d75e1 100644 --- a/ompi/mpi/tool/pvar_write.c +++ b/ompi/mpi/tool/pvar_write.c @@ -4,6 +4,8 @@ * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -11,17 +13,17 @@ * $HEADER$ */ +#include "ompi_config.h" + #include "ompi/mpi/tool/mpit-internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_T_pvar_write = PMPI_T_pvar_write #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/tool/profile/defines.h" +#define MPI_T_pvar_write PMPI_T_pvar_write #endif - int MPI_T_pvar_write(MPI_T_pvar_session session, MPI_T_pvar_handle handle, const void* buf) { diff --git a/ompi/mpiext/affinity/c/mpiext_affinity_c.h b/ompi/mpiext/affinity/c/mpiext_affinity_c.h index bf94f283c43..da6210e11b7 100644 --- a/ompi/mpiext/affinity/c/mpiext_affinity_c.h +++ b/ompi/mpiext/affinity/c/mpiext_affinity_c.h @@ -3,6 +3,9 @@ * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2021 The University of Tennessee and the University + * of Tennessee research Foundation. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mpiext/affinity/c/mpiext_affinity_str.c b/ompi/mpiext/affinity/c/mpiext_affinity_str.c index e0949b0a340..03ec8c86923 100644 --- a/ompi/mpiext/affinity/c/mpiext_affinity_str.c +++ b/ompi/mpiext/affinity/c/mpiext_affinity_str.c @@ -94,6 +94,199 @@ int OMPI_Affinity_str(ompi_affinity_fmt_t fmt_type, /*---------------------------------------------------------------------------*/ +/* + * Make a map of socket/core/hwthread tuples + */ +static int build_map(int *num_sockets_arg, int *num_cores_arg, hwloc_cpuset_t cpuset, int ***map, + hwloc_topology_t topo) +{ + int num_sockets, num_cores; + int socket_index, core_index, pu_index; + hwloc_obj_t socket, core, pu; + int **data; + + /* Find out how many sockets we have */ + num_sockets = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET); + /* some systems (like the iMac) only have one + * socket and so don't report a socket + */ + if (0 == num_sockets) { + num_sockets = 1; + } + /* Lazy: take the total number of cores that we have in the + topology; that'll be more than the max number of cores + under any given socket */ + num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); + *num_sockets_arg = num_sockets; + *num_cores_arg = num_cores; + + /* Alloc a 2D array: sockets x cores. 
*/ + data = malloc(num_sockets * sizeof(int *)); + if (NULL == data) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + data[0] = calloc(num_sockets * num_cores, sizeof(int)); + if (NULL == data[0]) { + free(data); + return MPI_ERR_NO_MEM; + } + for (socket_index = 1; socket_index < num_sockets; ++socket_index) { + data[socket_index] = data[socket_index - 1] + num_cores; + } + + /* Iterate the PUs in this cpuset; fill in the data[][] array with + the socket/core/pu triples */ + for (pu_index = 0, + pu = hwloc_get_obj_inside_cpuset_by_type(topo, cpuset, HWLOC_OBJ_PU, pu_index); + NULL != pu; + pu = hwloc_get_obj_inside_cpuset_by_type(topo, cpuset, HWLOC_OBJ_PU, ++pu_index)) { + /* Go upward and find the core this PU belongs to */ + core = pu; + while (NULL != core && core->type != HWLOC_OBJ_CORE) { + core = core->parent; + } + core_index = 0; + if (NULL != core) { + core_index = core->logical_index; + } + + /* Go upward and find the socket this PU belongs to */ + socket = pu; + while (NULL != socket && socket->type != HWLOC_OBJ_SOCKET) { + socket = socket->parent; + } + socket_index = 0; + if (NULL != socket) { + socket_index = socket->logical_index; + } + + /* Save this socket/core/pu combo. LAZY: Assuming that we + won't have more PU's per core than (sizeof(int)*8). */ + data[socket_index][core_index] |= (1 << pu->sibling_rank); + } + + *map = data; + return OMPI_SUCCESS; +} + +/* + * Turn an int bitmap to a "a-b,c" range kind of string + */ +static char *bitmap2rangestr(int bitmap) +{ + size_t i; + int range_start, range_end; + bool first, isset; + char tmp[OMPI_AFFINITY_STRING_MAX - 1] = {0}; + const int stmp = sizeof(tmp) - 1; + static char ret[OMPI_AFFINITY_STRING_MAX] = {0}; + + first = true; + range_start = -999; + for (i = 0; i < sizeof(int) * 8; ++i) { + isset = (bitmap & (1 << i)); + + /* Do we have a running range? 
*/ + if (range_start >= 0) { + if (isset) { + continue; + } else { + /* A range just ended; output it */ + if (!first) { + strncat(ret, ",", sizeof(ret) - strlen(ret) - 1); + } else { + first = false; + } + + range_end = i - 1; + if (range_start == range_end) { + snprintf(tmp, stmp, "%d", range_start); + } else { + snprintf(tmp, stmp, "%d-%d", range_start, range_end); + } + size_t ret_len = strlen(ret); + snprintf(ret + ret_len, sizeof(ret) - ret_len, "%s", tmp); + + range_start = -999; + } + } + + /* No running range */ + else { + if (isset) { + range_start = i; + } + } + } + + /* If we ended the bitmap with a range open, output it */ + if (range_start >= 0) { + if (!first) { + strncat(ret, ",", sizeof(ret) - strlen(ret) - 1); + first = false; + } + + range_end = i - 1; + if (range_start == range_end) { + snprintf(tmp, stmp, "%d", range_start); + } else { + snprintf(tmp, stmp, "%d-%d", range_start, range_end); + } + size_t ret_len = strlen(ret); + snprintf(ret + ret_len, sizeof(ret) - ret_len, "%s", tmp); + } + + return ret; +} + +/* + * Make a prettyprint string for a hwloc_cpuset_t + */ +static int cset2str(char *str, int len, hwloc_topology_t topo, hwloc_cpuset_t cpuset) +{ + bool first; + int num_sockets, num_cores; + int ret, socket_index, core_index; + char tmp[OMPI_AFFINITY_STRING_MAX - 1] = {0}; + const int stmp = sizeof(tmp) - 1; + int **map = NULL; + + str[0] = tmp[stmp] = '\0'; + + /* if the cpuset is all zero, then not bound */ + if (hwloc_bitmap_iszero(cpuset)) { + return OMPI_ERR_NOT_BOUND; + } + + if (OMPI_SUCCESS != (ret = build_map(&num_sockets, &num_cores, cpuset, &map, topo))) { + return ret; + } + /* Iterate over the data matrix and build up the string */ + first = true; + for (socket_index = 0; socket_index < num_sockets; ++socket_index) { + for (core_index = 0; core_index < num_cores; ++core_index) { + if (map[socket_index][core_index] > 0) { + if (!first) { + strncat(str, ", ", len - strlen(str) - 1); + } + first = false; + + snprintf(tmp, 
stmp, "socket %d[core %d[hwt %s]]", socket_index, core_index, + bitmap2rangestr(map[socket_index][core_index])); + strncat(str, tmp, len - strlen(str) - 1); + } + } + } + if (NULL != map) { + if (NULL != map[0]) { + free(map[0]); + } + free(map); + } + + return OMPI_SUCCESS; +} + /* * Where did OMPI bind this process? (prettyprint) */ @@ -107,10 +300,10 @@ static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]) hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_list_sscanf(cpuset, opal_process_info.cpuset); - if(OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(str, - OMPI_AFFINITY_STRING_MAX, - opal_hwloc_topology, - cpuset)) + if(OMPI_ERR_NOT_BOUND == cset2str(str, + OMPI_AFFINITY_STRING_MAX, + opal_hwloc_topology, + cpuset)) { opal_string_copy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX); } @@ -119,6 +312,90 @@ static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]) return OMPI_SUCCESS; } +/* determine if there is a single cpu in a bitmap */ +static bool is_single_cpu(hwloc_cpuset_t cpuset) +{ + int i; + bool one = false; + + /* count the number of bits that are set - there is + * one bit for each available pu. We could just + * subtract the first and last indices, but there + * may be "holes" in the bitmap corresponding to + * offline or unallowed cpus - so we have to + * search for them. Return false if we anything + * other than one + */ + for (i = hwloc_bitmap_first(cpuset); i <= hwloc_bitmap_last(cpuset); i++) { + if (hwloc_bitmap_isset(cpuset, i)) { + if (one) { + return false; + } + one = true; + } + } + + return one; +} + +/* + * Make a prettyprint string for a cset in a map format. + * Example: [B./..] + * Key: [] - signifies socket + * / - divider between cores + * . 
- signifies PU a process not bound to + * B - signifies PU a process is bound to + */ +static int cset2mapstr(char *str, int len, hwloc_topology_t topo, hwloc_cpuset_t cpuset) +{ + char tmp[OMPI_AFFINITY_STRING_MAX - 1] = {0}; + int core_index, pu_index; + const int stmp = sizeof(tmp) - 1; + hwloc_obj_t socket, core, pu; + + str[0] = tmp[stmp] = '\0'; + + /* if the cpuset is all zero, then not bound */ + if (hwloc_bitmap_iszero(cpuset)) { + return OMPI_ERR_NOT_BOUND; + } + + /* Iterate over all existing sockets */ + for (socket = hwloc_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0); NULL != socket; + socket = socket->next_cousin) { + strncat(str, "[", len - strlen(str) - 1); + + /* Iterate over all existing cores in this socket */ + core_index = 0; + for (core = hwloc_get_obj_inside_cpuset_by_type(topo, socket->cpuset, HWLOC_OBJ_CORE, + core_index); + NULL != core; + core = hwloc_get_obj_inside_cpuset_by_type(topo, socket->cpuset, HWLOC_OBJ_CORE, + ++core_index)) { + if (core_index > 0) { + strncat(str, "/", len - strlen(str) - 1); + } + + /* Iterate over all existing PUs in this core */ + pu_index = 0; + for (pu = hwloc_get_obj_inside_cpuset_by_type(topo, core->cpuset, HWLOC_OBJ_PU, + pu_index); + NULL != pu; pu = hwloc_get_obj_inside_cpuset_by_type(topo, core->cpuset, + HWLOC_OBJ_PU, ++pu_index)) { + + /* Is this PU in the cpuset? */ + if (hwloc_bitmap_isset(cpuset, pu->os_index)) { + strncat(str, "B", len - strlen(str) - 1); + } else { + strncat(str, ".", len - strlen(str) - 1); + } + } + } + strncat(str, "]", len - strlen(str) - 1); + } + + return OMPI_SUCCESS; +} /* * Where is this process currently bound? 
(prettyprint) @@ -145,24 +422,24 @@ static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) /* we are bound if the two cpusets are not equal, or if there is only ONE PU available to us */ if (0 != hwloc_bitmap_compare(boundset, rootset) || - opal_hwloc_base_single_cpu(rootset) || - opal_hwloc_base_single_cpu(boundset)) { + is_single_cpu(rootset) || + is_single_cpu(boundset)) { bound = true; } } /* If we are not bound, indicate that */ if (!bound) { - strncat(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1); + strncat(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); ret = OMPI_SUCCESS; } /* If we are bound, print it out */ else { - ret = opal_hwloc_base_cset2str(str, OMPI_AFFINITY_STRING_MAX, - opal_hwloc_topology, - boundset); - if (OPAL_ERR_NOT_BOUND == ret) { + ret = cset2str(str, OMPI_AFFINITY_STRING_MAX, + opal_hwloc_topology, + boundset); + if (OMPI_ERR_NOT_BOUND == ret) { opal_string_copy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX); ret = OMPI_SUCCESS; } @@ -182,7 +459,7 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) { bool first = true; int i, num_cores, num_pus; - char tmp[BUFSIZ]; + char tmp[OMPI_AFFINITY_STRING_MAX - 1] = {0}; const int stmp = sizeof(tmp) - 1; hwloc_obj_t socket, core, c2; @@ -192,12 +469,12 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) NULL != socket; socket = socket->next_cousin) { /* If this isn't the first socket, add a delimiter */ if (!first) { - strncat(str, "; ", OMPI_AFFINITY_STRING_MAX - strlen(str)); + strncat(str, "; ", OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); } first = false; snprintf(tmp, stmp, "socket %d has ", socket->os_index); - strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str)); + strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); /* Find out how many cores are inside this socket, and get an object pointing to the first core. 
Also count how many PUs @@ -217,13 +494,13 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) /* Only 1 core */ if (1 == num_cores) { strncat(str, "1 core with ", - OMPI_AFFINITY_STRING_MAX - strlen(str)); + OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); if (1 == num_pus) { strncat(str, "1 hwt", - OMPI_AFFINITY_STRING_MAX - strlen(str)); + OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); } else { snprintf(tmp, stmp, "%d hwts", num_pus); - strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str)); + strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); } } @@ -232,7 +509,7 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) bool same = true; snprintf(tmp, stmp, "%d cores", num_cores); - strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str)); + strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); /* Do all the cores have the same number of PUs? */ for (c2 = core; NULL != c2; c2 = c2->next_cousin) { @@ -248,9 +525,9 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) /* Yes, they all have the same number of PUs */ if (same) { snprintf(tmp, stmp, ", each with %d hwt", num_pus); - strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str)); + strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); if (num_pus != 1) { - strncat(str, "s", OMPI_AFFINITY_STRING_MAX - strlen(str)); + strncat(str, "s", OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); } } @@ -258,11 +535,11 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) else { bool first_iter = true; - strncat(str, "with (", OMPI_AFFINITY_STRING_MAX - strlen(str)); + strncat(str, "with (", OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); for (c2 = core; NULL != c2; c2 = c2->next_cousin) { if (!first_iter) { strncat(str, ", ", - OMPI_AFFINITY_STRING_MAX - strlen(str)); + OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); } first_iter = false; @@ -270,10 +547,10 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) core->cpuset, HWLOC_OBJ_PU); 
snprintf(tmp, stmp, "%d", i); - strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str)); + strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); } strncat(str, ") hwts", - OMPI_AFFINITY_STRING_MAX - strlen(str)); + OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); } } } @@ -297,10 +574,10 @@ static int get_layout_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]) hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_list_sscanf(cpuset, opal_process_info.cpuset); - if(OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(str, - OMPI_AFFINITY_STRING_MAX, - opal_hwloc_topology, - cpuset)) + if(OMPI_ERR_NOT_BOUND == cset2mapstr(str, + OMPI_AFFINITY_STRING_MAX, + opal_hwloc_topology, + cpuset)) { opal_string_copy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX); } @@ -333,24 +610,24 @@ static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) /* we are bound if the two cpusets are not equal, or if there is only ONE PU available to us */ if (0 != hwloc_bitmap_compare(boundset, rootset) || - opal_hwloc_base_single_cpu(rootset) || - opal_hwloc_base_single_cpu(boundset)) { + is_single_cpu(rootset) || + is_single_cpu(boundset)) { bound = true; } } /* If we are not bound, indicate that */ if (!bound) { - strncat(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1); + strncat(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); ret = OMPI_SUCCESS; } /* If we are bound, print it out */ else { - ret = opal_hwloc_base_cset2mapstr(str, OMPI_AFFINITY_STRING_MAX, - opal_hwloc_topology, - boundset); - if (OPAL_ERR_NOT_BOUND == ret) { + ret = cset2mapstr(str, OMPI_AFFINITY_STRING_MAX, + opal_hwloc_topology, + boundset); + if (OMPI_ERR_NOT_BOUND == ret) { opal_string_copy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX); ret = OMPI_SUCCESS; } @@ -373,7 +650,6 @@ static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]) { int core_index, pu_index; - int len = OMPI_AFFINITY_STRING_MAX; 
hwloc_obj_t socket, core, pu; str[0] = '\0'; @@ -383,7 +659,7 @@ static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]) HWLOC_OBJ_SOCKET, 0); NULL != socket; socket = socket->next_cousin) { - strncat(str, "[", len - strlen(str)); + strncat(str, "[", OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); /* Iterate over all existing cores in this socket */ core_index = 0; @@ -395,7 +671,7 @@ static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]) socket->cpuset, HWLOC_OBJ_CORE, ++core_index)) { if (core_index > 0) { - strncat(str, "/", len - strlen(str)); + strncat(str, "/", OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); } /* Iterate over all existing PUs in this core */ @@ -407,10 +683,10 @@ static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]) pu = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, core->cpuset, HWLOC_OBJ_PU, ++pu_index)) { - strncat(str, ".", len - strlen(str)); + strncat(str, ".", OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); } } - strncat(str, "]", len - strlen(str)); + strncat(str, "]", OMPI_AFFINITY_STRING_MAX - strlen(str) - 1); } return OMPI_SUCCESS; diff --git a/ompi/mpiext/cuda/c/MPIX_Query_cuda_support.3in b/ompi/mpiext/cuda/c/MPIX_Query_cuda_support.3in index 676ec570bc9..711d9fe1f9c 100644 --- a/ompi/mpiext/cuda/c/MPIX_Query_cuda_support.3in +++ b/ompi/mpiext/cuda/c/MPIX_Query_cuda_support.3in @@ -24,8 +24,33 @@ There is no C++ binding for this function. .SH DESCRIPTION .ft R +This routine returns 1 if the MPI library is built with CUDA support and the runtime supports CUDA buffers. +This routine must be called after MPI is initialized by a call to MPI_Init or MPI_Init_thread.
+ +.SH Examples .ft R +.nf + +#include <stdio.h> +#include "mpi.h" + +#include "mpi-ext.h" /* Needed for CUDA-aware check */ + +int main(int argc, char *argv[]) +{ + + MPI_Init(&argc, &argv); + + if (MPIX_Query_cuda_support()) { + printf("This MPI library has CUDA-aware support.\n"); + } else { + printf("This MPI library does not have CUDA-aware support.\n"); + } + MPI_Finalize(); + + return 0; +} +.fi .SH See Also .ft R diff --git a/ompi/mpiext/cuda/c/mpiext_cuda.c b/ompi/mpiext/cuda/c/mpiext_cuda.c index 499d1441c39..55a1ad93160 100644 --- a/ompi/mpiext/cuda/c/mpiext_cuda.c +++ b/ompi/mpiext/cuda/c/mpiext_cuda.c @@ -21,11 +21,12 @@ #include #include "opal/constants.h" +#include "opal/runtime/opal_params.h" #include "ompi/mpiext/cuda/c/mpiext_cuda_c.h" /* If CUDA-aware support is configured in, return 1. Otherwise, return 0. * This API may be extended to return more features in the future. */ int MPIX_Query_cuda_support(void) { - return OPAL_CUDA_SUPPORT; + return opal_built_with_cuda_support && opal_cuda_runtime_initialized; } diff --git a/ompi/mpiext/example/configure.m4 b/ompi/mpiext/example/configure.m4 index 58370e0a14c..664b69e1254 100644 --- a/ompi/mpiext/example/configure.m4 +++ b/ompi/mpiext/example/configure.m4 @@ -55,3 +55,12 @@ AC_DEFUN([OMPI_MPIEXT_example_NEED_INIT], [1]) # libraries for any of the bindings. If finer-grained control is needed # someday, we may need to split this into multiple macros. #AC_DEFUN([OMPI_MPIEXT_example_HAVE_OBJECT], [0]) + +# This is a hook that runs after the CONFIG macro and after the +# configure system decides if this extension will be built (such +# as if it was disabled by configure option). This is a great +# place to put any globally-visible AC_DEFINE or AM_CONDITIONAL +# macros that are based on this component building.
+AC_DEFUN([OMPI_MPIEXT_example_POST_CONFIG], [ + opal_show_verbose "example component build result: $1" +]) diff --git a/ompi/mpiext/example/use-mpi-f08/Makefile.am b/ompi/mpiext/example/use-mpi-f08/Makefile.am index f495b4414d6..ef2ddd79d13 100644 --- a/ompi/mpiext/example/use-mpi-f08/Makefile.am +++ b/ompi/mpiext/example/use-mpi-f08/Makefile.am @@ -2,6 +2,7 @@ # Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2022 IBM Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -9,6 +10,14 @@ # $HEADER$ # +# Note that Automake's Fortran-building rules use CPPFLAGS and +# AM_CPPFLAGS. This can cause weirdness (e.g., +# https://github.com/open-mpi/ompi/issues/7253 and +# https://github.com/open-mpi/ompi/issues/9716). Let's just zero +# those out and rely on AM_FCFLAGS. +CPPFLAGS = +AM_CPPFLAGS = + # This file builds the use_mpi_f08-based bindings for MPI extensions. It # is optional in MPI extensions. diff --git a/ompi/mpiext/ftmpi/c/profile/Makefile.am b/ompi/mpiext/ftmpi/c/profile/Makefile.am index 1651dd77f44..89472e27b9e 100644 --- a/ompi/mpiext/ftmpi/c/profile/Makefile.am +++ b/ompi/mpiext/ftmpi/c/profile/Makefile.am @@ -2,6 +2,7 @@ # Copyright (c) 2016-2018 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. +# Copyright (c) 2021 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$ # # Additional copyrights may follow @@ -38,11 +39,3 @@ $(nodist_libpmpiext_ftmpi_c_la_SOURCES): # These files were created by targets above MAINTAINERCLEANFILES = $(nodist_libpmpiext_ftmpi_c_la_SOURCES) - -# Don't want these targets in here - -tags-recursive: -tags: -: -GTAGS: -ID: diff --git a/ompi/mpiext/ftmpi/use-mpi-f08/Makefile.am b/ompi/mpiext/ftmpi/use-mpi-f08/Makefile.am index d06c0aa6837..075bd2fbe12 100644 --- a/ompi/mpiext/ftmpi/use-mpi-f08/Makefile.am +++ b/ompi/mpiext/ftmpi/use-mpi-f08/Makefile.am @@ -6,6 +6,7 @@ # Copyright (c) 2018 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. +# Copyright (c) 2022 IBM Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -13,6 +14,14 @@ # $HEADER$ # +# Note that Automake's Fortran-building rules use CPPFLAGS and +# AM_CPPFLAGS. This can cause weirdness (e.g., +# https://github.com/open-mpi/ompi/issues/7253 and +# https://github.com/open-mpi/ompi/issues/9716). Let's just zero +# those out and rely on AM_FCFLAGS. +CPPFLAGS = +AM_CPPFLAGS = + # This file builds the use_mpi_f08-based bindings for MPI extensions. It # is optional in MPI extensions. diff --git a/ompi/mpiext/mpiext.c b/ompi/mpiext/mpiext.c index 3a0012f125a..1c59b520c8f 100644 --- a/ompi/mpiext/mpiext.c +++ b/ompi/mpiext/mpiext.c @@ -1,3 +1,14 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + #include "ompi_config.h" #include @@ -5,17 +16,16 @@ #include "ompi/constants.h" #include "ompi/mpiext/mpiext.h" #include "ompi/mpiext/static-components.h" +#include "ompi/instance/instance.h" - -int -ompi_mpiext_init(void) +static int ompi_mpiext_fini (void) { const ompi_mpiext_component_t **tmp = ompi_mpiext_components; int ret; while (NULL != (*tmp)) { - if (NULL != (*tmp)->init) { - ret = (*tmp)->init(); + if (NULL != (*tmp)->fini) { + ret = (*tmp)->fini(); if (OMPI_SUCCESS != ret) return ret; } tmp++; @@ -24,20 +34,21 @@ ompi_mpiext_init(void) return OMPI_SUCCESS; } - int -ompi_mpiext_fini(void) +ompi_mpiext_init(void) { const ompi_mpiext_component_t **tmp = ompi_mpiext_components; int ret; while (NULL != (*tmp)) { - if (NULL != (*tmp)->fini) { - ret = (*tmp)->fini(); + if (NULL != (*tmp)->init) { + ret = (*tmp)->init(); if (OMPI_SUCCESS != ret) return ret; } tmp++; } + ompi_mpi_instance_append_finalize (ompi_mpiext_fini); + return OMPI_SUCCESS; } diff --git a/ompi/mpiext/mpiext.h b/ompi/mpiext/mpiext.h index 6a93563c791..e5488cdc357 100644 --- a/ompi/mpiext/mpiext.h +++ b/ompi/mpiext/mpiext.h @@ -1,4 +1,11 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * * $HEADER$ */ #if defined(c_plusplus) || defined(__cplusplus) @@ -8,7 +15,6 @@ extern "C" { #include "ompi_config.h" OMPI_DECLSPEC int ompi_mpiext_init(void); -OMPI_DECLSPEC int ompi_mpiext_fini(void); typedef int (*ompi_mpiext_init_fn_t)(void); typedef int (*ompi_mpiext_fini_fn_t)(void); diff --git a/ompi/mpiext/pcollreq/Makefile.am b/ompi/mpiext/pcollreq/Makefile.am deleted file mode 100644 index 329a4d1a9df..00000000000 --- a/ompi/mpiext/pcollreq/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ -# -# Copyright (c) 2017-2018 FUJITSU LIMITED. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -SUBDIRS = c mpif-h use-mpi use-mpi-f08 - -EXTRA_DIST = README.md diff --git a/ompi/mpiext/pcollreq/README.md b/ompi/mpiext/pcollreq/README.md deleted file mode 100644 index 124019ea735..00000000000 --- a/ompi/mpiext/pcollreq/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# Open MPI extension: pcollreq - -Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. - -This extension provides the feature of persistent collective -communication operations and persistent neighborhood collective -communication operations, which is planned to be included in the next -MPI Standard after MPI-3.1 as of Nov. 2018. - -See `MPIX_Barrier_init(3)` for more details. - -The code will be moved to the `ompi/mpi` directory and the `MPIX_` -prefix will be switch to the `MPI_` prefix once the MPI Standard which -includes this feature is published. diff --git a/ompi/mpiext/pcollreq/c/MPIX_Allgather_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Allgather_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Allgather_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Allgatherv_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Allgatherv_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Allgatherv_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Allreduce_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Allreduce_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Allreduce_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Alltoall_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Alltoall_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Alltoall_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so 
man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Alltoallv_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Alltoallv_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Alltoallv_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Alltoallw_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Alltoallw_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Alltoallw_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Barrier_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Barrier_init.3in deleted file mode 100644 index db6cccc9c54..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Barrier_init.3in +++ /dev/null @@ -1,565 +0,0 @@ -.\" -*- nroff -*- -.\" Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -.\" $COPYRIGHT$ -.TH MPIX_Barrier_init 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" -.SH NAME -\fBMPIX_Allgather_init, MPIX_Allgatherv_init, MPIX_Allreduce_init, MPIX_Alltoall_init, MPIX_Alltoallv_init, MPIX_Alltoallw_init, MPIX_Barrier_init, MPIX_Bcast_init, MPIX_Exscan_init, MPIX_Gather_init, MPIX_Gatherv_init, MPIX_Reduce_init, MPIX_Reduce_scatter_init, MPIX_Reduce_scatter_block_init, MPIX_Scan_init, MPIX_Scatter_init, MPIX_Scatterv_init, MPIX_Neighbor_allgather_init, MPIX_Neighbor_allgatherv_init, MPIX_Neighbor_alltoall_init, MPIX_Neighbor_alltoallv_init, MPIX_Neighbor_alltoallw_init\fP \- Builds a handle for a collective communication or neighborhood collective communication - -.SH SYNTAX -.ft R -.SH C Syntax -.nf -#include -#include -int MPIX_Allgather_init(const void *\fIsendbuf\fP, int \fIsendcount\fP, - MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, int \fIrecvcount\fP, - MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Allgatherv_init(const void *\fIsendbuf\fP, int \fIsendcount\fP, - MPI_Datatype 
\fIsendtype\fP, void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], - const int \fIdispls\fP[], MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, - MPI_Info \fIinfo\fP, MPI_Request *\fIrequest\fP) - -int MPIX_Allreduce_init(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, int \fIcount\fP, - MPI_Datatype \fIdatatype\fP, MPI_Op \fIop\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Alltoall_init(const void *\fIsendbuf\fP, int \fIsendcount\fP, - MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, int \fIrecvcount\fP, - MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Alltoallv_init(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], - const int \fIsdispls\fP[], MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, - const int \fIrecvcounts\fP[], const int \fIrdispls\fP[], MPI_Datatype \fIrecvtype\fP, - MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, MPI_Request *\fIrequest\fP) - -int MPIX_Alltoallw_init(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], - const int \fIsdispls\fP[], const MPI_Datatype \fIsendtypes\fP[], void *\fIrecvbuf\fP, - const int \fIrecvcounts\fP[], const int \fIrdispls\fP[], - const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Barrier_init(MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Bcast_init(void *\fIbuffer\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, - int \fIroot\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, MPI_Request *\fIrequest\fP) - -int MPIX_Exscan_init(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, int \fIcount\fP, - MPI_Datatype \fIdatatype\fP, MPI_Op \fIop\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Gather_init(const void *\fIsendbuf\fP, int \fIsendcount\fP, - MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, int \fIrecvcount\fP, - MPI_Datatype \fIrecvtype\fP, int \fIroot\fP, 
MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Gatherv_init(const void *\fIsendbuf\fP, int \fIsendcount\fP, - MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], - const int \fIdispls\fP[], MPI_Datatype \fIrecvtype\fP, int \fIroot\fP, MPI_Comm \fIcomm\fP, - MPI_Info \fIinfo\fP, MPI_Request *\fIrequest\fP) - -int MPIX_Reduce_init(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, int \fIcount\fP, - MPI_Datatype \fIdatatype\fP, MPI_Op \fIop\fP, int \fIroot\fP, MPI_Comm \fIcomm\fP, - MPI_Info \fIinfo\fP, MPI_Request *\fIrequest\fP) - -int MPIX_Reduce_scatter_init(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, - const int \fIrecvcounts\fP[], MPI_Datatype \fIdatatype\fP, MPI_Op \fIop\fP, - MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, MPI_Request *\fIrequest\fP) - -int MPIX_Reduce_scatter_block_init(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, - int \fIrecvcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Op \fIop\fP, MPI_Comm \fIcomm\fP, - MPI_Info \fIinfo\fP, MPI_Request *\fIrequest\fP) - -int MPIX_Scan_init(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, int \fIcount\fP, - MPI_Datatype \fIdatatype\fP, MPI_Op \fIop\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Scatter_init(const void *\fIsendbuf\fP, int \fIsendcount\fP, - MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, int \fIrecvcount\fP, - MPI_Datatype \fIrecvtype\fP, int \fIroot\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Scatterv_init(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], - const int \fIdispls\fP[], MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, - int \fIrecvcount\fP, MPI_Datatype \fIrecvtype\fP, int \fIroot\fP, MPI_Comm \fIcomm\fP, - MPI_Info \fIinfo\fP, MPI_Request *\fIrequest\fP) - -int MPIX_Neighbor_allgather_init(const void *\fIsendbuf\fP, int \fIsendcount\fP, - MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, int \fIrecvcount\fP, - MPI_Datatype 
\fIrecvtype\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Neighbor_allgatherv_init(const void *\fIsendbuf\fP, int \fIsendcount\fP, - MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], - const int \fIdispls\fP[], MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, - MPI_Info \fIinfo\fP, MPI_Request *\fIrequest\fP) - -int MPIX_Neighbor_alltoall_init(const void *\fIsendbuf\fP, int \fIsendcount\fP, - MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, int \fIrecvcount\fP, - MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Neighbor_alltoallv_init(const void *\fIsendbuf\fP, - const int \fIsendcounts\fP[], const int \fIsdispls\fP[], MPI_Datatype \fIsendtype\fP, - void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const int \fIrdispls\fP[], - MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -int MPIX_Neighbor_alltoallw_init(const void *\fIsendbuf\fP, - const int \fIsendcounts\fP[], const MPI_Aint \fIsdispls\fP[], - const MPI_Datatype \fIsendtypes\fP[], void *\fIrecvbuf\fP, - const int \fIrecvcounts\fP[], const MPI_Aint \fIrdispls\fP[], - const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, - MPI_Request *\fIrequest\fP) - -.fi -.SH Fortran Syntax -.nf -USE MPI -USE MPI_EXT -! 
or the older form: INCLUDE 'mpif.h'; INCLUDE 'mpif-ext.h' -MPIX_ALLGATHER_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, - RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) - \fISENDBUF\fP(*)\fI, RECVBUF\fP(*) - INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, COMM, INFO\fP - INTEGER \fIREQUEST, IERROR\fP - -MPIX_ALLGATHERV_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, - RECVCOUNT, DISPLS, RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) - \fISENDBUF\fP(*)\fI, RECVBUF\fP(*) - INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT\fP(*) - INTEGER \fIDISPLS\fP(*)\fI, RECVTYPE, COMM, INFO, REQUEST, IERROR\fP - -MPIX_ALLREDUCE_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, INFO, - REQUEST, IERROR\fP) - \fISENDBUF\fP(*)\fI, RECVBUF\fP(*) - INTEGER \fICOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP - -MPIX_ALLTOALL_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, - RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE\fP - INTEGER \fICOMM, INFO, REQUEST, IERROR\fP - -MPIX_ALLTOALLV_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, - RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPE, COMM, INFO, REQUEST, - IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPE\fP - INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPE\fP - INTEGER \fICOMM, INFO, REQUEST, IERROR\fP - -MPIX_ALLTOALLW_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, - RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPES, COMM, INFO, REQUEST, - IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPES(*)\fP - INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPES(*)\fP - INTEGER \fICOMM, INFO, REQUEST, IERROR\fP - -MPIX_BARRIER_INIT(\fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP) - INTEGER \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP - -MPIX_BCAST_INIT(\fIBUFFER\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIROOT\fP, \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, - \fIIERROR\fP) - \fIBUFFER\fP(*) - INTEGER 
\fICOUNT\fP, \fIDATATYPE\fP, \fIROOT\fP, \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP - -MPIX_EXSCAN_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, INFO, - REQUEST, IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fICOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP - -MPIX_GATHER_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, - RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, ROOT\fP - INTEGER \fICOMM, INFO, REQUEST, IERROR\fP - -MPIX_GATHERV_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNTS, - DISPLS, RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNTS(*), DISPLS(*)\fP - INTEGER \fIRECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP - -MPIX_REDUCE_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, ROOT, COMM, - INFO, REQUEST, IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fICOUNT, DATATYPE, OP, ROOT, COMM, INFO, REQUEST, IERROR\fP - -MPIX_REDUCE_SCATTER_INIT(\fISENDBUF, RECVBUF, RECVCOUNTS, DATATYPE, OP, - COMM, INFO, REQUEST, IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fIRECVCOUNTS(*), DATATYPE, OP, COMM, INFO, REQUEST, IERROR \fP - -MPIX_REDUCE_SCATTER_BLOCK_INIT(\fISENDBUF, RECVBUF, RECVCOUNT, DATATYPE, - OP, COMM, INFO, REQUEST, IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fIRECVCOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR \fP - -MPIX_SCAN_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, INFO, - REQUEST, IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fICOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP - -MPIX_SCATTER_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, - RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, ROOT\fP - INTEGER \fICOMM, INFO, REQUEST, IERROR\fP - -MPIX_SCATTERV_INIT(\fISENDBUF, SENDCOUNTS, DISPLS, SENDTYPE, RECVBUF, - 
RECVCOUNT, RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNTS(*), DISPLS(*), SENDTYPE\fP - INTEGER \fIRECVCOUNT, RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP - -MPIX_NEIGHBOR_ALLGATHER_INIT(\fISENDBUF\fP, \fISENDCOUNT\fP, \fISENDTYPE\fP, \fIRECVBUF\fP, - \fIRECVCOUNT\fP, \fIRECVTYPE\fP, \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP) - \fISENDBUF\fP(*), \fIRECVBUF\fP(*) - INTEGER \fISENDCOUNT\fP, \fISENDTYPE\fP, \fIRECVCOUNT\fP, \fIRECVTYPE\fP, \fICOMM\fP, - INTEGER \fIINFO, REQUEST, IERROR\fP - -MPIX_NEIGHBOR_ALLGATHERV_INIT(\fISENDBUF\fP, \fISENDCOUNT\fP, \fISENDTYPE\fP, \fIRECVBUF\fP, - \fIRECVCOUNT\fP, \fIDISPLS\fP, \fIRECVTYPE\fP, \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP) - \fISENDBUF\fP(*), \fIRECVBUF\fP(*) - INTEGER \fISENDCOUNT\fP, \fISENDTYPE\fP, \fIRECVCOUNT\fP(*), - INTEGER \fIDISPLS\fP(*), \fIRECVTYPE\fP, \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP - -MPIX_NEIGHBOR_ALLTOALL_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, - RECVCOUNT, RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE\fP - INTEGER \fICOMM, INFO, REQUEST, IERROR\fP - -MPIX_NEIGHBOR_ALLTOALLV_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, - RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPE, COMM, INFO, REQUEST, - IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPE\fP - INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPE\fP - INTEGER \fICOMM, INFO, REQUEST, IERROR\fP - -MPIX_NEIGHBOR_ALLTOALLW_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, - RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPES, COMM, INFO, REQUEST, - IERROR\fP) - \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNTS(*), SENDTYPES(*)\fP - INTEGER \fIRECVCOUNTS(*), RECVTYPES(*)\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fISDISPLS(*), RDISPLS(*)\fP - INTEGER \fICOMM, INFO, REQUEST, IERROR\fP - -.fi -.SH Fortran 2008 Syntax -.nf -USE mpi_f08 -USE mpi_f08_ext 
-MPIX_Allgather_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, - \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Allgatherv_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcounts\fP, - \fIdispls\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIsendcount\fP - INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIrecvcounts(*)\fP, \fIdispls(*)\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Allreduce_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIcomm\fP, \fIinfo\fP, - \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIcount\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP - TYPE(MPI_Op), INTENT(IN) :: \fIop\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Alltoall_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, - \fIrecvtype\fP, 
\fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Alltoallv_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtype\fP, \fIrecvbuf\fP, - \fIrecvcounts\fP, \fIrdispls\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIsdispls(*)\fP, - \fIrecvcounts(*)\fP, \fIrdispls(*)\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Alltoallw_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtypes\fP, \fIrecvbuf\fP, - \fIrecvcounts\fP, \fIrdispls\fP, \fIrecvtypes\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIsdispls(*)\fP, - \fIrecvcounts(*)\fP, \fIrdispls(*)\fP - TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: \fIsendtypes(*)\fP, - \fIrecvtypes(*)\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Barrier_init(\fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - 
TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Bcast_init(\fIbuffer\fP, \fIcount\fP, \fIdatatype\fP, \fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, - \fIierror\fP) - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIbuffer\fP - INTEGER, INTENT(IN) :: \fIcount\fP, \fIroot\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Exscan_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIcomm\fP, \fIinfo\fP, - \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIcount\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP - TYPE(MPI_Op), INTENT(IN) :: \fIop\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Gather_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, - \fIrecvcount\fP, \fIrecvtype\fP, \fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP, \fIroot\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Gatherv_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcounts\fP, - \fIdispls\fP, \fIrecvtype\fP, 
\fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIroot\fP - INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIrecvcounts(*)\fP, \fIdispls(*)\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Reduce_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIroot\fP, \fIcomm\fP, - \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIcount\fP, \fIroot\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP - TYPE(MPI_Op), INTENT(IN) :: \fIop\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Reduce_scatter_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIrecvcounts\fP, \fIdatatype\fP, \fIop\fP, - \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIrecvcounts(*)\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP - TYPE(MPI_Op), INTENT(IN) :: \fIop\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Reduce_scatter_block_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIrecvcount\fP, \fIdatatype\fP, - \fIop\fP, \fIcomm\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - 
TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIrecvcount\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP - TYPE(MPI_Op), INTENT(IN) :: \fIop\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Reduce_scatter_block_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIrecvcount\fP, \fIdatatype\fP, \fIop\fP, - \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIrecvcount\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP - TYPE(MPI_Op), INTENT(IN) :: \fIop\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Scan_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIcomm\fP, \fIinfo\fP, - \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIcount\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP - TYPE(MPI_Op), INTENT(IN) :: \fIop\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Scatter_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, - \fIrecvtype\fP, \fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP, \fIroot\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - 
TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Scatterv_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIdispls\fP, \fIsendtype\fP, \fIrecvbuf\fP, - \fIrecvcount\fP, \fIrecvtype\fP, \fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIdispls(*)\fP - INTEGER, INTENT(IN) :: \fIrecvcount\fP, \fIroot\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Neighbor_allgather_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, - \fIrecvcount\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Neighbor_allgatherv_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, - \fIrecvcounts\fP, \fIdispls\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIsendcount\fP - INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIrecvcounts(*)\fP, \fIdispls(*)\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) 
:: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Neighbor_alltoall_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, - \fIrecvcount\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Neighbor_alltoallv_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtype\fP, - \fIrecvbuf\fP, \fIrecvcounts\fP, \fIrdispls\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, - \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIsdispls(*)\fP, - \fIrecvcounts(*)\fP, \fIrdispls(*)\fP - TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -MPIX_Neighbor_alltoallw_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtypes\fP, - \fIrecvbuf\fP, \fIrecvcounts\fP, \fIrdispls\fP, \fIrecvtypes\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, - \fIierror\fP) - TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP - TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP - INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIrecvcounts(*)\fP - INTEGER(KIND=MPI_ADDRESS_KIND), INTENT(IN), ASYNCHRONOUS :: - \fIsdispls(*)\fP, 
\fIrdispls(*)\fP - TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: \fIsendtypes(*)\fP, - \fIrecvtypes(*)\fP - TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP - TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP - TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP - INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP - -.fi - -.SH DESCRIPTION -.ft R -Creates a persistent communication request for a collective operation or neighborhood collective operation. - -As of Nov. 2018, the feature of persistent collective communication operations and persistent collective neighborhood communication operations is planned to be included in the next MPI Standard after MPI-3.1. -.nf - - https://github.com/mpi-forum/mpi-issues/issues/25 -.fi - -Open MPI implements 2018 Draft Specification of the MPI standard shown in the following URL. -.nf - - https://www.mpi-forum.org/docs/drafts/mpi-2018-draft-report.pdf -.fi - -The interface may still change in the standard. Therefore the prefix \fIMPIX_\fP is used instead of \fIMPI_\fP for these request creation routines. To start, complete, and free the created request, usual MPI routines (\fIMPI_Start\fP etc.) can be used. - -Future versions of Open MPI will switch to the \fIMPI_\fP prefix and will not require the C header file \fImpi-ext.h\fP, the Fortran modules \fImpi_ext\fP and \fImpi_f08_ext\fP, and the Fortran header file \fImpif-ext.h\fP once the MPI Standard which includes this feature is published. 
- -.SH EXAMPLE -.nf - - MPI_Request req; - MPIX_Barrier_init(MPI_COMM_WORLD, MPI_INFO_NULL, &req); - MPI_Start(&req); - MPI_Wait(&req, MPI_STATUS_IGNORE); - MPI_Request_free(&req); -.fi - -.SH SEE ALSO -.ft R -.sp -MPI_Start -.br -MPI_Startall -.br -MPI_Request_free diff --git a/ompi/mpiext/pcollreq/c/MPIX_Bcast_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Bcast_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Bcast_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Exscan_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Exscan_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Exscan_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Gather_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Gather_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Gather_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Gatherv_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Gatherv_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Gatherv_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Neighbor_allgather_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Neighbor_allgather_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Neighbor_allgather_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Neighbor_allgatherv_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Neighbor_allgatherv_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Neighbor_allgatherv_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git 
a/ompi/mpiext/pcollreq/c/MPIX_Neighbor_alltoall_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Neighbor_alltoall_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Neighbor_alltoall_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Neighbor_alltoallv_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Neighbor_alltoallv_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Neighbor_alltoallv_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Neighbor_alltoallw_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Neighbor_alltoallw_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Neighbor_alltoallw_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Reduce_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Reduce_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Reduce_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Reduce_scatter_block_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Reduce_scatter_block_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Reduce_scatter_block_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Reduce_scatter_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Reduce_scatter_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Reduce_scatter_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Scan_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Scan_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Scan_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so 
man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Scatter_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Scatter_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Scatter_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/MPIX_Scatterv_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Scatterv_init.3in deleted file mode 100644 index a9a04748c96..00000000000 --- a/ompi/mpiext/pcollreq/c/MPIX_Scatterv_init.3in +++ /dev/null @@ -1 +0,0 @@ -.so man3/MPIX_Barrier_init.3 diff --git a/ompi/mpiext/pcollreq/c/Makefile.am b/ompi/mpiext/pcollreq/c/Makefile.am deleted file mode 100644 index d4aeb8ef362..00000000000 --- a/ompi/mpiext/pcollreq/c/Makefile.am +++ /dev/null @@ -1,97 +0,0 @@ -# -# Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. -# Copyright (c) 2018 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -SUBDIRS = profile - -# OMPI_BUILD_MPI_PROFILING is enabled when we want our generated MPI_* symbols -# to be replaced by PMPI_*. -# In this directory, we need it to be 0 - -AM_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=0 - -include $(top_srcdir)/Makefile.ompi-rules - -noinst_LTLIBRARIES = libmpiext_pcollreq_c.la - -# This is where the top-level header file (that is included in -# ) must be installed. -ompidir = $(ompiincludedir)/mpiext - -# This is the header file that is installed. -ompi_HEADERS = mpiext_pcollreq_c.h - -# Sources for the convenience libtool library. Other than the one -# header file, all source files in the extension have no file naming -# conventions. 
-libmpiext_pcollreq_c_la_SOURCES = \ - $(ompi_HEADERS) \ - mpiext_pcollreq_c.c - -if BUILD_MPI_BINDINGS_LAYER -libmpiext_pcollreq_c_la_SOURCES += \ - allgather_init.c \ - allgatherv_init.c \ - allreduce_init.c \ - alltoall_init.c \ - alltoallv_init.c \ - alltoallw_init.c \ - barrier_init.c \ - bcast_init.c \ - exscan_init.c \ - gather_init.c \ - gatherv_init.c \ - reduce_init.c \ - reduce_scatter_block_init.c \ - reduce_scatter_init.c \ - scan_init.c \ - scatter_init.c \ - scatterv_init.c \ - \ - neighbor_allgather_init.c \ - neighbor_allgatherv_init.c \ - neighbor_alltoall_init.c \ - neighbor_alltoallv_init.c \ - neighbor_alltoallw_init.c -endif - -libmpiext_pcollreq_c_la_LIBADD = \ - profile/libpmpiext_pcollreq_c.la - -# Man page installation -nodist_man_MANS = \ - MPIX_Allgather_init.3 \ - MPIX_Allgatherv_init.3 \ - MPIX_Allreduce_init.3 \ - MPIX_Alltoall_init.3 \ - MPIX_Alltoallv_init.3 \ - MPIX_Alltoallw_init.3 \ - MPIX_Barrier_init.3 \ - MPIX_Bcast_init.3 \ - MPIX_Exscan_init.3 \ - MPIX_Gather_init.3 \ - MPIX_Gatherv_init.3 \ - MPIX_Reduce_init.3 \ - MPIX_Reduce_scatter_block_init.3 \ - MPIX_Reduce_scatter_init.3 \ - MPIX_Scan_init.3 \ - MPIX_Scatter_init.3 \ - MPIX_Scatterv_init.3 \ - MPIX_Neighbor_allgather_init.3 \ - MPIX_Neighbor_allgatherv_init.3 \ - MPIX_Neighbor_alltoall_init.3 \ - MPIX_Neighbor_alltoallv_init.3 \ - MPIX_Neighbor_alltoallw_init.3 - -# Man page sources -EXTRA_DIST = $(nodist_man_MANS:.3=.3in) - -distclean-local: - rm -f $(nodist_man_MANS) diff --git a/ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.c b/ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.c deleted file mode 100644 index aa4d9acfab0..00000000000 --- a/ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.c +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -void mpiext_pcollreq_dummy(void); - -void mpiext_pcollreq_dummy() { -} diff --git a/ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h b/ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h deleted file mode 100644 index 11bdb9937d9..00000000000 --- a/ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -OMPI_DECLSPEC int MPIX_Allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Allreduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Alltoallw_init(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, MPI_Info info, 
MPI_Request *request); -OMPI_DECLSPEC int MPIX_Barrier_init(MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Bcast_init(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Exscan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Gather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Gatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Reduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Reduce_scatter_init(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Scan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Scatter_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Scatterv_init(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, 
int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); - -OMPI_DECLSPEC int MPIX_Neighbor_allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Neighbor_allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Neighbor_alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Neighbor_alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int MPIX_Neighbor_alltoallw_init(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, MPI_Info info, MPI_Request *request); - - /* - * Profiling MPI API - */ -OMPI_DECLSPEC int PMPIX_Allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Allreduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Alltoall_init(const 
void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Alltoallw_init(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Barrier_init(MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Bcast_init(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Exscan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Gather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Gatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Reduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Reduce_scatter_init(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Reduce_scatter_block_init(const 
void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Scan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Scatter_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Scatterv_init(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); - -OMPI_DECLSPEC int PMPIX_Neighbor_allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Neighbor_allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Neighbor_alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Neighbor_alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Info info, MPI_Request *request); -OMPI_DECLSPEC int PMPIX_Neighbor_alltoallw_init(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, MPI_Info info, MPI_Request 
*request); diff --git a/ompi/mpiext/pcollreq/c/profile/Makefile.am b/ompi/mpiext/pcollreq/c/profile/Makefile.am deleted file mode 100644 index 95595743229..00000000000 --- a/ompi/mpiext/pcollreq/c/profile/Makefile.am +++ /dev/null @@ -1,66 +0,0 @@ -# -# Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. -# Copyright (c) 2018 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# OMPI_BUILD_MPI_PROFILING is enabled when we want our generated MPI_* symbols -# to be replaced by PMPI_*. -# In this directory, we need it to be 1 - -AM_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=1 - -include $(top_srcdir)/Makefile.ompi-rules - -# Convenience libtool library that will be slurped up into libmpi.la. -noinst_LTLIBRARIES = libpmpiext_pcollreq_c.la - -# This is where the top-level header file (that is included in -# ) must be installed. -ompidir = $(ompiincludedir)/mpiext - -# This is the header file that is installed. -ompi_HEADERS = pmpiext_pcollreq_c.h - -# Sources for the convenience libtool library. Other than the one -# header file, all source files in the extension have no file naming -# conventions. -nodist_libpmpiext_pcollreq_c_la_SOURCES = \ - $(ompi_HEADERS) \ - pallgather_init.c \ - pallgatherv_init.c \ - pallreduce_init.c \ - palltoall_init.c \ - palltoallv_init.c \ - palltoallw_init.c \ - pbarrier_init.c \ - pbcast_init.c \ - pexscan_init.c \ - pgather_init.c \ - pgatherv_init.c \ - preduce_init.c \ - preduce_scatter_block_init.c \ - preduce_scatter_init.c \ - pscan_init.c \ - pscatter_init.c \ - pscatterv_init.c \ - \ - pneighbor_allgather_init.c \ - pneighbor_allgatherv_init.c \ - pneighbor_alltoall_init.c \ - pneighbor_alltoallv_init.c \ - pneighbor_alltoallw_init.c - -# -# Sym link in the sources from the real MPI directory -# -$(nodist_libpmpiext_pcollreq_c_la_SOURCES): - $(OMPI_V_LN_S) if test ! 
-r $@ ; then \ - pname=`echo $@ | cut -b '2-'` ; \ - $(LN_S) $(top_srcdir)/ompi/mpiext/pcollreq/c/$$pname $@ ; \ - fi diff --git a/ompi/mpiext/pcollreq/configure.m4 b/ompi/mpiext/pcollreq/configure.m4 deleted file mode 100644 index 26d974b9927..00000000000 --- a/ompi/mpiext/pcollreq/configure.m4 +++ /dev/null @@ -1,30 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2017-2018 FUJITSU LIMITED. All rights reserved. -# Copyright (c) 2018 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# OMPI_MPIEXT_pcollreq_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([OMPI_MPIEXT_pcollreq_CONFIG],[ - AC_CONFIG_FILES([ - ompi/mpiext/pcollreq/Makefile - ompi/mpiext/pcollreq/c/Makefile - ompi/mpiext/pcollreq/c/profile/Makefile - ompi/mpiext/pcollreq/mpif-h/Makefile - ompi/mpiext/pcollreq/mpif-h/profile/Makefile - ompi/mpiext/pcollreq/use-mpi/Makefile - ompi/mpiext/pcollreq/use-mpi-f08/Makefile - ]) - - AS_IF([test "$ENABLE_pcollreq" = "1" || \ - test "$ENABLE_EXT_ALL" = "1"], - [$1], - [$2]) -]) diff --git a/ompi/mpiext/pcollreq/mpif-h/Makefile.am b/ompi/mpiext/pcollreq/mpif-h/Makefile.am deleted file mode 100644 index 7282ac086d7..00000000000 --- a/ompi/mpiext/pcollreq/mpif-h/Makefile.am +++ /dev/null @@ -1,88 +0,0 @@ -# -# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. -# Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -# Copyright (c) 2018 Research Organization for Information Science -# and Technology (RIST). All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -SUBDIRS = profile - -# This file builds the mpif.h-based bindings for MPI extensions. It -# is optional in MPI extensions. - -# We must set these #defines so that the inner OMPI MPI prototype -# header files do the Right Thing. -AM_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=0 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1 - -# Note that the mpif.h-based bindings are optional -- they can only be -# built if OMPI is also building the Fortran-based bindings. So we -# initially set some Makefile macros to empty, and then conditionally -# add to them later. -noinst_LTLIBRARIES = - -# Directory where the header file to be included in mpif-ext.h must be -# installed. -ompidir = $(ompiincludedir)/mpiext - -# Just like noinst_LTLIBRARIES, set this macro to empty and -# conditionally add to it later. -ompi_HEADERS = - -# Use the Automake conditional to know if we're building the mpif.h -# bindings. -if OMPI_BUILD_FORTRAN_MPIFH_BINDINGS - -# If we are, build the convenience libtool library that will be -# slurped up into libmpi_mpifh.la. -noinst_LTLIBRARIES += libmpiext_pcollreq_mpifh.la - -# This is the header file that is installed. -ompi_HEADERS += mpiext_pcollreq_mpifh.h - -noinst_HEADERS = mpiext_pcollreq_prototypes.h - -# Sources for the convenience libtool library. 
-libmpiext_pcollreq_mpifh_la_SOURCES = \ - $(ompi_HEADERS) \ - mpiext_pcollreq_mpifh.c - -if BUILD_MPI_BINDINGS_LAYER -libmpiext_pcollreq_mpifh_la_SOURCES += \ - allgather_init_f.c \ - allgatherv_init_f.c \ - allreduce_init_f.c \ - alltoall_init_f.c \ - alltoallv_init_f.c \ - alltoallw_init_f.c \ - barrier_init_f.c \ - bcast_init_f.c \ - exscan_init_f.c \ - gather_init_f.c \ - gatherv_init_f.c \ - reduce_init_f.c \ - reduce_scatter_init_f.c \ - reduce_scatter_block_init_f.c \ - scan_init_f.c \ - scatter_init_f.c \ - scatterv_init_f.c \ - \ - neighbor_allgather_init_f.c \ - neighbor_allgatherv_init_f.c \ - neighbor_alltoall_init_f.c \ - neighbor_alltoallv_init_f.c \ - neighbor_alltoallw_init_f.c -endif - -libmpiext_pcollreq_mpifh_la_LIBADD = \ - profile/libpmpiext_pcollreq_mpifh.la - -endif diff --git a/ompi/mpiext/pcollreq/mpif-h/allgather_init_f.c b/ompi/mpiext/pcollreq/mpif-h/allgather_init_f.c deleted file mode 100644 index 84e3f4259da..00000000000 --- a/ompi/mpiext/pcollreq/mpif-h/allgather_init_f.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" - -#if OMPI_BUILD_MPI_PROFILING -#if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_ALLGATHER_INIT = ompix_allgather_init_f -#pragma weak pmpix_allgather_init = ompix_allgather_init_f -#pragma weak pmpix_allgather_init_ = ompix_allgather_init_f -#pragma weak pmpix_allgather_init__ = ompix_allgather_init_f - -#pragma weak PMPIX_Allgather_init_f = ompix_allgather_init_f -#pragma weak PMPIX_Allgather_init_f08 = ompix_allgather_init_f -#else -OMPI_GENERATE_F77_BINDINGS (PMPIX_ALLGATHER_INIT, - pmpix_allgather_init, - pmpix_allgather_init_, - pmpix_allgather_init__, - pompix_allgather_init_f, - (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), - (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) -#endif -#endif - -#if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_ALLGATHER_INIT = ompix_allgather_init_f -#pragma weak mpix_allgather_init = ompix_allgather_init_f -#pragma weak mpix_allgather_init_ = ompix_allgather_init_f -#pragma weak mpix_allgather_init__ = ompix_allgather_init_f - -#pragma weak MPIX_Allgather_init_f = ompix_allgather_init_f -#pragma weak MPIX_Allgather_init_f08 = ompix_allgather_init_f -#else -#if ! 
OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_ALLGATHER_INIT, - mpix_allgather_init, - mpix_allgather_init_, - mpix_allgather_init__, - ompix_allgather_init_f, - (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), - (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) -#else -#define ompix_allgather_init_f pompix_allgather_init_f -#endif -#endif - - -void ompix_allgather_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, - char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, - MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) -{ - int ierr_c; - MPI_Comm c_comm; - MPI_Request c_req; - MPI_Datatype c_sendtype, c_recvtype; - MPI_Info c_info; - - c_comm = PMPI_Comm_f2c(*comm); - c_sendtype = PMPI_Type_f2c(*sendtype); - c_recvtype = PMPI_Type_f2c(*recvtype); - c_info = PMPI_Info_f2c(*info); - - sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); - sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); - recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - - ierr_c = PMPIX_Allgather_init(sendbuf, - OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, - OMPI_FINT_2_INT(*recvcount), - c_recvtype, c_comm, c_info, &c_req); - - if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); - - if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_req); -} diff --git a/ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_mpifh.c b/ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_mpifh.c deleted file mode 100644 index 36db262fad7..00000000000 --- a/ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_mpifh.c +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -void mpiext_pcollreq_mpifh_dummy(void); - -void mpiext_pcollreq_mpifh_dummy() { -} diff --git a/ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_mpifh.h b/ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_mpifh.h deleted file mode 100644 index 95e9b644c6f..00000000000 --- a/ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_mpifh.h +++ /dev/null @@ -1,15 +0,0 @@ -! -*- fortran -*- -! -! Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. -! Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -! Since the OMPI mpif.h interface does not prototype subroutines, do not -! declare any subroutines here. diff --git a/ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h b/ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h deleted file mode 100644 index 811a94b82e2..00000000000 --- a/ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Inria. All rights reserved. - * Copyright (c) 2011-2013 Universite Bordeaux 1 - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * Copyright (c) 2016-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * This file prototypes all MPI fortran functions in all four fortran - * symbol conventions as well as all the internal real OMPI wrapper - * functions (different from any of the four fortran symbol - * conventions for clarity, at the cost of more typing for me...). - * This file is included in the top-level build ONLY. The prototyping - * is done ONLY for MPI_* bindings - * - * Zeroth, the OMPI wrapper functions, with a ompi_ prefix and _f - * suffix. - * - * This is needed ONLY if the lower-level prototypes_pmpi.h has not - * already been included. - * - * Note about function pointers: all function pointers are prototyped - * here as (void*) rather than including the .h file that defines the - * proper type (e.g., "op/op.h" defines ompi_op_fortran_handler_fn_t, - * which is the function pointer type for fortran op callback - * functions). This is because there is no type checking coming in - * from fortran, so why bother? Also, including "op/op.h" (and - * friends) makes the all the f77 bindings files dependant on these - * files -- any change to any one of them will cause the recompilation - * of the entire set of f77 bindings (ugh!). - */ - -#include "ompi_config.h" -#include "ompi/errhandler/errhandler.h" -#include "ompi/attribute/attribute.h" -#include "ompi/op/op.h" -#include "ompi/request/grequest.h" -#include "ompi/mpi/fortran/base/datarep.h" - -#include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" - -BEGIN_C_DECLS - -/* These are the prototypes for the "real" back-end fortran functions. 
*/ -#define PN2(ret, mixed_name, lower_name, upper_name, args) \ - /* Prototype the actual OMPI function */ \ - OMPI_DECLSPEC ret o##lower_name##_f args; \ - /* Prototype the 4 versions of the MPI mpif.h name */ \ - OMPI_DECLSPEC ret lower_name args; \ - OMPI_DECLSPEC ret lower_name##_ args; \ - OMPI_DECLSPEC ret lower_name##__ args; \ - OMPI_DECLSPEC ret upper_name args; \ - /* Prototype the use mpi/use mpi_f08 names */ \ - OMPI_DECLSPEC ret mixed_name##_f08 args; \ - OMPI_DECLSPEC ret mixed_name##_f args; \ - /* Prototype the actual POMPI function */ \ - OMPI_DECLSPEC ret po##lower_name##_f args; \ - /* Prototype the 4 versions of the PMPI mpif.h name */ \ - OMPI_DECLSPEC ret p##lower_name args; \ - OMPI_DECLSPEC ret p##lower_name##_ args; \ - OMPI_DECLSPEC ret p##lower_name##__ args; \ - OMPI_DECLSPEC ret P##upper_name args; \ - /* Prototype the use mpi/use mpi_f08 PMPI names */ \ - OMPI_DECLSPEC ret P##mixed_name##_f08 args; \ - OMPI_DECLSPEC ret P##mixed_name##_f args - -PN2(void, MPIX_Allgather_init, mpix_allgather_init, MPIX_ALLGATHER_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Allgatherv_init, mpix_allgatherv_init, MPIX_ALLGATHERV_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -END_C_DECLS -PN2(void, MPIX_Allreduce_init, mpix_allreduce_init, MPIX_ALLREDUCE_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Alltoall_init, mpix_alltoall_init, MPIX_ALLTOALL_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, 
MPI_Fint *ierr)); -PN2(void, MPIX_Alltoallv_init, mpix_alltoallv_init, MPIX_ALLTOALLV_INIT, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Alltoallw_init, mpix_alltoallw_init, MPIX_ALLTOALLW_INIT, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Barrier_init, mpix_barrier_init, MPIX_BARRIER_INIT, (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Bcast_init, mpix_bcast_init, MPIX_BCAST_INIT, (char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Exscan_init, mpix_exscan_init, MPIX_EXSCAN_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Gather_init, mpix_gather_init, MPIX_GATHER_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Gatherv_init, mpix_gatherv_init, MPIX_GATHERV_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Reduce_init, mpix_reduce_init, MPIX_REDUCE_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Reduce_scatter_init, 
mpix_reduce_scatter_init, MPIX_REDUCE_SCATTER_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Reduce_scatter_block_init, mpix_reduce_scatter_block_init, MPIX_REDUCE_SCATTER_BLOCK_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Scan_init, mpix_scan_init, MPIX_SCAN_INIT, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Scatter_init, mpix_scatter_init, MPIX_SCATTER_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Scatterv_init, mpix_scatterv_init, MPIX_SCATTERV_INIT, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *displs, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Neighbor_allgather_init, mpix_neighbor_allgather_init, MPIX_NEIGHBOR_ALLGATHER_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Neighbor_allgatherv_init, mpix_neighbor_allgatherv_init, MPIX_NEIGHBOR_ALLGATHERV_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Neighbor_alltoall_init, mpix_neighbor_alltoall_init, MPIX_NEIGHBOR_ALLTOALL_INIT, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, 
MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Neighbor_alltoallv_init, mpix_neighbor_alltoallv_init, MPIX_NEIGHBOR_ALLTOALLV_INIT, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); -PN2(void, MPIX_Neighbor_alltoallw_init, mpix_neighbor_alltoallw_init, MPIX_NEIGHBOR_ALLTOALLW_INIT, (char *sendbuf, MPI_Fint *sendcounts, MPI_Aint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Aint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr)); diff --git a/ompi/mpiext/pcollreq/mpif-h/neighbor_allgather_init_f.c b/ompi/mpiext/pcollreq/mpif-h/neighbor_allgather_init_f.c deleted file mode 100644 index 52ca6db627f..00000000000 --- a/ompi/mpiext/pcollreq/mpif-h/neighbor_allgather_init_f.c +++ /dev/null @@ -1,103 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" - -#if OMPI_BUILD_MPI_PROFILING -#if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak PMPIX_NEIGHBOR_ALLGATHER_INIT = ompix_neighbor_allgather_init_f -#pragma weak pmpix_neighbor_allgather_init = ompix_neighbor_allgather_init_f -#pragma weak pmpix_neighbor_allgather_init_ = ompix_neighbor_allgather_init_f -#pragma weak pmpix_neighbor_allgather_init__ = ompix_neighbor_allgather_init_f - -#pragma weak PMPIX_Neighbor_allgather_init_f = ompix_neighbor_allgather_init_f -#pragma weak PMPIX_Neighbor_allgather_init_f08 = ompix_neighbor_allgather_init_f -#else -OMPI_GENERATE_F77_BINDINGS (PMPIX_NEIGHBOR_ALLGATHER_INIT, - pmpix_neighbor_allgather_init, - pmpix_neighbor_allgather_init_, - pmpix_neighbor_allgather_init__, - pompix_neighbor_allgather_init_f, - (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), - (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) -#endif -#endif - -#if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPIX_NEIGHBOR_ALLGATHER_INIT = ompix_neighbor_allgather_init_f -#pragma weak mpix_neighbor_allgather_init = ompix_neighbor_allgather_init_f -#pragma weak mpix_neighbor_allgather_init_ = ompix_neighbor_allgather_init_f -#pragma weak mpix_neighbor_allgather_init__ = ompix_neighbor_allgather_init_f - -#pragma weak MPIX_Neighbor_allgather_init_f = ompix_neighbor_allgather_init_f -#pragma weak MPIX_Neighbor_allgather_init_f08 = ompix_neighbor_allgather_init_f -#else -#if ! 
OMPI_BUILD_MPI_PROFILING -OMPI_GENERATE_F77_BINDINGS (MPIX_NEIGHBOR_ALLGATHER_INIT, - mpix_neighbor_allgather_init, - mpix_neighbor_allgather_init_, - mpix_neighbor_allgather_init__, - ompix_neighbor_allgather_init_f, - (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr), - (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, ierr) ) -#else -#define ompix_neighbor_allgather_init_f pompix_neighbor_allgather_init_f -#endif -#endif - - -void ompix_neighbor_allgather_init_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, - char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, - MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) -{ - int ierr_c; - MPI_Comm c_comm; - MPI_Info c_info; - MPI_Request c_req; - MPI_Datatype c_sendtype, c_recvtype; - - c_comm = PMPI_Comm_f2c(*comm); - c_sendtype = PMPI_Type_f2c(*sendtype); - c_recvtype = PMPI_Type_f2c(*recvtype); - c_info = PMPI_Info_f2c(*info); - - sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); - sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); - recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - - ierr_c = PMPIX_Neighbor_allgather_init(sendbuf, - OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, - OMPI_FINT_2_INT(*recvcount), - c_recvtype, c_comm, c_info, &c_req); - - if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); - - if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_req); -} diff --git a/ompi/mpiext/pcollreq/mpif-h/profile/Makefile.am b/ompi/mpiext/pcollreq/mpif-h/profile/Makefile.am deleted file mode 100644 index 235dd1ed5e5..00000000000 --- a/ompi/mpiext/pcollreq/mpif-h/profile/Makefile.am +++ /dev/null @@ -1,80 +0,0 @@ -# -# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. 
-# Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2018 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# This file builds the mpif.h-based bindings for MPI extensions. It -# is optional in MPI extensions. - -# We must set these #defines so that the inner OMPI MPI prototype -# header files do the Right Thing. -AM_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=1 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1 - -# Note that the mpif.h-based bindings are optional -- they can only be -# built if OMPI is also building the Fortran-based bindings. So we -# initially set some Makefile macros to empty, and then conditionally -# add to them later. -noinst_LTLIBRARIES = - -# Directory where the header file to be included in mpif-ext.h must be -# installed. -ompidir = $(ompiincludedir)/ompi/mpiext/pcollreq/mpif-h - -# Just like noinst_LTLIBRARIES, set this macro to empty and -# conditionally add to it later. -ompi_HEADERS = - -# Use the Automake conditional to know if we're building the mpif.h -# bindings. -if OMPI_BUILD_FORTRAN_MPIFH_BINDINGS - -# If we are, build the convenience libtool library that will be -# slurped up into libmpi_mpifh.la. -noinst_LTLIBRARIES += libpmpiext_pcollreq_mpifh.la - -# Sources for the convenience libtool library. 
-nodist_libpmpiext_pcollreq_mpifh_la_SOURCES = \ - pallgather_init_f.c \ - pallgatherv_init_f.c \ - pallreduce_init_f.c \ - palltoall_init_f.c \ - palltoallv_init_f.c \ - palltoallw_init_f.c \ - pbarrier_init_f.c \ - pbcast_init_f.c \ - pexscan_init_f.c \ - pgather_init_f.c \ - pgatherv_init_f.c \ - preduce_init_f.c \ - preduce_scatter_init_f.c \ - preduce_scatter_block_init_f.c \ - pscan_init_f.c \ - pscatter_init_f.c \ - pscatterv_init_f.c \ - \ - pneighbor_allgather_init_f.c \ - pneighbor_allgatherv_init_f.c \ - pneighbor_alltoall_init_f.c \ - pneighbor_alltoallv_init_f.c \ - pneighbor_alltoallw_init_f.c - -# -# Sym link in the sources from the real MPI directory -# -$(nodist_libpmpiext_pcollreq_mpifh_la_SOURCES): - $(OMPI_V_LN_S) if test ! -r $@ ; then \ - pname=`echo $@ | cut -b '2-'` ; \ - $(LN_S) $(top_srcdir)/ompi/mpiext/pcollreq/mpif-h/$$pname $@ ; \ - fi - -endif diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/Makefile.am b/ompi/mpiext/pcollreq/use-mpi-f08/Makefile.am deleted file mode 100644 index b03a78e610f..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/Makefile.am +++ /dev/null @@ -1,115 +0,0 @@ -# -# Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2017-2020 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# This file builds the use_mpi_f08-based bindings for MPI extensions. It -# is optional in MPI extensions. - -# Note that Automake's Fortran-buidling rules uses CPPFLAGS and -# AM_CPPFLAGS. This can cause weirdness (e.g., -# https://github.com/open-mpi/ompi/issues/7253). Let's just zero -# those out and rely on AM_FCFLAGS. -CPPFLAGS = -AM_CPPFLAGS = - -# We must set these #defines and include paths so that the inner OMPI -# MPI prototype header files do the Right Thing. 
-AM_FCFLAGS = $(OMPI_FC_MODULE_FLAG)$(top_builddir)/ompi/mpi/fortran/use-mpi \ - $(OMPI_FC_MODULE_FLAG)$(top_builddir)/ompi/mpi/fortran/use-mpi-f08/mod \ - -I$(top_builddir) -I$(top_srcdir) $(FCFLAGS_f90) - -# Note that the mpi_f08-based bindings are optional -- they can only -# be built if OMPI is also building the Fortran-based bindings. So we -# initially set some Makefile macros to empty, and then conditionally -# add to them later. -noinst_LTLIBRARIES = - -# Use the Automake conditional to know if we're building the mpif.h -# bindings. -if OMPI_BUILD_FORTRAN_USEMPIF08_BINDINGS - -# If we are, build the convenience libtool library that will be -# slurped up into libmpi_usempif08.la. -noinst_LTLIBRARIES += libmpiext_pcollreq_usempif08.la - -# Note that no header files are installed; instead, -# mpiext_pcollreq_usempif08.h is automatically slurped up into the -# mpi_f08_ext module. It must be listed so that it is included in -# dist tarballs. -noinst_HEADERS = mpiext_pcollreq_usempif08.h - -mpi_api_files = \ - allgather_init_f08.F90 \ - allgatherv_init_f08.F90 \ - allreduce_init_f08.F90 \ - alltoall_init_f08.F90 \ - alltoallv_init_f08.F90 \ - alltoallw_init_f08.F90 \ - barrier_init_f08.F90 \ - bcast_init_f08.F90 \ - exscan_init_f08.F90 \ - gather_init_f08.F90 \ - gatherv_init_f08.F90 \ - reduce_init_f08.F90 \ - reduce_scatter_block_init_f08.F90 \ - reduce_scatter_init_f08.F90 \ - scan_init_f08.F90 \ - scatter_init_f08.F90 \ - scatterv_init_f08.F90 \ - \ - neighbor_allgather_init_f08.F90 \ - neighbor_allgatherv_init_f08.F90 \ - neighbor_alltoall_init_f08.F90 \ - neighbor_alltoallv_init_f08.F90 \ - neighbor_alltoallw_init_f08.F90 - -pmpi_api_files = \ - profile/pallgather_init_f08.F90 \ - profile/pallgatherv_init_f08.F90 \ - profile/pallreduce_init_f08.F90 \ - profile/palltoall_init_f08.F90 \ - profile/palltoallv_init_f08.F90 \ - profile/palltoallw_init_f08.F90 \ - profile/pbarrier_init_f08.F90 \ - profile/pbcast_init_f08.F90 \ - profile/pexscan_init_f08.F90 \ - 
profile/pgather_init_f08.F90 \ - profile/pgatherv_init_f08.F90 \ - profile/preduce_init_f08.F90 \ - profile/preduce_scatter_block_init_f08.F90 \ - profile/preduce_scatter_init_f08.F90 \ - profile/pscan_init_f08.F90 \ - profile/pscatter_init_f08.F90 \ - profile/pscatterv_init_f08.F90 \ - \ - profile/pneighbor_allgather_init_f08.F90 \ - profile/pneighbor_allgatherv_init_f08.F90 \ - profile/pneighbor_alltoall_init_f08.F90 \ - profile/pneighbor_alltoallv_init_f08.F90 \ - profile/pneighbor_alltoallw_init_f08.F90 - -mpi_api_lo_files = $(mpi_api_files:.F90=.lo) -pmpi_api_lo_files = $(pmpi_api_files:.F90=.lo) - -$(mpi_api_lo_files): mpiext_pcollreq_f08.lo -$(pmpi_api_lo_files): mpiext_pcollreq_f08.lo - -# Sources for the convenience libtool library. -libmpiext_pcollreq_usempif08_la_SOURCES = \ - mpiext_pcollreq_f08.F90 \ - $(mpi_api_files) \ - $(pmpi_api_files) - -# Remove the intermediate module file -distclean-local: - rm -f mpiext_pcollreq_f08.mod - -endif diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/mpiext_pcollreq_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/mpiext_pcollreq_f08.F90 deleted file mode 100644 index f457c640f90..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/mpiext_pcollreq_f08.F90 +++ /dev/null @@ -1,346 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -! University Research and Technology -! Corporation. All rights reserved. -! Copyright (c) 2004-2005 The University of Tennessee and The University -! of Tennessee Research Foundation. All rights -! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -! University of Stuttgart. All rights reserved. -! Copyright (c) 2004-2005 The Regents of the University of California. -! All rights reserved. -! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2016-2018 Research Organization for Information Science -! 
and Technology (RIST). All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -module mpiext_pcollreq_f08 - - use mpi_f08_types - use mpi_f08_interfaces ! this module contains the mpi_f08 interface declarations - use pmpi_f08_interfaces ! this module contains the pmpi_f08 interface declarations - use mpi_f08_callbacks ! this module contains the mpi_f08 attribute callback subroutines - use mpi_f08_interfaces_callbacks ! this module contains the mpi_f08 callback interfaces - -interface - -subroutine ompix_allgather_init_f(sendbuf,sendcount,sendtype,recvbuf, & - recvcount,recvtype,comm,info,request,ierror) & - BIND(C, name="ompix_allgather_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount - INTEGER, INTENT(IN) :: sendtype - INTEGER, INTENT(IN) :: recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_allgather_init_f - -subroutine ompix_allgatherv_init_f(sendbuf,sendcount,sendtype,recvbuf, & - recvcounts,displs,recvtype,comm,info,request,ierror) & - BIND(C, name="ompix_allgatherv_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: sendcount - INTEGER, INTENT(IN) :: recvcounts(*), displs(*) - INTEGER, INTENT(IN) :: sendtype - INTEGER, INTENT(IN) :: recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_allgatherv_init_f - -subroutine ompix_allreduce_init_f(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) & - BIND(C, name="ompix_allreduce_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: count - INTEGER, INTENT(IN) :: datatype - INTEGER, INTENT(IN) :: op - 
INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_allreduce_init_f - -subroutine ompix_alltoall_init_f(sendbuf,sendcount,sendtype,recvbuf, & - recvcount,recvtype,comm,info,request,ierror) & - BIND(C, name="ompix_alltoall_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount - INTEGER, INTENT(IN) :: sendtype - INTEGER, INTENT(IN) :: recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_alltoall_init_f - -subroutine ompix_alltoallv_init_f(sendbuf,sendcounts,sdispls,sendtype, & - recvbuf,recvcounts,rdispls,recvtype,comm,info,request,ierror) & - BIND(C, name="ompix_alltoallv_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - INTEGER, INTENT(IN) :: sendtype - INTEGER, INTENT(IN) :: recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_alltoallv_init_f - -subroutine ompix_alltoallw_init_f(sendbuf,sendcounts,sdispls,sendtypes, & - recvbuf,recvcounts,rdispls,recvtypes,comm,info,request,ierror) & - BIND(C, name="ompix_alltoallw_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - INTEGER, INTENT(IN) :: sendtypes - INTEGER, INTENT(IN) :: recvtypes - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_alltoallw_init_f - -subroutine ompix_barrier_init_f(comm,info,request,ierror) & - BIND(C, name="ompix_barrier_init_f") - implicit none - INTEGER, INTENT(IN) :: comm - INTEGER, 
INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_barrier_init_f - -subroutine ompix_bcast_init_f(buffer,count,datatype,root,comm,info,request,ierror) & - BIND(C, name="ompix_bcast_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buffer - INTEGER, INTENT(IN) :: count, root - INTEGER, INTENT(IN) :: datatype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_bcast_init_f - -subroutine ompix_exscan_init_f(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) & - BIND(C, name="ompix_exscan_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: count - INTEGER, INTENT(IN) :: datatype - INTEGER, INTENT(IN) :: op - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_exscan_init_f - -subroutine ompix_gather_init_f(sendbuf,sendcount,sendtype,recvbuf, & - recvcount,recvtype,root,comm,info,request,ierror) & - BIND(C, name="ompix_gather_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount, root - INTEGER, INTENT(IN) :: sendtype - INTEGER, INTENT(IN) :: recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_gather_init_f - -subroutine ompix_gatherv_init_f(sendbuf,sendcount,sendtype,recvbuf, & - recvcounts,displs,recvtype,root,comm,info,request,ierror) & - BIND(C, name="ompix_gatherv_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: sendcount, root - INTEGER, INTENT(IN) :: recvcounts(*), displs(*) - INTEGER, INTENT(IN) :: sendtype - INTEGER, INTENT(IN) :: recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, 
INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_gatherv_init_f - -subroutine ompix_reduce_init_f(sendbuf,recvbuf,count,datatype,op,root,comm,info,request,ierror) & - BIND(C, name="ompix_reduce_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: count, root - INTEGER, INTENT(IN) :: datatype - INTEGER, INTENT(IN) :: op - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_reduce_init_f - -subroutine ompix_reduce_scatter_init_f(sendbuf,recvbuf,recvcounts, & - datatype,op,comm,info,request,ierror) & - BIND(C, name="ompix_reduce_scatter_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: recvcounts(*) - INTEGER, INTENT(IN) :: datatype - INTEGER, INTENT(IN) :: op - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_reduce_scatter_init_f - -subroutine ompix_reduce_scatter_block_init_f(sendbuf,recvbuf,recvcount, & - datatype,op,comm,info,request,ierror) & - BIND(C, name="ompix_reduce_scatter_block_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: recvcount - INTEGER, INTENT(IN) :: datatype - INTEGER, INTENT(IN) :: op - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_reduce_scatter_block_init_f - -subroutine ompix_scan_init_f(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) & - BIND(C, name="ompix_scan_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: count - INTEGER, INTENT(IN) :: datatype - INTEGER, INTENT(IN) :: op - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info 
- INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_scan_init_f - -subroutine ompix_scatter_init_f(sendbuf,sendcount,sendtype,recvbuf, & - recvcount,recvtype,root,comm,info,request,ierror) & - BIND(C, name="ompix_scatter_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount, root - INTEGER, INTENT(IN) :: sendtype - INTEGER, INTENT(IN) :: recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_scatter_init_f - -subroutine ompix_scatterv_init_f(sendbuf,sendcounts,displs,sendtype, & - recvbuf,recvcount,recvtype,root,comm,info,request,ierror) & - BIND(C, name="ompix_scatterv_init_f") - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf, recvbuf - INTEGER, INTENT(IN) :: recvcount, root - INTEGER, INTENT(IN) :: sendcounts(*), displs(*) - INTEGER, INTENT(IN) :: sendtype - INTEGER, INTENT(IN) :: recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_scatterv_init_f - -subroutine ompix_neighbor_allgather_init_f(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & - comm,info,request,ierror) & - BIND(C, name="ompix_neighbor_allgather_init_f") - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount - INTEGER, INTENT(IN) :: sendtype, recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_neighbor_allgather_init_f - -subroutine ompix_neighbor_allgatherv_init_f(sendbuf,sendcount,sendtype,recvbuf,recvcounts,displs, & - recvtype,comm,info,request,ierror) & - BIND(C, 
name="ompix_neighbor_allgatherv_init_f") - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - INTEGER, INTENT(IN) :: sendcount - INTEGER, INTENT(IN) :: recvcounts(*), displs(*) - INTEGER, INTENT(IN) :: sendtype, recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_neighbor_allgatherv_init_f - -subroutine ompix_neighbor_alltoall_init_f(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype, & - comm,info,request,ierror) & - BIND(C, name="ompix_neighbor_alltoall_init_f") - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount - INTEGER, INTENT(IN) :: sendtype, recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_neighbor_alltoall_init_f - -subroutine ompix_neighbor_alltoallv_init_f(sendbuf,sendcounts,sdispls,sendtype,recvbuf,recvcounts, & - rdispls,recvtype,comm,info,request,ierror) & - BIND(C, name="ompix_neighbor_alltoallv_init_f") - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - INTEGER, INTENT(IN) :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - INTEGER, INTENT(IN) :: sendtype, recvtype - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_neighbor_alltoallv_init_f - -subroutine ompix_neighbor_alltoallw_init_f(sendbuf,sendcounts,sdispls,sendtypes,recvbuf,recvcounts, & - rdispls,recvtypes,comm,info,request,ierror) & - BIND(C, 
name="ompix_neighbor_alltoallw_init_f") - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Request, MPI_ADDRESS_KIND - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - INTEGER, INTENT(IN) :: sendcounts(*), recvcounts(*) - INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: sdispls(*), rdispls(*) - INTEGER, INTENT(IN) :: sendtypes, recvtypes - INTEGER, INTENT(IN) :: comm - INTEGER, INTENT(IN) :: info - INTEGER, INTENT(OUT) :: request - INTEGER, INTENT(OUT) :: ierror -end subroutine ompix_neighbor_alltoallw_init_f - -end interface - -end module mpiext_pcollreq_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/mpiext_pcollreq_usempif08.h b/ompi/mpiext/pcollreq/use-mpi-f08/mpiext_pcollreq_usempif08.h deleted file mode 100644 index 33e3556cce7..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/mpiext_pcollreq_usempif08.h +++ /dev/null @@ -1,986 +0,0 @@ -! -*- fortran -*- -! -! Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -! This whole file will be included in the mpi_f08_ext module interface -! section. Note that the extension's mpif.h file will be included -! first, so there's no need to re-define anything that's in there (e.g., -! OMPI_EXAMPLE_GLOBAL). - -! Declare any interfaces, subroutines, and global variables/constants -! here. Note that the mpiext_example_mpif.h will automatically be -! included before this, so anything declared there does not need to be -! replicated here. 
- -interface mpix_allgather_init - subroutine mpix_allgather_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_allgather_init_f08 -end interface mpix_allgather_init - -interface mpix_allgatherv_init - subroutine mpix_allgatherv_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in), asynchronous :: recvcounts(*), displs(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_allgatherv_init_f08 -end interface mpix_allgatherv_init - -interface mpix_allreduce_init - subroutine 
mpix_allreduce_init_f08(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: count - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_allreduce_init_f08 -end interface mpix_allreduce_init - -interface mpix_alltoall_init - subroutine mpix_alltoall_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_alltoall_init_f08 -end interface mpix_alltoall_init - -interface mpix_alltoallv_init - subroutine mpix_alltoallv_init_f08(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, 
ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in), asynchronous :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_alltoallv_init_f08 -end interface mpix_alltoallv_init - -interface mpix_alltoallw_init - subroutine mpix_alltoallw_init_f08(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in), asynchronous :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - type(mpi_datatype), intent(in), asynchronous :: sendtypes(*), recvtypes(*) - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_alltoallw_init_f08 -end interface mpix_alltoallw_init - -interface mpix_barrier_init - subroutine mpix_barrier_init_f08(comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_comm, mpi_info, 
mpi_request - implicit none - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_barrier_init_f08 -end interface mpix_barrier_init - -interface mpix_bcast_init - subroutine mpix_bcast_init_f08(buffer, count, datatype, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: buffer - !GCC$ ATTRIBUTES NO_ARG_CHECK :: buffer - !$PRAGMA IGNORE_TKR buffer - !DIR$ IGNORE_TKR buffer - !IBM* IGNORE_TKR buffer - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: buffer - integer, intent(in) :: count, root - type(mpi_datatype), intent(in) :: datatype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_bcast_init_f08 -end interface mpix_bcast_init - -interface mpix_exscan_init - subroutine mpix_exscan_init_f08(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: count - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_exscan_init_f08 -end interface mpix_exscan_init - -interface mpix_gather_init - subroutine 
mpix_gather_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount, root - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_gather_init_f08 -end interface mpix_gather_init - -interface mpix_gatherv_init - subroutine mpix_gatherv_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, root - integer, intent(in), asynchronous :: recvcounts(*), displs(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_gatherv_init_f08 -end interface mpix_gatherv_init - -interface mpix_reduce_init - subroutine mpix_reduce_init_f08(sendbuf, recvbuf, count, & - datatype, 
op, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: count, root - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_reduce_init_f08 -end interface mpix_reduce_init - -interface mpix_reduce_scatter_init - subroutine mpix_reduce_scatter_init_f08(sendbuf, recvbuf, recvcounts, & - datatype, op, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in), asynchronous :: recvcounts(*) - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_reduce_scatter_init_f08 -end interface mpix_reduce_scatter_init - -interface mpix_reduce_scatter_block_init - subroutine mpix_reduce_scatter_block_init_f08(sendbuf, recvbuf, recvcount, & - datatype, op, & - comm, info, request, ierror) - use :: 
mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: recvcount - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_reduce_scatter_block_init_f08 -end interface mpix_reduce_scatter_block_init - -interface mpix_scan_init - subroutine mpix_scan_init_f08(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: count - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_scan_init_f08 -end interface mpix_scan_init - -interface mpix_scatter_init - subroutine mpix_scatter_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES 
NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount, root - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_scatter_init_f08 -end interface mpix_scatter_init - -interface mpix_scatterv_init - subroutine mpix_scatterv_init_f08(sendbuf, sendcounts, displs, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: recvcount, root - integer, intent(in), asynchronous :: sendcounts(*), displs(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_scatterv_init_f08 -end interface mpix_scatterv_init - -interface mpix_neighbor_allgather_init - subroutine mpix_neighbor_allgather_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - 
!GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_neighbor_allgather_init_f08 -end interface mpix_neighbor_allgather_init - -interface mpix_neighbor_allgatherv_init - subroutine mpix_neighbor_allgatherv_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in), asynchronous :: recvcounts(*), displs(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_neighbor_allgatherv_init_f08 -end interface mpix_neighbor_allgatherv_init - -interface mpix_neighbor_alltoall_init - subroutine mpix_neighbor_alltoall_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, 
recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_neighbor_alltoall_init_f08 -end interface mpix_neighbor_alltoall_init - -interface mpix_neighbor_alltoallv_init - subroutine mpix_neighbor_alltoallv_init_f08(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in), asynchronous :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_neighbor_alltoallv_init_f08 -end interface mpix_neighbor_alltoallv_init - -interface mpix_neighbor_alltoallw_init - subroutine mpix_neighbor_alltoallw_init_f08(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_address_kind, mpi_datatype, mpi_comm, mpi_info, mpi_request - 
implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in), asynchronous :: sendcounts(*), recvcounts(*) - integer(mpi_address_kind), intent(in), asynchronous :: sdispls(*), rdispls(*) - type(mpi_datatype), intent(in), asynchronous :: sendtypes(*), recvtypes(*) - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine mpix_neighbor_alltoallw_init_f08 -end interface mpix_neighbor_alltoallw_init - -interface pmpix_allgather_init - subroutine pmpix_allgather_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_allgather_init_f08 -end interface pmpix_allgather_init - -interface pmpix_allgatherv_init - subroutine pmpix_allgatherv_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, 
mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in), asynchronous :: recvcounts(*), displs(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_allgatherv_init_f08 -end interface pmpix_allgatherv_init - -interface pmpix_allreduce_init - subroutine pmpix_allreduce_init_f08(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: count - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_allreduce_init_f08 -end interface pmpix_allreduce_init - -interface pmpix_alltoall_init - subroutine pmpix_alltoall_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES 
NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_alltoall_init_f08 -end interface pmpix_alltoall_init - -interface pmpix_alltoallv_init - subroutine pmpix_alltoallv_init_f08(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in), asynchronous :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_alltoallv_init_f08 -end interface pmpix_alltoallv_init - -interface pmpix_alltoallw_init - subroutine pmpix_alltoallw_init_f08(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, 
recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in), asynchronous :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - type(mpi_datatype), intent(in), asynchronous :: sendtypes(*), recvtypes(*) - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_alltoallw_init_f08 -end interface pmpix_alltoallw_init - -interface pmpix_barrier_init - subroutine pmpix_barrier_init_f08(comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_comm, mpi_info, mpi_request - implicit none - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_barrier_init_f08 -end interface pmpix_barrier_init - -interface pmpix_bcast_init - subroutine pmpix_bcast_init_f08(buffer, count, datatype, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: buffer - !GCC$ ATTRIBUTES NO_ARG_CHECK :: buffer - !$PRAGMA IGNORE_TKR buffer - !DIR$ IGNORE_TKR buffer - !IBM* IGNORE_TKR buffer - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: buffer - integer, intent(in) :: count, root - type(mpi_datatype), intent(in) :: datatype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_bcast_init_f08 -end interface pmpix_bcast_init - -interface pmpix_exscan_init - subroutine pmpix_exscan_init_f08(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, 
request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: count - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_exscan_init_f08 -end interface pmpix_exscan_init - -interface pmpix_gather_init - subroutine pmpix_gather_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount, root - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_gather_init_f08 -end interface pmpix_gather_init - -interface pmpix_gatherv_init - subroutine pmpix_gatherv_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - 
implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, root - integer, intent(in) :: recvcounts(*), displs(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_gatherv_init_f08 -end interface pmpix_gatherv_init - -interface pmpix_reduce_init - subroutine pmpix_reduce_init_f08(sendbuf, recvbuf, count, & - datatype, op, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: count, root - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_reduce_init_f08 -end interface pmpix_reduce_init - -interface pmpix_reduce_scatter_init - subroutine pmpix_reduce_scatter_init_f08(sendbuf, recvbuf, recvcounts, & - datatype, op, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ 
ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in), asynchronous :: recvcounts(*) - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_reduce_scatter_init_f08 -end interface pmpix_reduce_scatter_init - -interface pmpix_reduce_scatter_block_init - subroutine pmpix_reduce_scatter_block_init_f08(sendbuf, recvbuf, recvcount, & - datatype, op, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: recvcount - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_reduce_scatter_block_init_f08 -end interface pmpix_reduce_scatter_block_init - -interface pmpix_scan_init - subroutine pmpix_scan_init_f08(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - 
!$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: count - type(mpi_datatype), intent(in) :: datatype - type(mpi_op), intent(in) :: op - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_scan_init_f08 -end interface pmpix_scan_init - -interface pmpix_scatter_init - subroutine pmpix_scatter_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount, root - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_scatter_init_f08 -end interface pmpix_scatter_init - -interface pmpix_scatterv_init - subroutine pmpix_scatterv_init_f08(sendbuf, sendcounts, displs, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, 
recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: recvcount, root - integer, intent(in), asynchronous :: sendcounts(*), displs(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_scatterv_init_f08 -end interface pmpix_scatterv_init - -interface pmpix_neighbor_allgather_init - subroutine pmpix_neighbor_allgather_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_neighbor_allgather_init_f08 -end interface pmpix_neighbor_allgather_init - -interface pmpix_neighbor_allgatherv_init - subroutine pmpix_neighbor_allgatherv_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR 
sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in), asynchronous :: recvcounts(*), displs(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_neighbor_allgatherv_init_f08 -end interface pmpix_neighbor_allgatherv_init - -interface pmpix_neighbor_alltoall_init - subroutine pmpix_neighbor_alltoall_init_f08(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in) :: sendcount, recvcount - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_neighbor_alltoall_init_f08 -end interface pmpix_neighbor_alltoall_init - -interface pmpix_neighbor_alltoallv_init - subroutine pmpix_neighbor_alltoallv_init_f08(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, 
recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in), asynchronous :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - type(mpi_datatype), intent(in) :: sendtype, recvtype - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_neighbor_alltoallv_init_f08 -end interface pmpix_neighbor_alltoallv_init - -interface pmpix_neighbor_alltoallw_init - subroutine pmpix_neighbor_alltoallw_init_f08(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) - use :: mpi_f08_types, only : mpi_address_kind, mpi_datatype, mpi_comm, mpi_info, mpi_request - implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in), asynchronous :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, asynchronous :: recvbuf - integer, intent(in), asynchronous :: sendcounts(*), recvcounts(*) - integer(mpi_address_kind), intent(in), asynchronous :: sdispls(*), rdispls(*) - type(mpi_datatype), intent(in) :: sendtypes(*), recvtypes(*) - type(mpi_comm), intent(in) :: comm - type(mpi_info), intent(in) :: info - type(mpi_request), intent(out) :: request - integer, optional, intent(out) :: ierror - end subroutine pmpix_neighbor_alltoallw_init_f08 -end interface pmpix_neighbor_alltoallw_init diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pallgather_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pallgather_init_f08.F90 deleted file mode 100644 index d9e1e1ffb79..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pallgather_init_f08.F90 +++ /dev/null @@ -1,32 +0,0 
@@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! $COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Allgather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_allgather_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_allgather_init_f(sendbuf,sendcount,sendtype%MPI_VAL,& - recvbuf,recvcount,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Allgather_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pallgatherv_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pallgatherv_init_f08.F90 deleted file mode 100644 index d719fcfeb37..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pallgatherv_init_f08.F90 +++ /dev/null @@ -1,34 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! 
Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! $COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Allgatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& - displs,recvtype,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_allgatherv_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: sendcount - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_allgatherv_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcounts,& - displs,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Allgatherv_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pallreduce_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pallreduce_init_f08.F90 deleted file mode 100644 index e89a652c241..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pallreduce_init_f08.F90 +++ /dev/null @@ -1,32 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Allreduce_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_allreduce_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: count - TYPE(MPI_Datatype), INTENT(IN) :: datatype - TYPE(MPI_Op), INTENT(IN) :: op - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_allreduce_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& - op%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Allreduce_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/palltoall_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/palltoall_init_f08.F90 deleted file mode 100644 index 6bf635c3bd4..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/palltoall_init_f08.F90 +++ /dev/null @@ -1,33 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Alltoall_init_f08(sendbuf,sendcount,sendtype,recvbuf,& - recvcount,recvtype,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_alltoall_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_alltoall_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,& - recvcount,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Alltoall_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/palltoallv_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/palltoallv_init_f08.F90 deleted file mode 100644 index 7b16e20d404..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/palltoallv_init_f08.F90 +++ /dev/null @@ -1,33 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Alltoallv_init_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,& - recvcounts,rdispls,recvtype,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_alltoallv_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_alltoallv_init_f(sendbuf,sendcounts,sdispls,sendtype%MPI_VAL,& - recvbuf,recvcounts,rdispls,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Alltoallv_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/palltoallw_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/palltoallw_init_f08.F90 deleted file mode 100644 index f2d108e5e0c..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/palltoallw_init_f08.F90 +++ /dev/null @@ -1,42 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Alltoallw_init_f08(sendbuf,sendcounts,sdispls,sendtypes,& - recvbuf,recvcounts,rdispls,recvtypes,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_alltoallw_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: sendtypes(*), recvtypes(*) - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - ! Note that we pass a scalar here for both the sendtypes and - ! recvtypes arguments, even though the real Alltoallw function - ! expects an array of integers. This is a hack: we know that - ! [send|recv]types(1)%MPI_VAL will pass the address of the first - ! integer in the array of Type(MPI_Datatype) derived types. And - ! since Type(MPI_Datatype) are exactly memory-equivalent to a - ! single INTEGER, passing the address of the first one is the same - ! as passing the address to an array of integers. To be clear: the - ! back-end ompi_alltoallw_f is expecting a pointer to an array of - ! integers. So it all works out (but is a hack :-\ ). 
- call ompix_alltoallw_init_f(sendbuf,sendcounts,sdispls,sendtypes(1)%MPI_VAL,& - recvbuf,recvcounts,rdispls,recvtypes(1)%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Alltoallw_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pbarrier_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pbarrier_init_f08.F90 deleted file mode 100644 index 71b12c20e8d..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pbarrier_init_f08.F90 +++ /dev/null @@ -1,23 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! $COPYRIGHT$ - -subroutine PMPIX_Barrier_init_f08(comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_barrier_init_f - implicit none - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_barrier_init_f(comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Barrier_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pbcast_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pbcast_init_f08.F90 deleted file mode 100644 index 8b0e83cb214..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pbcast_init_f08.F90 +++ /dev/null @@ -1,29 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! 
Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! $COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Bcast_init_f08(buffer,count,datatype,root,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_bcast_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: buffer - INTEGER, INTENT(IN) :: count, root - TYPE(MPI_Datatype), INTENT(IN) :: datatype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_bcast_init_f(buffer,count,datatype%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Bcast_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pexscan_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pexscan_init_f08.F90 deleted file mode 100644 index 0feb7fb45aa..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pexscan_init_f08.F90 +++ /dev/null @@ -1,32 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Exscan_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_exscan_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: count - TYPE(MPI_Datatype), INTENT(IN) :: datatype - TYPE(MPI_Op), INTENT(IN) :: op - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_exscan_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& - op%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Exscan_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pgather_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pgather_init_f08.F90 deleted file mode 100644 index 45b8ac796e2..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pgather_init_f08.F90 +++ /dev/null @@ -1,33 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Gather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,& - recvtype,root,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_gather_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount, root - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_gather_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcount,& - recvtype%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Gather_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pgatherv_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pgatherv_init_f08.F90 deleted file mode 100644 index 42c7ad89722..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pgatherv_init_f08.F90 +++ /dev/null @@ -1,34 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Gatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& - displs,recvtype,root,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_gatherv_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: sendcount, root - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_gatherv_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcounts,& - displs,recvtype%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Gatherv_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_allgather_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_allgather_init_f08.F90 deleted file mode 100644 index c8a9caa8de7..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_allgather_init_f08.F90 +++ /dev/null @@ -1,32 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2013 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Neighbor_allgather_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_neighbor_allgather_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_neighbor_allgather_init_f(sendbuf,sendcount,sendtype%MPI_VAL,& - recvbuf,recvcount,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Neighbor_allgather_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_allgatherv_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_allgatherv_init_f08.F90 deleted file mode 100644 index 66ce3aefc15..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_allgatherv_init_f08.F90 +++ /dev/null @@ -1,34 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2013 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Neighbor_allgatherv_init_f08(sendbuf,sendcount,sendtype,recvbuf,recvcounts,& - displs,recvtype,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_neighbor_allgatherv_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: sendcount - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*), displs(*) - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_neighbor_allgatherv_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcounts,& - displs,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Neighbor_allgatherv_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_alltoall_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_alltoall_init_f08.F90 deleted file mode 100644 index 76c7d045b30..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_alltoall_init_f08.F90 +++ /dev/null @@ -1,33 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2013 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Neighbor_alltoall_init_f08(sendbuf,sendcount,sendtype,recvbuf,& - recvcount,recvtype,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_neighbor_alltoall_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_neighbor_alltoall_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,& - recvcount,recvtype%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Neighbor_alltoall_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_alltoallv_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_alltoallv_init_f08.F90 deleted file mode 100644 index 47ca428d016..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_alltoallv_init_f08.F90 +++ /dev/null @@ -1,34 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2013 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Neighbor_alltoallv_init_f08(sendbuf,sendcounts,sdispls,sendtype,recvbuf,& - recvcounts,rdispls,recvtype,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_neighbor_alltoallv_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), sdispls(*), recvcounts(*), rdispls(*) - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_neighbor_alltoallv_init_f(sendbuf,sendcounts,sdispls,sendtype%MPI_VAL,& - recvbuf,recvcounts,rdispls,recvtype%MPI_VAL,& - comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Neighbor_alltoallv_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_alltoallw_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_alltoallw_init_f08.F90 deleted file mode 100644 index bd323b00055..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pneighbor_alltoallw_init_f08.F90 +++ /dev/null @@ -1,34 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2013 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Neighbor_alltoallw_init_f08(sendbuf,sendcounts,sdispls,sendtypes,& - recvbuf,recvcounts,rdispls,recvtypes,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request, MPI_ADDRESS_KIND - use :: mpiext_pcollreq_f08, only : ompix_neighbor_alltoallw_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), recvcounts(*) - INTEGER(MPI_ADDRESS_KIND), INTENT(IN), ASYNCHRONOUS :: sdispls(*), rdispls(*) - TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: sendtypes(*) - TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: recvtypes(*) - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_neighbor_alltoallw_init_f(sendbuf,sendcounts,sdispls,sendtypes(1)%MPI_VAL,& - recvbuf,recvcounts,rdispls,recvtypes(1)%MPI_VAL,& - comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Neighbor_alltoallw_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/preduce_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/preduce_init_f08.F90 deleted file mode 100644 index ab70eed49e0..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/preduce_init_f08.F90 +++ /dev/null @@ -1,32 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Reduce_init_f08(sendbuf,recvbuf,count,datatype,op,root,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_reduce_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: count, root - TYPE(MPI_Datatype), INTENT(IN) :: datatype - TYPE(MPI_Op), INTENT(IN) :: op - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_reduce_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& - op%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Reduce_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/preduce_scatter_block_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/preduce_scatter_block_init_f08.F90 deleted file mode 100644 index eeb7ccac9ca..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/preduce_scatter_block_init_f08.F90 +++ /dev/null @@ -1,32 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Reduce_scatter_block_init_f08(sendbuf,recvbuf,recvcount,datatype,op,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_reduce_scatter_block_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: recvcount - TYPE(MPI_Datatype), INTENT(IN) :: datatype - TYPE(MPI_Op), INTENT(IN) :: op - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_reduce_scatter_block_init_f(sendbuf,recvbuf,recvcount,& - datatype%MPI_VAL,op%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Reduce_scatter_block_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/preduce_scatter_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/preduce_scatter_init_f08.F90 deleted file mode 100644 index 791ffd4ac42..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/preduce_scatter_init_f08.F90 +++ /dev/null @@ -1,32 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Reduce_scatter_init_f08(sendbuf,recvbuf,recvcounts,datatype,op,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_reduce_scatter_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN), ASYNCHRONOUS :: recvcounts(*) - TYPE(MPI_Datatype), INTENT(IN) :: datatype - TYPE(MPI_Op), INTENT(IN) :: op - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_reduce_scatter_init_f(sendbuf,recvbuf,recvcounts,datatype%MPI_VAL,& - op%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Reduce_scatter_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pscan_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pscan_init_f08.F90 deleted file mode 100644 index 668f0425397..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pscan_init_f08.F90 +++ /dev/null @@ -1,32 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Scan_init_f08(sendbuf,recvbuf,count,datatype,op,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Op, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_scan_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: count - TYPE(MPI_Datatype), INTENT(IN) :: datatype - TYPE(MPI_Op), INTENT(IN) :: op - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_scan_init_f(sendbuf,recvbuf,count,datatype%MPI_VAL,& - op%MPI_VAL,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Scan_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pscatter_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pscatter_init_f08.F90 deleted file mode 100644 index a02b807dd05..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pscatter_init_f08.F90 +++ /dev/null @@ -1,33 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Scatter_init_f08(sendbuf,sendcount,sendtype,recvbuf,& - recvcount,recvtype,root,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_scatter_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: sendcount, recvcount, root - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_scatter_init_f(sendbuf,sendcount,sendtype%MPI_VAL,recvbuf,recvcount,& - recvtype%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Scatter_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pscatterv_init_f08.F90 b/ompi/mpiext/pcollreq/use-mpi-f08/profile/pscatterv_init_f08.F90 deleted file mode 100644 index 6d138816945..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi-f08/profile/pscatterv_init_f08.F90 +++ /dev/null @@ -1,34 +0,0 @@ -! -*- f90 -*- -! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! 
$COPYRIGHT$ - -#include "ompi/mpi/fortran/configure-fortran-output.h" - -subroutine PMPIX_Scatterv_init_f08(sendbuf,sendcounts,displs,sendtype,recvbuf,& - recvcount,recvtype,root,comm,info,request,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm, MPI_Info, MPI_Request - use :: mpiext_pcollreq_f08, only : ompix_scatterv_init_f - implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN), ASYNCHRONOUS :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, ASYNCHRONOUS :: recvbuf - INTEGER, INTENT(IN) :: recvcount, root - INTEGER, INTENT(IN), ASYNCHRONOUS :: sendcounts(*), displs(*) - TYPE(MPI_Datatype), INTENT(IN) :: sendtype - TYPE(MPI_Datatype), INTENT(IN) :: recvtype - TYPE(MPI_Comm), INTENT(IN) :: comm - TYPE(MPI_Info), INTENT(IN) :: info - TYPE(MPI_Request), INTENT(OUT) :: request - INTEGER, OPTIONAL, INTENT(OUT) :: ierror - integer :: c_ierror - - call ompix_scatterv_init_f(sendbuf,sendcounts,displs,sendtype%MPI_VAL,recvbuf,& - recvcount,recvtype%MPI_VAL,root,comm%MPI_VAL,info%MPI_VAL,request%MPI_VAL,c_ierror) - if (present(ierror)) ierror = c_ierror - -end subroutine PMPIX_Scatterv_init_f08 diff --git a/ompi/mpiext/pcollreq/use-mpi/Makefile.am b/ompi/mpiext/pcollreq/use-mpi/Makefile.am deleted file mode 100644 index 225ffa8d589..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -# -# Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# There's nothing to build or install for the "use mpi" bindings for -# this MPI extension. However, we must list the header file that will -# be slurped up to be part of the mpi_ext Fortran module so that it is -# included in dist tarballs. 
- -noinst_HEADERS = mpiext_pcollreq_usempi.h diff --git a/ompi/mpiext/pcollreq/use-mpi/mpiext_pcollreq_usempi.h b/ompi/mpiext/pcollreq/use-mpi/mpiext_pcollreq_usempi.h deleted file mode 100644 index f083470d180..00000000000 --- a/ompi/mpiext/pcollreq/use-mpi/mpiext_pcollreq_usempi.h +++ /dev/null @@ -1,983 +0,0 @@ -! -*- fortran -*- -! -! Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. -! Copyright (c) 2018 Research Organization for Information Science -! and Technology (RIST). All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -! This whole file will be included in the mpi_ext module interface -! section. Note that the extension's mpif.h file will be included -! first, so there's no need to re-define anything that's in there (e.g., -! OMPI_EXAMPLE_GLOBAL). - -! Declare any interfaces, subroutines, and global variables/constants -! here. Note that the mpiext_example_mpif.h will automatically be -! included before this, so anything declared there does not need to be -! replicated here. 
- -interface mpix_allgather_init - subroutine mpix_allgather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_allgather_init -end interface mpix_allgather_init - -interface pmpix_allgather_init - subroutine pmpix_allgather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_allgather_init -end interface pmpix_allgather_init - -interface mpix_allgatherv_init - subroutine mpix_allgatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR 
sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, dimension(*), intent(in) :: recvcounts - integer, dimension(*), intent(in) :: displs - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_allgatherv_init -end interface mpix_allgatherv_init - -interface pmpix_allgatherv_init - subroutine pmpix_allgatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, dimension(*), intent(in) :: recvcounts - integer, dimension(*), intent(in) :: displs - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_allgatherv_init -end interface pmpix_allgatherv_init - -interface mpix_allreduce_init - subroutine mpix_allreduce_init(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: count - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: comm - integer, intent(in) 
:: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_allreduce_init -end interface mpix_allreduce_init - -interface pmpix_allreduce_init - subroutine pmpix_allreduce_init(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: count - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_allreduce_init -end interface pmpix_allreduce_init - -interface mpix_alltoall_init - subroutine mpix_alltoall_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_alltoall_init -end interface mpix_alltoall_init - -interface pmpix_alltoall_init - subroutine pmpix_alltoall_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR 
sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_alltoall_init -end interface pmpix_alltoall_init - -interface mpix_alltoallv_init - subroutine mpix_alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), intent(in) :: sendcounts - integer, dimension(*), intent(in) :: sdispls - integer, intent(in) :: sendtype - integer, dimension(*), intent(in) :: recvcounts - integer, dimension(*), intent(in) :: rdispls - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_alltoallv_init -end interface mpix_alltoallv_init - -interface pmpix_alltoallv_init - subroutine pmpix_alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), 
intent(in) :: sendcounts - integer, dimension(*), intent(in) :: sdispls - integer, intent(in) :: sendtype - integer, dimension(*), intent(in) :: recvcounts - integer, dimension(*), intent(in) :: rdispls - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_alltoallv_init -end interface pmpix_alltoallv_init - -interface mpix_alltoallw_init - subroutine mpix_alltoallw_init(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), intent(in) :: sendcounts - integer, dimension(*), intent(in) :: sdispls - integer, dimension(*), intent(in) :: sendtypes - integer, dimension(*), intent(in) :: recvcounts - integer, dimension(*), intent(in) :: rdispls - integer, dimension(*), intent(in) :: recvtypes - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_alltoallw_init -end interface mpix_alltoallw_init - -interface pmpix_alltoallw_init - subroutine pmpix_alltoallw_init(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), intent(in) :: sendcounts - integer, dimension(*), 
intent(in) :: sdispls - integer, dimension(*), intent(in) :: sendtypes - integer, dimension(*), intent(in) :: recvcounts - integer, dimension(*), intent(in) :: rdispls - integer, dimension(*), intent(in) :: recvtypes - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_alltoallw_init -end interface pmpix_alltoallw_init - -interface mpix_barrier_init - subroutine mpix_barrier_init(comm, info, request, ierror) - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_barrier_init -end interface mpix_barrier_init - -interface pmpix_barrier_init - subroutine pmpix_barrier_init(comm, info, request, ierror) - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_barrier_init -end interface pmpix_barrier_init - -interface mpix_bcast_init - subroutine mpix_bcast_init(buffer, count, datatype, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: buffer - !GCC$ ATTRIBUTES NO_ARG_CHECK :: buffer - !$PRAGMA IGNORE_TKR buffer - !DIR$ IGNORE_TKR buffer - !IBM* IGNORE_TKR buffer - OMPI_FORTRAN_IGNORE_TKR_TYPE :: buffer - integer, intent(in) :: count - integer, intent(in) :: datatype - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_bcast_init -end interface mpix_bcast_init - -interface pmpix_bcast_init - subroutine pmpix_bcast_init(buffer, count, datatype, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: buffer - !GCC$ ATTRIBUTES NO_ARG_CHECK :: buffer - !$PRAGMA IGNORE_TKR buffer - !DIR$ IGNORE_TKR buffer - !IBM* IGNORE_TKR buffer - OMPI_FORTRAN_IGNORE_TKR_TYPE :: buffer - integer, intent(in) :: count - integer, intent(in) :: 
datatype - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_bcast_init -end interface pmpix_bcast_init - -interface mpix_exscan_init - subroutine mpix_exscan_init(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: count - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_exscan_init -end interface mpix_exscan_init - -interface pmpix_exscan_init - subroutine pmpix_exscan_init(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: count - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_exscan_init -end interface pmpix_exscan_init - -interface mpix_gather_init - subroutine mpix_gather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, 
recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_gather_init -end interface mpix_gather_init - -interface pmpix_gather_init - subroutine pmpix_gather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_gather_init -end interface pmpix_gather_init - -interface mpix_gatherv_init - subroutine mpix_gatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, dimension(*), intent(in) :: recvcounts - integer, 
dimension(*), intent(in) :: displs - integer, intent(in) :: recvtype - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_gatherv_init -end interface mpix_gatherv_init - -interface pmpix_gatherv_init - subroutine pmpix_gatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, dimension(*), intent(in) :: recvcounts - integer, dimension(*), intent(in) :: displs - integer, intent(in) :: recvtype - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_gatherv_init -end interface pmpix_gatherv_init - -interface mpix_reduce_init - subroutine mpix_reduce_init(sendbuf, recvbuf, count, & - datatype, op, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: count - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_reduce_init -end interface mpix_reduce_init - -interface 
pmpix_reduce_init - subroutine pmpix_reduce_init(sendbuf, recvbuf, count, & - datatype, op, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: count - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_reduce_init -end interface pmpix_reduce_init - -interface mpix_reduce_scatter_init - subroutine mpix_reduce_scatter_init(sendbuf, recvbuf, recvcounts, & - datatype, op, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), intent(in) :: recvcounts - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_reduce_scatter_init -end interface mpix_reduce_scatter_init - -interface pmpix_reduce_scatter_init - subroutine pmpix_reduce_scatter_init(sendbuf, recvbuf, recvcounts, & - datatype, op, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - 
OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), intent(in) :: recvcounts - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_reduce_scatter_init -end interface pmpix_reduce_scatter_init - -interface mpix_reduce_scatter_block_init - subroutine mpix_reduce_scatter_block_init(sendbuf, recvbuf, recvcount, & - datatype, op, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: recvcount - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_reduce_scatter_block_init -end interface mpix_reduce_scatter_block_init - -interface pmpix_reduce_scatter_block_init - subroutine pmpix_reduce_scatter_block_init(sendbuf, recvbuf, recvcount, & - datatype, op, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: recvcount - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_reduce_scatter_block_init -end interface pmpix_reduce_scatter_block_init - -interface mpix_scan_init - 
subroutine mpix_scan_init(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: count - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_scan_init -end interface mpix_scan_init - -interface pmpix_scan_init - subroutine pmpix_scan_init(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: count - integer, intent(in) :: datatype - integer, intent(in) :: op - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_scan_init -end interface pmpix_scan_init - -interface mpix_scatter_init - subroutine mpix_scatter_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: 
recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_scatter_init -end interface mpix_scatter_init - -interface pmpix_scatter_init - subroutine pmpix_scatter_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_scatter_init -end interface pmpix_scatter_init - -interface mpix_scatterv_init - subroutine mpix_scatterv_init(sendbuf, sendcounts, displs, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), intent(in) :: sendcounts - integer, dimension(*), intent(in) :: displs - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_scatterv_init -end 
interface mpix_scatterv_init - -interface pmpix_scatterv_init - subroutine pmpix_scatterv_init(sendbuf, sendcounts, displs, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), intent(in) :: sendcounts - integer, dimension(*), intent(in) :: displs - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: root - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_scatterv_init -end interface pmpix_scatterv_init - -interface mpix_neighbor_allgather_init - subroutine mpix_neighbor_allgather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_neighbor_allgather_init -end interface mpix_neighbor_allgather_init - -interface pmpix_neighbor_allgather_init - subroutine pmpix_neighbor_allgather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES 
NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_neighbor_allgather_init -end interface pmpix_neighbor_allgather_init - -interface mpix_neighbor_allgatherv_init - subroutine mpix_neighbor_allgatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, dimension(*), intent(in) :: recvcounts - integer, dimension(*), intent(in) :: displs - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_neighbor_allgatherv_init -end interface mpix_neighbor_allgatherv_init - -interface pmpix_neighbor_allgatherv_init - subroutine pmpix_neighbor_allgatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - 
OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, dimension(*), intent(in) :: recvcounts - integer, dimension(*), intent(in) :: displs - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_neighbor_allgatherv_init -end interface pmpix_neighbor_allgatherv_init - -interface mpix_neighbor_alltoall_init - subroutine mpix_neighbor_alltoall_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_neighbor_alltoall_init -end interface mpix_neighbor_alltoall_init - -interface pmpix_neighbor_alltoall_init - subroutine pmpix_neighbor_alltoall_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, intent(in) :: sendcount - integer, intent(in) :: sendtype - integer, intent(in) :: recvcount - integer, intent(in) :: recvtype - integer, 
intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_neighbor_alltoall_init -end interface pmpix_neighbor_alltoall_init - -interface mpix_neighbor_alltoallv_init - subroutine mpix_neighbor_alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), intent(in) :: sendcounts - integer, dimension(*), intent(in) :: sdispls - integer, intent(in) :: sendtype - integer, dimension(*), intent(in) :: recvcounts - integer, dimension(*), intent(in) :: rdispls - integer, intent(in) :: recvtype - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_neighbor_alltoallv_init -end interface mpix_neighbor_alltoallv_init - -interface pmpix_neighbor_alltoallv_init - subroutine pmpix_neighbor_alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, ierror) - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), intent(in) :: sendcounts - integer, dimension(*), intent(in) :: sdispls - integer, intent(in) :: sendtype - integer, dimension(*), intent(in) :: recvcounts - integer, dimension(*), intent(in) :: rdispls - integer, intent(in) :: recvtype - integer, intent(in) :: 
comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_neighbor_alltoallv_init -end interface pmpix_neighbor_alltoallv_init - -interface mpix_neighbor_alltoallw_init - subroutine mpix_neighbor_alltoallw_init(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) - include 'mpif-config.h' - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), intent(in) :: sendcounts - integer(kind=mpi_address_kind), dimension(*), intent(in) :: sdispls - integer, dimension(*), intent(in) :: sendtypes - integer, dimension(*), intent(in) :: recvcounts - integer(kind=mpi_address_kind), dimension(*), intent(in) :: rdispls - integer, dimension(*), intent(in) :: recvtypes - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine mpix_neighbor_alltoallw_init -end interface mpix_neighbor_alltoallw_init - -interface pmpix_neighbor_alltoallw_init - subroutine pmpix_neighbor_alltoallw_init(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) - include 'mpif-config.h' - !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !GCC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf - !$PRAGMA IGNORE_TKR sendbuf, recvbuf - !DIR$ IGNORE_TKR sendbuf, recvbuf - !IBM* IGNORE_TKR sendbuf, recvbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE, intent(in) :: sendbuf - OMPI_FORTRAN_IGNORE_TKR_TYPE :: recvbuf - integer, dimension(*), intent(in) :: sendcounts - integer(kind=mpi_address_kind), dimension(*), intent(in) :: sdispls - integer, dimension(*), intent(in) :: sendtypes 
- integer, dimension(*), intent(in) :: recvcounts - integer(kind=mpi_address_kind), dimension(*), intent(in) :: rdispls - integer, dimension(*), intent(in) :: recvtypes - integer, intent(in) :: comm - integer, intent(in) :: info - integer, intent(out) :: request - integer, intent(out) :: ierror - end subroutine pmpix_neighbor_alltoallw_init -end interface pmpix_neighbor_alltoallw_init - diff --git a/ompi/op/op.c b/ompi/op/op.c index 1b547764c16..87634f42f72 100644 --- a/ompi/op/op.c +++ b/ompi/op/op.c @@ -17,6 +17,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,6 +35,7 @@ #include "ompi/op/op.h" #include "ompi/mca/op/base/base.h" #include "ompi/datatype/ompi_datatype_internal.h" +#include "ompi/instance/instance.h" /* @@ -47,6 +50,7 @@ opal_pointer_array_t *ompi_op_f_to_c_table = {0}; static int add_intrinsic(ompi_op_t *op, int fort_handle, int flags, const char *name); +static int ompi_op_finalize (void); /* * Class information @@ -300,15 +304,23 @@ int ompi_op_init(void) ompi_mpi_op_replace.op.op_type = OMPI_OP_REPLACE; } + ompi_mpi_instance_append_finalize (ompi_op_finalize); + /* All done */ return OMPI_SUCCESS; } -/* - * Clean up the op resources + +/** + * Finalize the op interface. + * + * @returns OMPI_SUCCESS Always + * + * Invoked on instance teardown if ompi_op_init() was called; tears down the op interface, and + * destroys the F2C translation table. */ -int ompi_op_finalize(void) +static int ompi_op_finalize (void) { /* clean up the intrinsic ops */ OBJ_DESTRUCT(&ompi_mpi_op_no_op); diff --git a/ompi/op/op.h b/ompi/op/op.h index 2cddcc97bb2..4cb798acdb4 100644 --- a/ompi/op/op.h +++ b/ompi/op/op.h @@ -20,6 +20,7 @@ * and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -326,16 +327,6 @@ extern struct opal_pointer_array_t *ompi_op_f_to_c_table; */ int ompi_op_init(void); -/** - * Finalize the op interface. - * - * @returns OMPI_SUCCESS Always - * - * Invokes from ompi_mpi_finalize(); tears down the op interface, and - * destroys the F2C translation table. - */ -int ompi_op_finalize(void); - /** * Create a ompi_op_t with a user-defined callback (vs. creating an * intrinsic ompi_op_t). @@ -510,10 +501,41 @@ static inline bool ompi_op_is_valid(ompi_op_t * op, ompi_datatype_t * ddt, * is not defined to have that operation, it is likely to seg fault. */ static inline void ompi_op_reduce(ompi_op_t * op, void *source, - void *target, int count, + void *target, size_t full_count, ompi_datatype_t * dtype) { MPI_Fint f_dtype, f_count; + int count = full_count; + + /* + * If the full_count is > INT_MAX then we need to call the reduction op + * in iterations of counts <= INT_MAX since it has an `int *len` + * parameter. + * + * Note: When we add BigCount support then we can distinguish between + * a reduction operation with `int *len` and `MPI_Count *len`. At which + * point we can avoid this loop. + */ + if( OPAL_UNLIKELY(full_count > INT_MAX) ) { + size_t done_count = 0, shift; + int iter_count; + ptrdiff_t ext, lb; + + ompi_datatype_get_extent(dtype, &lb, &ext); + + while(done_count < full_count) { + if(done_count + INT_MAX > full_count) { + iter_count = full_count - done_count; + } else { + iter_count = INT_MAX; + } + shift = done_count * ext; + // Recurse one level in iterations of 'int' + ompi_op_reduce(op, (char*)source + shift, (char*)target + shift, iter_count, dtype); + done_count += iter_count; + } + return; + } /* * Call the reduction function. 
Two dimensions: a) if both the op diff --git a/ompi/patterns/net/allreduce.c b/ompi/patterns/net/allreduce.c index be192255507..7442cd33f1e 100644 --- a/ompi/patterns/net/allreduce.c +++ b/ompi/patterns/net/allreduce.c @@ -5,6 +5,8 @@ * All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,7 +36,7 @@ void recv_completion(nt status, struct ompi_process_name_t* peer, struct iovec* int count, ompi_rml_tag_t tag, void* cbdata) { /* set receive completion flag */ - MB(); + opal_atomic_mb(); *(int *)cbdata=1; } @@ -232,7 +234,7 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, send_buffer^=1; } - MB(); + opal_atomic_mb(); /* * Signal parent that data is ready */ @@ -255,7 +257,7 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, *recv_done=0; *send_done=0; - MB(); + opal_atomic_mb(); /* post non-blocking receive */ recv_iov.iov_base=scratch_bufers[send_buffer]; diff --git a/ompi/request/grequest.c b/ompi/request/grequest.c index 02affd642aa..a14f93fa6dc 100644 --- a/ompi/request/grequest.c +++ b/ompi/request/grequest.c @@ -9,8 +9,9 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -121,13 +122,20 @@ static void ompi_grequest_construct(ompi_grequest_t* greq) */ static void ompi_grequest_destruct(ompi_grequest_t* greq) { - MPI_Fint ierr; - if (greq->greq_free.c_free != NULL) { + /* We were already putting query_fn()'s return value into + * status.MPI_ERROR but for MPI_{Wait,Test}*. If there's a + * free callback to invoke, the standard says to use the + * return value from free_fn() callback, too. + */ if (greq->greq_funcs_are_c) { - greq->greq_free.c_free(greq->greq_state); + greq->greq_base.req_status.MPI_ERROR = + greq->greq_free.c_free(greq->greq_state); } else { + MPI_Fint ierr; greq->greq_free.f_free((MPI_Aint*)greq->greq_state, &ierr); + greq->greq_base.req_status.MPI_ERROR = + OMPI_FINT_2_INT(ierr); } } @@ -214,8 +222,21 @@ int ompi_grequest_invoke_query(ompi_request_t *request, if (g->greq_funcs_are_c) { rc = g->greq_query.c_query(g->greq_state, status); } else { + /* request->req_status.MPI_ERROR was initialized to success + * and it's meant to be unmodified in the case of callback + * success, and set when callbacks return a failure. But + * if we leave fstatus uninitialized this sets + * req_status.MPI_ERROR to whatever happened to be on the + * stack at fstatus (f_query isn't supposed to directly set + * its status.MPI_ERROR, according to the standard) + * + * So the Status_c2f below only really cares about transferring + * the MPI_ERROR setting into fstatus so that when it's transferred + * back in the f2c call, it has the starting value. 
+ */ MPI_Fint ierr; MPI_Fint fstatus[sizeof(MPI_Status) / sizeof(int)]; + MPI_Status_c2f(status, fstatus); g->greq_query.f_query((MPI_Aint*)g->greq_state, fstatus, &ierr); MPI_Status_f2c(fstatus, status); rc = OMPI_FINT_2_INT(ierr); diff --git a/ompi/request/req_ft.c b/ompi/request/req_ft.c index c6dac235c2b..d287ee90fbf 100644 --- a/ompi/request/req_ft.c +++ b/ompi/request/req_ft.c @@ -13,6 +13,9 @@ * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -91,9 +94,9 @@ bool ompi_request_is_failed_fn(ompi_request_t *req) req->req_status.MPI_ERROR = MPI_ERR_REVOKED; opal_output_verbose(10, ompi_ftmpi_output_handle, - "%s ompi_request_is_failed: %p (peer %d, tag %d) is on communicator %s(%d) that has been revoked!", + "%s ompi_request_is_failed: %p (peer %d, tag %d) is on communicator %s(%s) that has been revoked!", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), (void*)req, pml_req->req_peer, pml_req->req_tag, - req->req_mpi_object.comm->c_name, req->req_mpi_object.comm->c_contextid); + req->req_mpi_object.comm->c_name, ompi_comm_print_cid(req->req_mpi_object.comm)); goto return_with_error; } @@ -129,9 +132,9 @@ bool ompi_request_is_failed_fn(ompi_request_t *req) req->req_status.MPI_ERROR = MPI_ERR_PROC_FAILED; } opal_output_verbose(10, ompi_ftmpi_output_handle, - "%s ompi_request_is_failed: Request %p (peer %d, tag %d) in comm %s(%d) peer ANY_SOURCE %s!", + "%s ompi_request_is_failed: Request %p (peer %d, tag %d) in comm %s(%s) peer ANY_SOURCE %s!", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), (void*)req, pml_req->req_peer, pml_req->req_tag, - req->req_mpi_object.comm->c_name, req->req_mpi_object.comm->c_contextid, + req->req_mpi_object.comm->c_name, ompi_comm_print_cid(req->req_mpi_object.comm), 
ompi_mpi_errnum_get_string(req->req_status.MPI_ERROR)); goto return_with_error; } @@ -144,9 +147,9 @@ bool ompi_request_is_failed_fn(ompi_request_t *req) req->req_status.MPI_ERROR = MPI_ERR_PROC_FAILED; assert(MPI_ANY_SOURCE != pml_req->req_peer); /* this case is handled above, so... */ opal_output_verbose(10, ompi_ftmpi_output_handle, - "%s ompi_request_is_failed: Request %p (peer %d, tag %d) in comm %s(%d) mpi_source %3d failed - Ret %s", + "%s ompi_request_is_failed: Request %p (peer %d, tag %d) in comm %s(%s) mpi_source %3d failed - Ret %s", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), (void*)req, pml_req->req_peer, pml_req->req_tag, - req->req_mpi_object.comm->c_name, req->req_mpi_object.comm->c_contextid, + req->req_mpi_object.comm->c_name, ompi_comm_print_cid(req->req_mpi_object.comm), req->req_status.MPI_SOURCE, ompi_mpi_errnum_get_string(req->req_status.MPI_ERROR)); goto return_with_error; diff --git a/ompi/request/req_test.c b/ompi/request/req_test.c index 8fc088ac9c1..cd04645a0c0 100644 --- a/ompi/request/req_test.c +++ b/ompi/request/req_test.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -35,13 +35,13 @@ int ompi_request_default_test(ompi_request_t ** rptr, #if OPAL_ENABLE_PROGRESS_THREADS == 0 int do_it_once = 0; - recheck_request_status: +recheck_request_status: #endif opal_atomic_mb(); if( request->req_state == OMPI_REQUEST_INACTIVE ) { *completed = true; if (MPI_STATUS_IGNORE != status) { - *status = ompi_status_empty; + OMPI_COPY_STATUS(status, ompi_status_empty, false); } return OMPI_SUCCESS; } @@ -53,17 +53,9 @@ int ompi_request_default_test(ompi_request_t ** rptr, STATUS_IGNORE. See MPI-2:8.2. */ if (OMPI_REQUEST_GEN == request->req_type) { ompi_grequest_invoke_query(request, &request->req_status); - if (MPI_STATUS_IGNORE != status) { - int old_error = status->MPI_ERROR; - *status = request->req_status; - status->MPI_ERROR = old_error; - } - } else if (MPI_STATUS_IGNORE != status) { - /* Do *NOT* set a new value for status->MPI_ERROR here! - See MPI-1.1 doc, sec 3.2.5, p.22 */ - int old_error = status->MPI_ERROR; - *status = request->req_status; - status->MPI_ERROR = old_error; + } + if (MPI_STATUS_IGNORE != status) { + OMPI_COPY_STATUS(status, request->req_status, false); } if( request->req_persistent ) { request->req_state = OMPI_REQUEST_INACTIVE; @@ -94,9 +86,10 @@ int ompi_request_default_test(ompi_request_t ** rptr, * If we run the opal_progress then check the status of the request before * leaving. We will call the opal_progress only once per call. */ - opal_progress(); - do_it_once++; - goto recheck_request_status; + ++do_it_once; + if (0 != opal_progress()) { + goto recheck_request_status; + } } #endif *completed = false; @@ -134,19 +127,9 @@ int ompi_request_default_test_any( STATUS_IGNORE */ if (OMPI_REQUEST_GEN == request->req_type) { ompi_grequest_invoke_query(request, &request->req_status); - if (MPI_STATUS_IGNORE != status) { - /* Do *NOT* set a new value for status->MPI_ERROR - here! 
See MPI-1.1 doc, sec 3.2.5, p.22 */ - int old_error = status->MPI_ERROR; - *status = request->req_status; - status->MPI_ERROR = old_error; - } - } else if (MPI_STATUS_IGNORE != status) { - /* Do *NOT* set a new value for status->MPI_ERROR - here! See MPI-1.1 doc, sec 3.2.5, p.22 */ - int old_error = status->MPI_ERROR; - *status = request->req_status; - status->MPI_ERROR = old_error; + } + if (MPI_STATUS_IGNORE != status) { + OMPI_COPY_STATUS(status, request->req_status, false); } if( request->req_persistent ) { @@ -184,7 +167,7 @@ int ompi_request_default_test_any( } else { *completed = true; if (MPI_STATUS_IGNORE != status) { - *status = ompi_status_empty; + OMPI_COPY_STATUS(status, ompi_status_empty, false); } } return OMPI_SUCCESS; @@ -201,15 +184,15 @@ int ompi_request_default_test_all( ompi_request_t **rptr; size_t num_completed = 0; ompi_request_t *request; + int do_it_once = 0; opal_atomic_mb(); - rptr = requests; - for (i = 0; i < count; i++, rptr++) { - request = *rptr; + for (i = 0; i < count; i++) { + request = requests[i]; - if( request->req_state == OMPI_REQUEST_INACTIVE || - REQUEST_COMPLETE(request) ) { + if( request->req_state == OMPI_REQUEST_INACTIVE || REQUEST_COMPLETE(request) ) { num_completed++; + continue; } #if OPAL_ENABLE_FT_MPI /* Check for dead requests due to process failure */ @@ -217,20 +200,29 @@ int ompi_request_default_test_all( if(OPAL_UNLIKELY( ompi_request_is_failed(request) && MPI_ERR_PROC_FAILED_PENDING == request->req_status.MPI_ERROR )) { if (MPI_STATUSES_IGNORE != statuses) { - statuses[i] = request->req_status; + OMPI_COPY_STATUS(&statuses[i], request->req_status, true); statuses[i].MPI_ERROR = MPI_ERR_PROC_FAILED_PENDING; } *completed = false; return MPI_ERR_PROC_FAILED_PENDING; } #endif /* OPAL_ENABLE_FT_MPI */ +#if OPAL_ENABLE_PROGRESS_THREADS == 0 + if (0 == do_it_once) { + ++do_it_once; + if (0 != opal_progress()) { + /* continue walking the list, retest the current request */ + --i; + continue; + } + } +#endif /* 
OPAL_ENABLE_PROGRESS_THREADS */ + /* short-circuit */ + break; } if (num_completed != count) { *completed = false; -#if OPAL_ENABLE_PROGRESS_THREADS == 0 - opal_progress(); -#endif return OMPI_SUCCESS; } @@ -246,13 +238,13 @@ int ompi_request_default_test_all( * to ompi_status_empty. */ if( request->req_state == OMPI_REQUEST_INACTIVE ) { - statuses[i] = ompi_status_empty; + OMPI_COPY_STATUS(&statuses[i], ompi_status_empty, true); continue; } if (OMPI_REQUEST_GEN == request->req_type) { ompi_grequest_invoke_query(request, &request->req_status); } - statuses[i] = request->req_status; + OMPI_COPY_STATUS(&statuses[i], request->req_status, true); if( request->req_persistent ) { request->req_state = OMPI_REQUEST_INACTIVE; continue; @@ -336,6 +328,7 @@ int ompi_request_default_test_some( } if( REQUEST_COMPLETE(request) ) { indices[num_requests_done++] = i; + continue; } #if OPAL_ENABLE_FT_MPI /* Check for dead requests due to process failure */ @@ -372,7 +365,7 @@ int ompi_request_default_test_some( /* Special case for MPI_ANY_SOURCE */ if(OPAL_UNLIKELY( MPI_ERR_PROC_FAILED_PENDING == request->req_status.MPI_ERROR )) { if (MPI_STATUSES_IGNORE != statuses) { - statuses[i] = request->req_status; + OMPI_COPY_STATUS(&statuses[i], request->req_status, true); statuses[i].MPI_ERROR = MPI_ERR_PROC_FAILED_PENDING; } rc = MPI_ERR_PROC_FAILED_PENDING; @@ -387,7 +380,7 @@ int ompi_request_default_test_some( ompi_grequest_invoke_query(request, &request->req_status); } if (MPI_STATUSES_IGNORE != statuses) { - statuses[i] = request->req_status; + OMPI_COPY_STATUS(&statuses[i], request->req_status, true); } if (MPI_SUCCESS != request->req_status.MPI_ERROR) { diff --git a/ompi/request/req_wait.c b/ompi/request/req_wait.c index 1b46888b905..14a8dcbf134 100644 --- a/ompi/request/req_wait.c +++ b/ompi/request/req_wait.c @@ -43,10 +43,7 @@ int ompi_request_default_wait( /* Special case for MPI_ANY_SOURCE */ if( MPI_ERR_PROC_FAILED_PENDING == req->req_status.MPI_ERROR ) { if( MPI_STATUS_IGNORE 
!= status ) { - status->MPI_TAG = req->req_status.MPI_TAG; - status->MPI_SOURCE = req->req_status.MPI_SOURCE; - status->_ucount = req->req_status._ucount; - status->_cancelled = req->req_status._cancelled; + OMPI_COPY_STATUS(status, req->req_status, false); } return MPI_ERR_PROC_FAILED_PENDING; } @@ -59,17 +56,12 @@ int ompi_request_default_wait( ompi_grequest_invoke_query(req, &req->req_status); } if( MPI_STATUS_IGNORE != status ) { - /* Do *NOT* set status->MPI_ERROR here! See MPI-1.1 doc, sec - 3.2.5, p.22 */ - status->MPI_TAG = req->req_status.MPI_TAG; - status->MPI_SOURCE = req->req_status.MPI_SOURCE; - status->_ucount = req->req_status._ucount; - status->_cancelled = req->req_status._cancelled; + OMPI_COPY_STATUS(status, req->req_status, false); } if( req->req_persistent ) { if( req->req_state == OMPI_REQUEST_INACTIVE ) { if (MPI_STATUS_IGNORE != status) { - *status = ompi_status_empty; + OMPI_COPY_STATUS(status, ompi_status_empty, false); } return OMPI_SUCCESS; } @@ -142,7 +134,7 @@ int ompi_request_default_wait_any(size_t count, if(num_requests_null_inactive == count) { *index = MPI_UNDEFINED; if (MPI_STATUS_IGNORE != status) { - *status = ompi_status_empty; + OMPI_COPY_STATUS(status, ompi_status_empty, false); } /* No signal-in-flight can be in this case */ WAIT_SYNC_RELEASE_NOWAIT(&sync); @@ -206,11 +198,7 @@ int ompi_request_default_wait_any(size_t count, rc = ompi_grequest_invoke_query(request, &request->req_status); } if (MPI_STATUS_IGNORE != status) { - /* Do *NOT* set status->MPI_ERROR here! 
See MPI-1.1 doc, - sec 3.2.5, p.22 */ - int old_error = status->MPI_ERROR; - *status = request->req_status; - status->MPI_ERROR = old_error; + OMPI_COPY_STATUS(status, request->req_status, false); } rc = request->req_status.MPI_ERROR; if( request->req_persistent ) { @@ -255,7 +243,7 @@ int ompi_request_default_wait_all( size_t count, continue; } - if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync)) { + if (REQUEST_COMPLETE(request) || !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync)) { if( OPAL_LIKELY( REQUEST_COMPLETE(request) ) ) { if( OPAL_UNLIKELY( MPI_SUCCESS != request->req_status.MPI_ERROR ) ) { failed++; @@ -321,7 +309,7 @@ int ompi_request_default_wait_all( size_t count, request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE ) { - statuses[i] = ompi_status_empty; + OMPI_COPY_STATUS(&statuses[i], ompi_status_empty, true); continue; } @@ -356,7 +344,7 @@ int ompi_request_default_wait_all( size_t count, ompi_grequest_invoke_query(request, &request->req_status); } - statuses[i] = request->req_status; + OMPI_COPY_STATUS(&statuses[i], request->req_status, true); if( request->req_persistent ) { request->req_state = OMPI_REQUEST_INACTIVE; @@ -593,7 +581,7 @@ int ompi_request_default_wait_some(size_t count, if( MPI_ERR_PROC_FAILED_PENDING == request->req_status.MPI_ERROR ) { rc = MPI_ERR_IN_STATUS; if (MPI_STATUSES_IGNORE != statuses) { - statuses[i] = request->req_status; + OMPI_COPY_STATUS(&statuses[i], request->req_status, true); statuses[i].MPI_ERROR = MPI_ERR_PROC_FAILED_PENDING; } else { if( (MPI_ERR_PROC_FAILED == request->req_status.MPI_ERROR) || @@ -612,7 +600,7 @@ int ompi_request_default_wait_some(size_t count, ompi_grequest_invoke_query(request, &request->req_status); } if (MPI_STATUSES_IGNORE != statuses) { - statuses[i] = request->req_status; + OMPI_COPY_STATUS(&statuses[i], request->req_status, true); } if (MPI_SUCCESS != request->req_status.MPI_ERROR) { diff --git 
a/ompi/request/request.c b/ompi/request/request.c index abf33449d89..1ff70f9d45f 100644 --- a/ompi/request/request.c +++ b/ompi/request/request.c @@ -18,6 +18,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -104,6 +106,16 @@ OBJ_CLASS_INSTANCE( ompi_request_destruct); +static int ompi_request_finalize (void) +{ + OMPI_REQUEST_FINI( &ompi_request_null.request ); + OBJ_DESTRUCT( &ompi_request_null.request ); + OMPI_REQUEST_FINI( &ompi_request_empty ); + OBJ_DESTRUCT( &ompi_request_empty ); + OBJ_DESTRUCT( &ompi_request_f_to_c_table ); + return OMPI_SUCCESS; +} + int ompi_request_init(void) { @@ -173,21 +185,11 @@ int ompi_request_init(void) ompi_status_empty._ucount = 0; ompi_status_empty._cancelled = 0; - return OMPI_SUCCESS; -} - + ompi_mpi_instance_append_finalize (ompi_request_finalize); -int ompi_request_finalize(void) -{ - OMPI_REQUEST_FINI( &ompi_request_null.request ); - OBJ_DESTRUCT( &ompi_request_null.request ); - OMPI_REQUEST_FINI( &ompi_request_empty ); - OBJ_DESTRUCT( &ompi_request_empty ); - OBJ_DESTRUCT( &ompi_request_f_to_c_table ); return OMPI_SUCCESS; } - int ompi_request_persistent_noop_create(ompi_request_t** request) { ompi_request_t *req; diff --git a/ompi/request/request.h b/ompi/request/request.h index eb3f829b45b..52f61df80b2 100644 --- a/ompi/request/request.h +++ b/ompi/request/request.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -16,6 +16,8 @@ * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -205,6 +207,28 @@ do { \ } \ } while (0); +/* + * Except in procedures that return MPI_ERR_IN_STATUS, the MPI_ERROR + * field of a status object shall never be modified. + * See MPI-1.1 doc, sec 3.2.5, p.22. + * + * Add a small macro that helps set the status appropriately, + * depending on the use case. + */ +#define OMPI_COPY_STATUS(pdst, src, is_err_in_status) \ +do { \ + if (is_err_in_status) { \ + *(pdst) = (src); \ + } \ + else { \ + (pdst)->MPI_TAG = (src).MPI_TAG; \ + (pdst)->MPI_SOURCE = (src).MPI_SOURCE; \ + (pdst)->_ucount = (src)._ucount; \ + (pdst)->_cancelled = (src)._cancelled; \ + } \ +} while(0); + + /** * Non-blocking test for request completion. * @@ -365,11 +389,6 @@ OMPI_DECLSPEC extern ompi_request_fns_t ompi_request_functions; */ int ompi_request_init(void); -/** - * Shut down the MPI_Request subsystem; invoked during MPI_FINALIZE. - */ -int ompi_request_finalize(void); - /** * Create a persistent request that does nothing (e.g., to MPI_PROC_NULL).
*/ @@ -426,39 +445,44 @@ static inline bool ompi_request_tag_is_collective(int tag) { static inline void ompi_request_wait_completion(ompi_request_t *req) { - if (opal_using_threads () && !REQUEST_COMPLETE(req)) { - void *_tmp_ptr; - ompi_wait_sync_t sync; + if (opal_using_threads ()) { + if(!REQUEST_COMPLETE(req)) { + void *_tmp_ptr; + ompi_wait_sync_t sync; + + #if OPAL_ENABLE_FT_MPI -redo: - if(OPAL_UNLIKELY( ompi_request_is_failed(req) )) { - return; - } + redo: + if(OPAL_UNLIKELY( ompi_request_is_failed(req) )) { + return; + } #endif /* OPAL_ENABLE_FT_MPI */ - _tmp_ptr = REQUEST_PENDING; + _tmp_ptr = REQUEST_PENDING; - WAIT_SYNC_INIT(&sync, 1); + WAIT_SYNC_INIT(&sync, 1); - if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) { - SYNC_WAIT(&sync); - } else { - /* completed before we had a chance to swap in the sync object */ - WAIT_SYNC_SIGNALLED(&sync); - } + if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) { + SYNC_WAIT(&sync); + } else { + /* completed before we had a chance to swap in the sync object */ + WAIT_SYNC_SIGNALLED(&sync); + } #if OPAL_ENABLE_FT_MPI - if (OPAL_UNLIKELY(OMPI_SUCCESS != sync.status)) { - OPAL_OUTPUT_VERBOSE((50, ompi_ftmpi_output_handle, "Status %d reported for sync %p rearming req %p", sync.status, (void*)&sync, (void*)req)); - _tmp_ptr = &sync; - if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, REQUEST_PENDING)) { - opal_output_verbose(10, ompi_ftmpi_output_handle, "Status %d reported for sync %p rearmed req %p", sync.status, (void*)&sync, (void*)req); - WAIT_SYNC_RELEASE(&sync); - goto redo; + if (OPAL_UNLIKELY(OMPI_SUCCESS != sync.status)) { + OPAL_OUTPUT_VERBOSE((50, ompi_ftmpi_output_handle, "Status %d reported for sync %p rearming req %p", sync.status, (void*)&sync, (void*)req)); + _tmp_ptr = &sync; + if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, REQUEST_PENDING)) { + opal_output_verbose(10, 
ompi_ftmpi_output_handle, "Status %d reported for sync %p rearmed req %p", sync.status, (void*)&sync, (void*)req); + WAIT_SYNC_RELEASE(&sync); + goto redo; + } } - } #endif /* OPAL_ENABLE_FT_MPI */ - assert(REQUEST_COMPLETE(req)); - WAIT_SYNC_RELEASE(&sync); + assert(REQUEST_COMPLETE(req)); + WAIT_SYNC_RELEASE(&sync); + } + opal_atomic_rmb(); } else { while(!REQUEST_COMPLETE(req)) { opal_progress(); diff --git a/ompi/runtime/mpiruntime.h b/ompi/runtime/mpiruntime.h index 81c9741c2e2..3e5b5885718 100644 --- a/ompi/runtime/mpiruntime.h +++ b/ompi/runtime/mpiruntime.h @@ -242,12 +242,6 @@ void ompi_mpi_dynamics_disable(const char *msg); */ bool ompi_mpi_dynamics_is_enabled(const char *function); -/** - * Clean up memory / resources by the MPI dynamics process - * functionality checker - */ -void ompi_mpi_dynamics_finalize(void); - END_C_DECLS #endif /* OMPI_MPI_MPIRUNTIME_H */ diff --git a/ompi/runtime/ompi_mpi_abort.c b/ompi/runtime/ompi_mpi_abort.c index 45292f7cbfc..bfb78114590 100644 --- a/ompi/runtime/ompi_mpi_abort.c +++ b/ompi/runtime/ompi_mpi_abort.c @@ -19,7 +19,7 @@ * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. - * Copyright (c) 2019 Triad National Security, LLC. All rights + * Copyright (c) 2019-2021 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -197,5 +197,6 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, kill the entire job. Wah wah. 
*/ ompi_rte_abort(errcode, NULL); - /* Does not return */ + /* Does not return - but we add a return to keep compiler warnings at bay*/ + return 0; } diff --git a/ompi/runtime/ompi_mpi_dynamics.c b/ompi/runtime/ompi_mpi_dynamics.c index 9e9f92f84f8..25403c91892 100644 --- a/ompi/runtime/ompi_mpi_dynamics.c +++ b/ompi/runtime/ompi_mpi_dynamics.c @@ -28,16 +28,30 @@ #include "ompi/runtime/params.h" #include "ompi/runtime/mpiruntime.h" +#include "ompi/instance/instance.h" static char *ompi_mpi_dynamics_disabled_msg = "Enabled"; +static int ompi_mpi_dynamics_finalize (void) +{ + // If dynamics were disabled, then we have a message to free + if (!ompi_mpi_dynamics_enabled) { + free(ompi_mpi_dynamics_disabled_msg); + ompi_mpi_dynamics_disabled_msg = NULL; + } + + return OMPI_SUCCESS; +} + void ompi_mpi_dynamics_disable(const char *msg) { assert(msg); ompi_mpi_dynamics_enabled = false; ompi_mpi_dynamics_disabled_msg = strdup(msg); + + ompi_mpi_instance_append_finalize (ompi_mpi_dynamics_finalize); } bool ompi_mpi_dynamics_is_enabled(const char *function) @@ -53,12 +67,3 @@ bool ompi_mpi_dynamics_is_enabled(const char *function) ompi_mpi_dynamics_disabled_msg); return false; } - -void ompi_mpi_dynamics_finalize(void) -{ - // If dynamics were disabled, then we have a message to free - if (!ompi_mpi_dynamics_enabled) { - free(ompi_mpi_dynamics_disabled_msg); - ompi_mpi_dynamics_disabled_msg = NULL; - } -} diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index ee2dd73b993..038e34cef8a 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -19,7 +19,6 @@ * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2019 Triad National Security, LLC. All rights * reserved. 
@@ -61,7 +60,6 @@ #include "opal/mca/allocator/base/base.h" #include "opal/mca/pmix/pmix-internal.h" #include "opal/util/timings.h" - #include "mpi.h" #include "ompi/constants.h" #include "ompi/errhandler/errcode.h" @@ -87,9 +85,15 @@ #include "ompi/mca/io/base/base.h" #include "ompi/mca/pml/base/pml_base_bsend.h" #include "ompi/runtime/params.h" -#include "ompi/dpm/dpm.h" -#include "ompi/mpiext/mpiext.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/hook/hook.h" #include "ompi/mca/hook/base/base.h" +#include "ompi/communicator/communicator.h" +#include "ompi/attribute/attribute.h" +#include "ompi/instance/instance.h" + +#include "mpi.h" +#include "ompi/constants.h" extern bool ompi_enable_timing; @@ -104,13 +108,8 @@ static void fence_cbfunc(pmix_status_t status, void *cbdata) int ompi_mpi_finalize(void) { int ret = MPI_SUCCESS; - opal_list_item_t *item; - ompi_proc_t** procs; - size_t nprocs; - volatile bool active; - uint32_t key; - ompi_datatype_t * datatype; pmix_status_t rc; + volatile bool active; ompi_hook_base_mpi_finalize_top(); @@ -138,8 +137,6 @@ int ompi_mpi_finalize(void) opal_atomic_wmb(); opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_STARTED); - ompi_mpiext_fini(); - /* Per MPI-2:4.8, we have to free MPI_COMM_SELF before doing anything else in MPI_FINALIZE (to include setting up such that MPI_FINALIZED will return true). */ @@ -196,11 +193,6 @@ int ompi_mpi_finalize(void) opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT); - /* As finalize is the last legal MPI call, we are allowed to force the release - * of the user buffer used for bsend, before going anywhere further. 
- */ - (void)mca_pml_base_bsend_detach(NULL, NULL); - #if OPAL_ENABLE_PROGRESS_THREADS == 0 opal_progress_set_event_flag(OPAL_EVLOOP_ONCE | OPAL_EVLOOP_NONBLOCK); #endif @@ -299,212 +291,7 @@ int ompi_mpi_finalize(void) OMPI_LAZY_WAIT_FOR_COMPLETION(active); } - /* Shut down any bindings-specific issues: C++, F77, F90 */ - - /* Remove all memory associated by MPI_REGISTER_DATAREP (per - MPI-2:9.5.3, there is no way for an MPI application to - *un*register datareps, but we don't want the OMPI layer causing - memory leaks). */ - while (NULL != (item = opal_list_remove_first(&ompi_registered_datareps))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&ompi_registered_datareps); - - /* Remove all F90 types from the hash tables */ - OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_integer_hashtable) - OBJ_RELEASE(datatype); - OBJ_DESTRUCT(&ompi_mpi_f90_integer_hashtable); - OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_real_hashtable) - OBJ_RELEASE(datatype); - OBJ_DESTRUCT(&ompi_mpi_f90_real_hashtable); - OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_complex_hashtable) - OBJ_RELEASE(datatype); - OBJ_DESTRUCT(&ompi_mpi_f90_complex_hashtable); - - /* Free communication objects */ - - /* free file resources */ - if (OMPI_SUCCESS != (ret = ompi_file_finalize())) { - goto done; - } - - /* free window resources */ - if (OMPI_SUCCESS != (ret = ompi_win_finalize())) { - goto done; - } - if (OMPI_SUCCESS != (ret = ompi_osc_base_finalize())) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_part_base_finalize())) { - goto done; - } - - - /* free communicator resources. this MUST come before finalizing the PML - * as this will call into the pml */ - if (OMPI_SUCCESS != (ret = ompi_comm_finalize())) { - goto done; - } - - /* call del_procs on all allocated procs even though some may not be known - * to the pml layer. the pml layer is expected to be resilient and ignore - * any unknown procs. 
*/ - nprocs = 0; - procs = ompi_proc_get_allocated (&nprocs); - MCA_PML_CALL(del_procs(procs, nprocs)); - free(procs); - - /* free pml resource */ - if(OMPI_SUCCESS != (ret = mca_pml_base_finalize())) { - goto done; - } - - /* free requests */ - if (OMPI_SUCCESS != (ret = ompi_request_finalize())) { - goto done; - } - - if (OMPI_SUCCESS != (ret = ompi_message_finalize())) { - goto done; - } - - /* If requested, print out a list of memory allocated by ALLOC_MEM - but not freed by FREE_MEM */ - if (0 != ompi_debug_show_mpi_alloc_mem_leaks) { - mca_mpool_base_tree_print(ompi_debug_show_mpi_alloc_mem_leaks); - } - - /* Now that all MPI objects dealing with communications are gone, - shut down MCA types having to do with communications */ - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pml_base_framework) ) ) { - OMPI_ERROR_LOG(ret); - goto done; - } - - /* shut down buffered send code */ - mca_pml_base_bsend_fini(); - - /* Free secondary resources */ - - /* free attr resources */ - if (OMPI_SUCCESS != (ret = ompi_attr_finalize())) { - goto done; - } - - /* free group resources */ - if (OMPI_SUCCESS != (ret = ompi_group_finalize())) { - goto done; - } - - /* finalize the DPM subsystem */ - if ( OMPI_SUCCESS != (ret = ompi_dpm_finalize())) { - goto done; - } - - /* free internal error resources */ - if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) { - goto done; - } - - /* free error code resources */ - if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_finalize())) { - goto done; - } - - /* free errhandler resources */ - if (OMPI_SUCCESS != (ret = ompi_errhandler_finalize())) { - goto done; - } - - /* Free all other resources */ - - /* free op resources */ - if (OMPI_SUCCESS != (ret = ompi_op_finalize())) { - goto done; - } - - /* free ddt resources */ - if (OMPI_SUCCESS != (ret = ompi_datatype_finalize())) { - goto done; - } - - /* free info resources */ - if (OMPI_SUCCESS != (ret = ompi_mpiinfo_finalize())) { - goto done; - } - - /* Close down MCA 
modules */ - - /* io is opened lazily, so it's only necessary to close it if it - was actually opened */ - if (0 < ompi_io_base_framework.framework_refcnt) { - /* May have been "opened" multiple times. We want it closed now */ - ompi_io_base_framework.framework_refcnt = 1; - - if (OMPI_SUCCESS != mca_base_framework_close(&ompi_io_base_framework)) { - goto done; - } - } - (void) mca_base_framework_close(&ompi_topo_base_framework); - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_osc_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_part_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_coll_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_bml_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_mpool_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_rcache_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_allocator_base_framework))) { - goto done; - } - - /* free proc resources */ - if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) { - goto done; - } - - if (NULL != ompi_mpi_main_thread) { - OBJ_RELEASE(ompi_mpi_main_thread); - ompi_mpi_main_thread = NULL; - } - - /* Clean up memory/resources from the MPI dynamic process - functionality checker */ - ompi_mpi_dynamics_finalize(); - - /* Leave the RTE */ - - if (OMPI_SUCCESS != (ret = ompi_rte_finalize())) { - goto done; - } - ompi_rte_initialized = false; - - /* Now close the hook framework */ - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_hook_base_framework) ) ) { - OMPI_ERROR_LOG(ret); - goto done; - } - - if (OPAL_SUCCESS != (ret = opal_finalize_util())) { - goto done; - } - - if (0 == opal_initialized) { - /* if there is no MPI_T_init_thread that has been MPI_T_finalize'd, - * then be gentle to the app and 
release all the memory now (instead - * of the opal library destructor */ - opal_class_finalize(); - } + ompi_mpi_instance_finalize (&ompi_mpi_instance_default); /* cleanup environment */ opal_unsetenv("OMPI_COMMAND", &environ); @@ -512,7 +299,7 @@ int ompi_mpi_finalize(void) /* All done */ - done: + done: opal_atomic_wmb(); opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_COMPLETED); diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 4acc36b4b20..c94474cff60 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2006-2022 Cisco Systems, Inc. All rights reserved * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2009 University of Houston. All rights reserved. @@ -18,8 +18,8 @@ * Copyright (c) 2011-2020 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2018 Mellanox Technologies Ltd. All rights reserved. * * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. @@ -27,6 +27,8 @@ * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. * All Rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -64,7 +66,6 @@ #include "opal/mca/mpool/base/base.h" #include "opal/mca/btl/base/base.h" #include "opal/mca/pmix/base/base.h" -#include "opal/util/timings.h" #include "opal/util/opal_environ.h" #include "ompi/constants.h" @@ -117,11 +118,6 @@ OPAL_DECLSPEC void (*__malloc_initialize_hook) (void) = */ #include -#if OPAL_CC_USE_PRAGMA_IDENT -#pragma ident OMPI_IDENT_STRING -#elif OPAL_CC_USE_IDENT -#ident OMPI_IDENT_STRING -#endif const char ompi_version_string[] = OMPI_IDENT_STRING; /* @@ -217,21 +213,31 @@ struct ompi_status_public_t *ompi_mpi_statuses_ignore_addr = # if OMPI_FORTRAN_CAPS MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &MPI_FORTRAN_STATUS_IGNORE; MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &MPI_FORTRAN_STATUSES_IGNORE; +MPI_Fint *MPI_F08_STATUS_IGNORE = (MPI_Fint*) &MPI_FORTRAN_STATUS_IGNORE; +MPI_Fint *MPI_F08_STATUSES_IGNORE = (MPI_Fint*) &MPI_FORTRAN_STATUSES_IGNORE; # elif OMPI_FORTRAN_PLAIN MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore; MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore; +MPI_Fint *MPI_F08_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore; +MPI_Fint *MPI_F08_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore; # elif OMPI_FORTRAN_SINGLE_UNDERSCORE MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore_; MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore_; +MPI_Fint *MPI_F08_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore_; +MPI_Fint *MPI_F08_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore_; # elif OMPI_FORTRAN_DOUBLE_UNDERSCORE MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore__; MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore__; +MPI_Fint *MPI_F08_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore__; +MPI_Fint *MPI_F08_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore__; # else # error Unrecognized 
Fortran name mangling scheme # endif #else MPI_Fint *MPI_F_STATUS_IGNORE = NULL; MPI_Fint *MPI_F_STATUSES_IGNORE = NULL; +MPI_Fint *MPI_F08_STATUS_IGNORE = NULL; +MPI_Fint *MPI_F08_STATUSES_IGNORE = NULL; #endif /* OMPI_BUILD_FORTRAN_BINDINGS */ @@ -262,57 +268,6 @@ MPI_Fint *MPI_F_STATUSES_IGNORE = NULL; #include "mpif-c-constants.h" -/* - * Hash tables for MPI_Type_create_f90* functions - */ -opal_hash_table_t ompi_mpi_f90_integer_hashtable = {{0}}; -opal_hash_table_t ompi_mpi_f90_real_hashtable = {{0}}; -opal_hash_table_t ompi_mpi_f90_complex_hashtable = {{0}}; - -/* - * Per MPI-2:9.5.3, MPI_REGISTER_DATAREP is a memory leak. There is - * no way to *de*register datareps once they've been registered. So - * we have to track all registrations here so that they can be - * de-registered during MPI_FINALIZE so that memory-tracking debuggers - * don't show Open MPI as leaking memory. - */ -opal_list_t ompi_registered_datareps = {{0}}; - -bool ompi_enable_timing = false; -extern bool ompi_mpi_yield_when_idle; -extern int ompi_mpi_event_tick_rate; - -/** - * Static functions used to configure the interactions between the OPAL and - * the runtime. 
- */ -static char* -_process_name_print_for_opal(const opal_process_name_t procname) -{ - ompi_process_name_t* rte_name = (ompi_process_name_t*)&procname; - return OMPI_NAME_PRINT(rte_name); -} - -static int -_process_name_compare(const opal_process_name_t p1, const opal_process_name_t p2) -{ - ompi_process_name_t* o1 = (ompi_process_name_t*)&p1; - ompi_process_name_t* o2 = (ompi_process_name_t*)&p2; - return ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, o1, o2); -} - -static int _convert_string_to_process_name(opal_process_name_t *name, - const char* name_string) -{ - return ompi_rte_convert_string_to_process_name(name, name_string); -} - -static int _convert_process_name_to_string(char** name_string, - const opal_process_name_t *name) -{ - return ompi_rte_convert_process_name_to_string(name_string, name); -} - void ompi_mpi_thread_level(int requested, int *provided) { /** @@ -336,41 +291,6 @@ void ompi_mpi_thread_level(int requested, int *provided) MPI_THREAD_MULTIPLE); } -static int ompi_register_mca_variables(void) -{ - int ret; - - /* Register MPI variables */ - if (OMPI_SUCCESS != (ret = ompi_mpi_register_params())) { - return ret; - } - - /* check to see if we want timing information */ - /* TODO: enable OMPI init and OMPI finalize timings if - * this variable was set to 1! - */ - ompi_enable_timing = false; - (void) mca_base_var_register("ompi", "ompi", NULL, "timing", - "Request that critical timing loops be measured", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_enable_timing); - -#if OPAL_ENABLE_FT_MPI - /* Before loading any other part of the MPI library, we need to load - * the ft-mpi tune file to override default component selection when - * FT is desired ON; this does override openmpi-params.conf, but not - * command line or env. 
- */ - if( ompi_ftmpi_enabled ) { - mca_base_var_load_extra_files("ft-mpi", false); - } -#endif /* OPAL_ENABLE_FT_MPI */ - - return OMPI_SUCCESS; -} - static void fence_release(pmix_status_t status, void *cbdata) { volatile bool *active = (volatile bool*)cbdata; @@ -379,32 +299,19 @@ static void fence_release(pmix_status_t status, void *cbdata) OPAL_POST_OBJECT(active); } -static void evhandler_reg_callbk(pmix_status_t status, - size_t evhandler_ref, - void *cbdata) -{ - opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata; - - lock->status = status; - OPAL_PMIX_WAKEUP_THREAD(lock); -} - - int ompi_mpi_init(int argc, char **argv, int requested, int *provided, bool reinit_ok) { int ret; - ompi_proc_t** procs; - size_t nprocs; char *error = NULL; +#if OPAL_USING_INTERNAL_PMIX + char *evar; +#endif volatile bool active; bool background_fence = false; pmix_info_t info[2]; - pmix_status_t codes[1] = { PMIX_ERR_PROC_ABORTED }; pmix_status_t rc; OMPI_TIMING_INIT(64); - opal_pmix_lock_t mylock; - opal_process_name_t pname; ompi_hook_base_mpi_init_top(argc, argv, requested, provided); @@ -439,267 +346,50 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, } } - /* Figure out the final MPI thread levels. If we were not - compiled for support for MPI threads, then don't allow - MPI_THREAD_MULTIPLE. Set this stuff up here early in the - process so that other components can make decisions based on - this value. */ - - ompi_mpi_thread_level(requested, provided); - - /* Setup enough to check get/set MCA params */ - memset(&opal_process_info, 0, sizeof(opal_process_info)); - if (OPAL_SUCCESS != (ret = opal_init_util(&argc, &argv))) { - error = "ompi_mpi_init: opal_init_util failed"; - goto error; - } - OMPI_TIMING_IMPORT_OPAL("opal_init_util"); - - /* If thread support was enabled, then setup OPAL to allow for them. This must be done - * early to prevent a race condition that can occur with orte_init(). 
*/ - if (*provided != MPI_THREAD_SINGLE) { - opal_set_using_threads(true); + /* deal with OPAL_PREFIX to ensure that an internal PMIx installation + * is also relocated if necessary */ +#if OPAL_USING_INTERNAL_PMIX + if (NULL != (evar = getenv("OPAL_PREFIX"))) { + opal_setenv("PMIX_PREFIX", evar, true, &environ); } +#endif - /* Convince OPAL to use our naming scheme */ - opal_process_name_print = _process_name_print_for_opal; - opal_compare_proc = _process_name_compare; - opal_convert_string_to_process_name = _convert_string_to_process_name; - opal_convert_process_name_to_string = _convert_process_name_to_string; - opal_proc_for_name = ompi_proc_for_name; - - /* Register MCA variables */ - if (OPAL_SUCCESS != (ret = ompi_register_mca_variables())) { - error = "ompi_mpi_init: ompi_register_mca_variables failed"; - goto error; - } - - /* setup our internal nspace hack */ - opal_pmix_setup_nspace_tracker(); - /* init PMIx */ - if (PMIX_SUCCESS != (ret = PMIx_Init(&opal_process_info.myprocid, NULL, 0))) { - /* if we get PMIX_ERR_UNREACH indicating that we cannot reach the - * server, then we assume we are operating as a singleton */ - if (PMIX_ERR_UNREACH == ret) { - ompi_singleton = true; - } else { - /* we cannot run - this could be due to being direct launched - * without the required PMI support being built, so print - * out a help message indicating it */ - opal_show_help("help-mpi-runtime.txt", "no-pmi", true, PMIx_Error_string(ret)); - return OPAL_ERR_SILENT; - } - } - /* setup the process name fields - also registers the new nspace */ - OPAL_PMIX_CONVERT_PROCT(ret, &pname, &opal_process_info.myprocid); - if (OPAL_SUCCESS != ret) { - error = "ompi_mpi_init: converting process name"; - goto error; - } - OPAL_PROC_MY_NAME.jobid = pname.jobid; - OPAL_PROC_MY_NAME.vpid = pname.vpid; - opal_process_info.my_name.jobid = OPAL_PROC_MY_NAME.jobid; - opal_process_info.my_name.vpid = OPAL_PROC_MY_NAME.vpid; - - /* get our topology and cache line size */ - ret = 
opal_hwloc_base_get_topology(); - if (OPAL_SUCCESS != ret) { - error = "ompi_mpi_init: get topology"; - goto error; - } - - if (OPAL_SUCCESS != (ret = opal_arch_set_fortran_logical_size(sizeof(ompi_fortran_logical_t)))) { - error = "ompi_mpi_init: opal_arch_set_fortran_logical_size failed"; - goto error; - } - - /* _After_ opal_init_util() but _before_ orte_init(), we need to - set an MCA param that tells libevent that it's ok to use any - mechanism in libevent that is available on this platform (e.g., - epoll and friends). Per opal/event/event.s, we default to - select/poll -- but we know that MPI processes won't be using - pty's with the event engine, so it's ok to relax this - constraint and let any fd-monitoring mechanism be used. */ - - ret = mca_base_var_find("opal", "event", "*", "event_include"); - if (ret >= 0) { - char *allvalue = "all"; - /* We have to explicitly "set" the MCA param value here - because libevent initialization will re-register the MCA - param and therefore override the default. Setting the value - here puts the desired value ("all") in different storage - that is not overwritten if/when the MCA param is - re-registered. This is unless the user has specified a different - value for this MCA parameter. Make sure we check to see if the - default is specified before forcing "all" in case that is not what - the user desires. Note that we do *NOT* set this value as an - environment variable, just so that it won't be inherited by - any spawned processes and potentially cause unintented - side-effects with launching RTE tools... 
*/ - mca_base_var_set_value(ret, allvalue, 4, MCA_BASE_VAR_SOURCE_DEFAULT, NULL); - } + ompi_mpi_thread_level(requested, provided); - /* open the ompi hook framework */ - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_hook_base_framework, 0))) { - error = "ompi_hook_base_open() failed"; + ret = ompi_mpi_instance_init (*provided, &ompi_mpi_info_null.info.super, MPI_ERRORS_ARE_FATAL, &ompi_mpi_instance_default); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + error = "ompi_mpi_init: ompi_mpi_instance_init failed"; goto error; } ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided); - - OMPI_TIMING_NEXT("initialization"); - - /* Setup RTE */ - if (OMPI_SUCCESS != (ret = ompi_rte_init(&argc, &argv))) { - error = "ompi_mpi_init: ompi_rte_init failed"; - goto error; - } - OMPI_TIMING_NEXT("rte_init"); - OMPI_TIMING_IMPORT_OPAL("orte_ess_base_app_setup"); - OMPI_TIMING_IMPORT_OPAL("rte_init"); - - ompi_rte_initialized = true; - /* if we are oversubscribed, then set yield_when_idle - * accordingly */ - if (ompi_mpi_oversubscribed) { - ompi_mpi_yield_when_idle = true; - } - - /* Register the default errhandler callback */ - /* we want to go first */ - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_PREPEND, NULL, PMIX_BOOL); - /* give it a name so we can distinguish it */ - PMIX_INFO_LOAD(&info[1], PMIX_EVENT_HDLR_NAME, "MPI-Default", PMIX_STRING); - OPAL_PMIX_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(codes, 1, info, 2, ompi_errhandler_callback, evhandler_reg_callbk, (void*)&mylock); - OPAL_PMIX_WAIT_THREAD(&mylock); - rc = mylock.status; - OPAL_PMIX_DESTRUCT_LOCK(&mylock); - PMIX_INFO_DESTRUCT(&info[0]); - PMIX_INFO_DESTRUCT(&info[1]); - if (PMIX_SUCCESS != rc) { - error = "Error handler registration"; - ret = opal_pmix_convert_status(rc); + /* initialize communicator subsystem */ + if (OMPI_SUCCESS != (ret = ompi_comm_init_mpi3 ())) { + error = "ompi_mpi_init: ompi_comm_init_mpi3 failed"; goto error; } - /* declare our presence for interlib 
coordination, and - * register for callbacks when other libs declare */ - if (OMPI_SUCCESS != (ret = ompi_interlib_declare(*provided, OMPI_IDENT_STRING))) { - error = "ompi_interlib_declare"; - goto error; - } - - /* initialize datatypes. This step should be done early as it will - * create the local convertor and local arch used in the proc - * init. + /* if we were not externally started, then we need to setup + * some envars so the MPI_INFO_ENV can get the cmd name + * and argv (but only if the user supplied a non-NULL argv!), and + * the requested thread level */ - if (OMPI_SUCCESS != (ret = ompi_datatype_init())) { - error = "ompi_datatype_init() failed"; - goto error; - } - - /* Initialize OMPI procs */ - if (OMPI_SUCCESS != (ret = ompi_proc_init())) { - error = "mca_proc_init() failed"; - goto error; - } - - /* Initialize the op framework. This has to be done *after* - ddt_init, but befor mca_coll_base_open, since some collective - modules (e.g., the hierarchical coll component) may need ops in - their query function. 
*/ - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_op_base_framework, 0))) { - error = "ompi_op_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != - (ret = ompi_op_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { - error = "ompi_op_base_find_available() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = ompi_op_init())) { - error = "ompi_op_init() failed"; - goto error; - } - - /* Open up MPI-related MCA components */ - - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&opal_allocator_base_framework, 0))) { - error = "mca_allocator_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&opal_rcache_base_framework, 0))) { - error = "mca_rcache_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&opal_mpool_base_framework, 0))) { - error = "mca_mpool_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_bml_base_framework, 0))) { - error = "mca_bml_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_bml_base_init (1, ompi_mpi_thread_multiple))) { - error = "mca_bml_base_init() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_pml_base_framework, 0))) { - error = "mca_pml_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_coll_base_framework, 0))) { - error = "mca_coll_base_open() failed"; - goto error; - } - - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_osc_base_framework, 0))) { - error = "ompi_osc_base_open() failed"; - goto error; - } - - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_part_base_framework, 0))) { - error = "ompi_part_base_open() failed"; - goto error; + if (NULL == getenv("OMPI_COMMAND") && NULL != argv && NULL != argv[0]) { + opal_setenv("OMPI_COMMAND", argv[0], true, &environ); } - - /* In order to reduce the common case for MPI apps (where 
they - don't use MPI-2 IO or MPI-1 topology functions), the io and - topo frameworks are initialized lazily, at the first use of - relevant functions (e.g., MPI_FILE_*, MPI_CART_*, MPI_GRAPH_*), - so they are not opened here. */ - - /* Select which MPI components to use */ - - if (OMPI_SUCCESS != - (ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { - error = "mca_pml_base_select() failed"; - goto error; + if (NULL == getenv("OMPI_ARGV") && 1 < argc) { + char *tmp; + tmp = opal_argv_join(&argv[1], ' '); + opal_setenv("OMPI_ARGV", tmp, true, &environ); + free(tmp); } - OMPI_TIMING_IMPORT_OPAL("orte_init"); - OMPI_TIMING_NEXT("rte_init-commit"); - - /* exchange connection info - this function may also act as a barrier - * if data exchange is required. The modex occurs solely across procs - * in our job. If a barrier is required, the "modex" function will - * perform it internally */ - rc = PMIx_Commit(); - if (PMIX_SUCCESS != rc) { - ret = opal_pmix_convert_status(rc); - error = "PMIx_Commit()"; - goto error; - } - OMPI_TIMING_NEXT("commit"); #if (OPAL_ENABLE_TIMING) if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex && opal_pmix_collect_all_data && !ompi_singleton) { if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) { - ret - opal_pmix_convert_status(rc); + ret = opal_pmix_convert_status(rc); error = "timing: pmix-barrier-1 failed"; goto error; } @@ -757,150 +447,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, OMPI_TIMING_NEXT("modex"); - /* select buffered send allocator component to be used */ - if( OMPI_SUCCESS != - (ret = mca_pml_base_bsend_init(ompi_mpi_thread_multiple))) { - error = "mca_pml_base_bsend_init() failed"; - goto error; - } - - if (OMPI_SUCCESS != - (ret = mca_coll_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { - error = "mca_coll_base_find_available() failed"; - goto error; - } - - if (OMPI_SUCCESS != - (ret = 
ompi_osc_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { - error = "ompi_osc_base_find_available() failed"; - goto error; - } - - - if (OMPI_SUCCESS != - (ret = mca_part_base_select(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { - error = "mca_part_base_select() failed"; - goto error; - } - - /* io and topo components are not selected here -- see comment - above about the io and topo frameworks being loaded lazily */ - - /* Initialize each MPI handle subsystem */ - /* initialize requests */ - if (OMPI_SUCCESS != (ret = ompi_request_init())) { - error = "ompi_request_init() failed"; - goto error; - } - - if (OMPI_SUCCESS != (ret = ompi_message_init())) { - error = "ompi_message_init() failed"; - goto error; - } - - /* initialize error handlers */ - if (OMPI_SUCCESS != (ret = ompi_errhandler_init())) { - error = "ompi_errhandler_init() failed"; - goto error; - } - - /* initialize error codes */ - if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_init())) { - error = "ompi_mpi_errcode_init() failed"; - goto error; - } - - /* initialize internal error codes */ - if (OMPI_SUCCESS != (ret = ompi_errcode_intern_init())) { - error = "ompi_errcode_intern_init() failed"; - goto error; - } - - /* initialize info */ - if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init())) { - error = "ompi_info_init() failed"; - goto error; - } - - /* initialize groups */ - if (OMPI_SUCCESS != (ret = ompi_group_init())) { - error = "ompi_group_init() failed"; - goto error; - } - - /* initialize communicators */ - if (OMPI_SUCCESS != (ret = ompi_comm_init())) { - error = "ompi_comm_init() failed"; - goto error; - } - - /* initialize file handles */ - if (OMPI_SUCCESS != (ret = ompi_file_init())) { - error = "ompi_file_init() failed"; - goto error; - } - - /* initialize windows */ - if (OMPI_SUCCESS != (ret = ompi_win_init())) { - error = "ompi_win_init() failed"; - goto error; - } - - /* initialize attribute meta-data structure for comm/win/dtype */ - if 
(OMPI_SUCCESS != (ret = ompi_attr_init())) { - error = "ompi_attr_init() failed"; - goto error; - } - - /* identify the architectures of remote procs and setup - * their datatype convertors, if required - */ - if (OMPI_SUCCESS != (ret = ompi_proc_complete_init())) { - error = "ompi_proc_complete_init failed"; - goto error; - } - - /* start PML/BTL's */ - ret = MCA_PML_CALL(enable(true)); - if( OMPI_SUCCESS != ret ) { - error = "PML control failed"; - goto error; - } - - /* some btls/mtls require we call add_procs with all procs in the job. - * since the btls/mtls have no visibility here it is up to the pml to - * convey this requirement */ - if (mca_pml_base_requires_world ()) { - if (NULL == (procs = ompi_proc_world (&nprocs))) { - error = "ompi_proc_world () failed"; - goto error; - } - } else { - /* add all allocated ompi_proc_t's to PML (below the add_procs limit this - * behaves identically to ompi_proc_world ()) */ - if (NULL == (procs = ompi_proc_get_allocated (&nprocs))) { - error = "ompi_proc_get_allocated () failed"; - goto error; - } - } - ret = MCA_PML_CALL(add_procs(procs, nprocs)); - free(procs); - /* If we got "unreachable", then print a specific error message. - Otherwise, if we got some other failure, fall through to print - a generic message. 
*/ - if (OMPI_ERR_UNREACH == ret) { - opal_show_help("help-mpi-runtime.txt", - "mpi_init:startup:pml-add-procs-fail", true); - error = NULL; - goto error; - } else if (OMPI_SUCCESS != ret) { - error = "PML add procs failed"; - goto error; - } - MCA_PML_CALL(add_comm(&ompi_mpi_comm_world.comm)); MCA_PML_CALL(add_comm(&ompi_mpi_comm_self.comm)); @@ -930,7 +476,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, */ if (ompi_mpi_show_mca_params) { ompi_show_all_mca_params(ompi_mpi_comm_world.comm.c_my_rank, - nprocs, + ompi_process_info.num_procs, ompi_process_info.nodename); } @@ -975,7 +521,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, CPU utilization for the remainder of MPI_INIT when we are blocking on RTE-level events, but may greatly reduce non-TCP latency. */ - opal_progress_set_event_flag(OPAL_EVLOOP_NONBLOCK); + int old_event_flags = opal_progress_set_event_flag(0); + opal_progress_set_event_flag(old_event_flags | OPAL_EVLOOP_NONBLOCK); #endif /* wire up the mpi interface, if requested. Do this after the @@ -987,23 +534,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, goto error; } - /* Setup the dynamic process management (DPM) subsystem */ - if (OMPI_SUCCESS != (ret = ompi_dpm_init())) { - error = "ompi_dpm_init() failed"; - goto error; - } - - /* Determine the overall threadlevel support of all processes - in MPI_COMM_WORLD. This has to be done before calling - coll_base_comm_select, since some of the collective components - e.g. hierarch, might create subcommunicators. The threadlevel - requested by all processes is required in order to know - which cid allocation algorithm can be used. */ - if (OMPI_SUCCESS != ( ret = ompi_comm_cid_init ())) { - error = "ompi_mpi_init: ompi_comm_cid_init failed"; - goto error; - } - /* Init coll for the comms. 
This has to be after dpm_base_select, (since dpm.mark_dyncomm is not set in the communicator creation function else), but before dpm.dyncom_init, since this function @@ -1020,32 +550,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, goto error; } - /* Check whether we have been spawned or not. We introduce that - at the very end, since we need collectives, datatypes, ptls - etc. up and running here.... */ - if (OMPI_SUCCESS != (ret = ompi_dpm_dyn_init())) { - error = "ompi_dpm_dyn_init() failed"; - goto error; - } - - /* see if yield_when_idle was specified - if so, use it */ - opal_progress_set_yield_when_idle(ompi_mpi_yield_when_idle); - - /* negative value means use default - just don't do anything */ - if (ompi_mpi_event_tick_rate >= 0) { - opal_progress_set_event_poll_rate(ompi_mpi_event_tick_rate); - } - - /* At this point, we are fully configured and in MPI mode. Any - communication calls here will work exactly like they would in - the user's code. Setup the connections between procs and warm - them up with simple sends, if requested */ - - if (OMPI_SUCCESS != (ret = ompi_mpiext_init())) { - error = "ompi_mpiext_init"; - goto error; - } - #if OPAL_ENABLE_FT_MPI /* start the failure detector */ if( ompi_ftmpi_enabled ) { @@ -1054,6 +558,13 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, } #endif + /* Check whether we have been spawned or not. We introduce that + at the very end, since we need collectives, datatypes, ptls + etc. up and running here.... */ + if (OMPI_SUCCESS != (ret = ompi_dpm_dyn_init())) { + return ret; + } + /* Fall through */ error: if (ret != OMPI_SUCCESS) { @@ -1069,21 +580,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, return ret; } - /* Initialize the registered datarep list to be empty */ - OBJ_CONSTRUCT(&ompi_registered_datareps, opal_list_t); - - /* Initialize the arrays used to store the F90 types returned by the - * MPI_Type_create_f90_XXX functions. 
- */ - OBJ_CONSTRUCT( &ompi_mpi_f90_integer_hashtable, opal_hash_table_t); - opal_hash_table_init(&ompi_mpi_f90_integer_hashtable, 16 /* why not? */); - - OBJ_CONSTRUCT( &ompi_mpi_f90_real_hashtable, opal_hash_table_t); - opal_hash_table_init(&ompi_mpi_f90_real_hashtable, FLT_MAX_10_EXP); - - OBJ_CONSTRUCT( &ompi_mpi_f90_complex_hashtable, opal_hash_table_t); - opal_hash_table_init(&ompi_mpi_f90_complex_hashtable, FLT_MAX_10_EXP); - /* All done. Wasn't that simple? */ opal_atomic_wmb(); opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_INIT_COMPLETED); diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index 311935b9552..4d7dc071030 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -20,7 +20,7 @@ * All rights reserved. * Copyright (c) 2016-2021 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021 Triad National Security, LLC. All rights + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ @@ -88,6 +88,8 @@ char *ompi_mpi_spc_attach_string = NULL; bool ompi_mpi_spc_dump_enabled = false; uint32_t ompi_pmix_connect_timeout = 0; +bool ompi_enable_timing = false; + static bool show_default_mca_params = false; static bool show_file_mca_params = false; static bool show_enviro_mca_params = false; @@ -187,7 +189,7 @@ int ompi_mpi_register_params(void) ompi_mpi_param_check = true; if (!MPI_PARAM_CHECK) { opal_output(0, "WARNING: MCA parameter mpi_no_free_handles set to true, but MPI"); - opal_output(0, "WARNING: parameter checking has been compiled out of Open MPI."); + opal_output(0, "WARNING: parameter checking has been compiled out of " OMPI_IDENT_STRING "."); opal_output(0, "WARNING: mpi_no_free_handles is therefore only partially effective!"); } } @@ -246,7 +248,8 @@ int ompi_mpi_register_params(void) /* File to use when dumping the parameters */ (void) mca_base_var_register("ompi", "mpi", NULL, "show_mca_params_file", - "If mpi_show_mca_params is true, setting this string to a valid filename tells Open MPI to dump all the MCA parameter values into a file suitable for reading via the mca_param_files parameter (good for reproducability of MPI jobs)", + "If mpi_show_mca_params is true, setting this string to a valid filename tells " + OMPI_IDENT_STRING " to dump all the MCA parameter values into a file suitable for reading via the mca_param_files parameter (good for reproducability of MPI jobs)", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, @@ -272,7 +275,7 @@ int ompi_mpi_register_params(void) /* Sparse group storage support */ (void) mca_base_var_register("ompi", "mpi", NULL, "have_sparse_group_storage", - "Whether this Open MPI installation supports storing of data in MPI groups in \"sparse\" formats (good for extremely large process count MPI jobs that create many communicators/groups)", + "Whether this " OMPI_IDENT_STRING " installation supports storing of data in MPI groups in \"sparse\" formats 
(good for extremely large process count MPI jobs that create many communicators/groups)", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_DEFAULT_ONLY, OPAL_INFO_LVL_9, @@ -389,6 +392,30 @@ int ompi_mpi_register_params(void) 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &ompi_pmix_connect_timeout); + /* check to see if we want timing information */ + /* TODO: enable OMPI init and OMPI finalize timings if + * this variable was set to 1! + */ + ompi_enable_timing = false; + (void) mca_base_var_register("ompi", "ompi", NULL, "timing", + "Request that critical timing loops be measured", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_enable_timing); + +#if OPAL_ENABLE_FT_MPI + /* Before loading any other part of the MPI library, we need to load + * * the ft-mpi tune file to override default component selection when + * * FT is desired ON; this does override openmpi-params.conf, but not + * * command line or env. + * */ + if( ompi_ftmpi_enabled ) { + mca_base_var_load_extra_files("ft-mpi", false); + } +#endif /* OPAL_ENABLE_FT_MPI */ + + return OMPI_SUCCESS; } diff --git a/ompi/runtime/ompi_rte.c b/ompi/runtime/ompi_rte.c index b57934b29f0..dcfdbb43b3c 100644 --- a/ompi/runtime/ompi_rte.c +++ b/ompi/runtime/ompi_rte.c @@ -97,6 +97,7 @@ buffer_cleanup(void *value) } free (ptr); } + fns_init = false; } static opal_print_args_buffers_t* @@ -560,6 +561,35 @@ int ompi_rte_init(int *pargc, char ***pargv) goto error; } + /* setup our internal nspace hack */ + opal_pmix_setup_nspace_tracker(); + + /* initialize the selected module */ + if (!PMIx_Initialized() && (PMIX_SUCCESS != (ret = PMIx_Init(&opal_process_info.myprocid, NULL, 0)))) { + /* if we get PMIX_ERR_UNREACH indicating that we cannot reach the + * server, then we assume we are operating as a singleton */ + if (PMIX_ERR_UNREACH == ret) { + ompi_singleton = true; + } else { + /* we cannot run - this could be due to being direct launched + * without the required PMI 
support being built, so print + * out a help message indicating it */ + opal_show_help("help-mpi-runtime.txt", "no-pmi", true, PMIx_Error_string(ret)); + return OPAL_ERR_SILENT; + } + } + + /* setup the process name fields - also registers the new nspace */ + OPAL_PMIX_CONVERT_PROCT(rc, &pname, &opal_process_info.myprocid); + if (OPAL_SUCCESS != rc) { + return rc; + } + OPAL_PROC_MY_NAME.jobid = pname.jobid; + OPAL_PROC_MY_NAME.vpid = pname.vpid; + opal_process_info.my_name.jobid = OPAL_PROC_MY_NAME.jobid; + opal_process_info.my_name.vpid = OPAL_PROC_MY_NAME.vpid; + + /* set our hostname */ ev1 = NULL; OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, PMIX_HOSTNAME, &OPAL_PROC_MY_NAME, @@ -978,6 +1008,8 @@ int ompi_rte_finalize(void) opal_pmix_finalize_nspace_tracker(); + opal_finalize (); + return OMPI_SUCCESS; } diff --git a/ompi/runtime/ompi_spc.c b/ompi/runtime/ompi_spc.c index 4c0ed5a1b5e..2245568d729 100644 --- a/ompi/runtime/ompi_spc.c +++ b/ompi/runtime/ompi_spc.c @@ -9,6 +9,7 @@ * Copyright (c) 2019 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2020 IBM Corporation. All rights reserved. + * Copyright (c) 2022 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -166,6 +167,8 @@ static const ompi_spc_event_t ompi_spc_events_desc[OMPI_SPC_NUM_COUNTERS] = { "contained at once since the last reset of this counter. Note: This counter is reset each time it is read.", true, false), SET_COUNTER_ARRAY(OMPI_SPC_MAX_OOS_IN_QUEUE, "The maximum number of messages that the out of sequence message queue(s) within an MPI process " "contained at once since the last reset of this counter. 
Note: This counter is reset each time it is read.", true, false) + SET_COUNTER_ARRAY(OMPI_SPC_ISENDRECV, "The number of times MPI_Isendrecv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ISENDRECV_REPLACE, "The number of times MPI_Isendrecv_replace was called.", false, false), }; /* An array of event structures to store the event data (value, attachments, flags) */ diff --git a/ompi/runtime/ompi_spc.h b/ompi/runtime/ompi_spc.h index 374c50a5334..08009bdac5c 100644 --- a/ompi/runtime/ompi_spc.h +++ b/ompi/runtime/ompi_spc.h @@ -152,6 +152,8 @@ typedef enum ompi_spc_counters { OMPI_SPC_OOS_IN_QUEUE, OMPI_SPC_MAX_UNEXPECTED_IN_QUEUE, OMPI_SPC_MAX_OOS_IN_QUEUE, + OMPI_SPC_ISENDRECV, + OMPI_SPC_ISENDRECV_REPLACE, OMPI_SPC_NUM_COUNTERS /* This serves as the number of counters. It must be last. */ } ompi_spc_counters_t; diff --git a/ompi/runtime/params.h b/ompi/runtime/params.h index 0605a2786ac..247c8fcf728 100644 --- a/ompi/runtime/params.h +++ b/ompi/runtime/params.h @@ -16,7 +16,7 @@ * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved - * Copyright (c) 2021 Triad National Security, LLC. All rights + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ @@ -31,6 +31,8 @@ #include "ompi_config.h" +#include "ompi/runtime/mpiruntime.h" + BEGIN_C_DECLS /* @@ -180,6 +182,15 @@ OMPI_DECLSPEC extern bool ompi_mpi_spc_dump_enabled; */ OMPI_DECLSPEC extern uint32_t ompi_pmix_connect_timeout; + /** + * A boolean value that determines whether or not to enable runtime timing of + * init and finalize. + */ +OMPI_DECLSPEC extern bool ompi_enable_timing; + +OMPI_DECLSPEC extern int ompi_mpi_event_tick_rate; +OMPI_DECLSPEC extern bool ompi_mpi_yield_when_idle; + /** * Register MCA parameters used by the MPI layer. 
* @@ -190,6 +201,7 @@ OMPI_DECLSPEC extern uint32_t ompi_pmix_connect_timeout; */ OMPI_DECLSPEC int ompi_mpi_register_params(void); + /** * Display all MCA parameters used * diff --git a/ompi/tools/mpirun/Makefile.am b/ompi/tools/mpirun/Makefile.am index bcbc90fe95a..ab92d5cdf76 100644 --- a/ompi/tools/mpirun/Makefile.am +++ b/ompi/tools/mpirun/Makefile.am @@ -3,6 +3,7 @@ # Copyright (c) 2020 IBM Corporation. All rights reserved. # Copyright (c) 2021 Amazon.com, Inc. or its affiliates. # All Rights reserved. +# Copyright (c) 2021 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -11,10 +12,18 @@ # if OMPI_WANT_PRRTE + +bin_PROGRAMS = mpirun + +mpirun_SOURCES = \ + main.c + +mpirun_LDADD = \ + $(top_builddir)/opal/libopen-pal.la + install-exec-hook: - (cd $(DESTDIR)$(bindir); rm -f mpirun$(EXEEXT); $(LN_S) $(PRTE_PATH)$(EXEEXT) mpirun$(EXEEXT)) - (cd $(DESTDIR)$(bindir); rm -f mpiexec$(EXEEXT); $(LN_S) $(PRTE_PATH)$(EXEEXT) mpiexec$(EXEEXT)) - (cd $(DESTDIR)$(bindir); rm -f oshrun$(EXEEXT); $(LN_S) $(PRTE_PATH)$(EXEEXT) oshrun$(EXEEXT)) + (cd $(DESTDIR)$(bindir); rm -f mpiexec$(EXEEXT); $(LN_S) mpirun$(EXEEXT) mpiexec$(EXEEXT)) + (cd $(DESTDIR)$(bindir); rm -f oshrun$(EXEEXT); $(LN_S) mpirun$(EXEEXT) oshrun$(EXEEXT)) uninstall-local: rm -f $(DESTDIR)$(bindir)/mpirun$(EXEEXT) \ diff --git a/ompi/tools/mpirun/main.c b/ompi/tools/mpirun/main.c new file mode 100644 index 00000000000..c70397d148a --- /dev/null +++ b/ompi/tools/mpirun/main.c @@ -0,0 +1,125 @@ +/*************************************************************************** + * * + * Open MPI: Open Source High Performance Computing * + * * + * http://www.open-mpi.org/ * + * * + ***************************************************************************/ +#include "ompi_config.h" + +#include +#include +#include +#include +#if HAVE_SYS_STAT_H +# include +#endif /* HAVE_SYS_STAT_H */ + +#include "opal/mca/base/base.h" +#include "opal/mca/installdirs/base/base.h" 
+#include "opal/util/argv.h" +#include "opal/util/basename.h" +#include "opal/util/opal_environ.h" +#include "opal/util/os_dirpath.h" +#include "opal/util/os_path.h" +#include "opal/util/path.h" +#include "opal/util/printf.h" + +int main(int argc, char *argv[]) +{ + char *evar; +#if OPAL_USING_INTERNAL_PMIX || OMPI_USING_INTERNAL_PRRTE + char *pvar; +#endif + char **pargs = NULL; + char *pfx = NULL; + int m, param_len; + char *truepath; + + if (NULL != (evar = getenv("OPAL_PREFIX"))) { + +#if OMPI_USING_INTERNAL_PRRTE + opal_asprintf(&pvar, "PRTE_PREFIX=%s", evar); + putenv(pvar); +#endif + +#if OPAL_USING_INTERNAL_PMIX + opal_asprintf(&pvar, "PMIX_PREFIX=%s", evar); + putenv(pvar); +#endif + } + putenv("PRTE_MCA_schizo_proxy=ompi"); + + opal_argv_append_nosize(&pargs, "prterun"); + for (m=1; NULL != argv[m]; m++) { + opal_argv_append_nosize(&pargs, argv[m]); + /* Did the user specify a prefix, or want prefix by default? */ + if (0 == strcmp(argv[m], "--prefix")) { + opal_asprintf(&pfx, "%s%s", argv[m+1], "/bin"); + } + } + + if (NULL != pfx) { + /* "Parse" the param, aka remove superfluous path_sep. */ + param_len = strlen(pfx); + while (0 == strcmp(OPAL_PATH_SEP, &(pfx[param_len - 1]))) { + pfx[param_len - 1] = '\0'; + param_len--; + if (0 == param_len) { + fprintf(stderr, "A prefix was supplied to mpirun that only contained slashes.\n" + "This is a fatal error; mpirun will now abort.\nNo processes were launched.\n"); + exit(1); + } + } + } else if (opal_path_is_absolute(argv[0])) { + /* Check if called with fully-qualified path to mpirun. + * (Note: Put this second so can override with --prefix (above). 
*/ + pfx = opal_dirname(argv[0]); +#if OMPI_USING_INTERNAL_PRRTE + } else { + /* in case --enable-prefix-by-default was given */ + mca_base_framework_open(&opal_installdirs_base_framework, 0); // fill in the installdirs + if (NULL != opal_install_dirs.bindir) { + pfx = strdup(opal_install_dirs.bindir); + } +#endif + } + + if (NULL == pfx) { + truepath = opal_path_findv("prterun", X_OK, environ, NULL); + } else { + truepath = opal_os_path(0, pfx, "prterun", NULL); + free(pfx); + } + + if (NULL == truepath) { + fprintf(stderr, "prterun executable could not be found - unable to run\n"); + exit(1); + } + + execve(truepath, pargs, environ); + fprintf(stderr, "The mpirun (\"%s\") cmd failed to exec its actual executable - your application will NOT execute. Error: %s\n", + truepath ? truepath : "NULL", strerror(errno)); + exit(1); +} + +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2017-2020 Intel, Inc. All rights reserved. + * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2021 Nanook Consulting. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ diff --git a/ompi/tools/mpisync/Makefile.am b/ompi/tools/mpisync/Makefile.am index 7e5549ae34b..97b0f6c07dc 100644 --- a/ompi/tools/mpisync/Makefile.am +++ b/ompi/tools/mpisync/Makefile.am @@ -26,26 +26,6 @@ # $HEADER$ # - - -AM_CFLAGS = \ - -DOPAL_CONFIGURE_USER="\"@OPAL_CONFIGURE_USER@\"" \ - -DOPAL_CONFIGURE_HOST="\"@OPAL_CONFIGURE_HOST@\"" \ - -DOPAL_CONFIGURE_DATE="\"@OPAL_CONFIGURE_DATE@\"" \ - -DOMPI_BUILD_USER="\"$$USER\"" \ - -DOMPI_BUILD_HOST="\"$${HOSTNAME:-`(hostname || uname -n) | sed 1q`}\"" \ - -DOMPI_BUILD_DATE="\"`$(top_srcdir)/config/getdate.sh`\"" \ - -DOMPI_BUILD_CFLAGS="\"@CFLAGS@\"" \ - -DOMPI_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \ - -DOMPI_BUILD_CXXFLAGS="\"@CXXFLAGS@\"" \ - -DOMPI_BUILD_CXXCPPFLAGS="\"@CXXCPPFLAGS@\"" \ - -DOMPI_BUILD_FFLAGS="\"@FFLAGS@\"" \ - -DOMPI_BUILD_FCFLAGS="\"@FCFLAGS@\"" \ - -DOMPI_BUILD_LDFLAGS="\"@LDFLAGS@\"" \ - -DOMPI_BUILD_LIBS="\"@LIBS@\"" \ - -DOPAL_CC_ABSOLUTE="\"@OPAL_CC_ABSOLUTE@\"" \ - -DOMPI_CXX_ABSOLUTE="\"@OMPI_CXX_ABSOLUTE@\"" - include $(top_srcdir)/Makefile.ompi-rules man_pages = mpisync.1 diff --git a/ompi/tools/ompi_info/ompi_info.c b/ompi/tools/ompi_info/ompi_info.c index 01b33c933e1..cec93c4b431 100644 --- a/ompi/tools/ompi_info/ompi_info.c +++ b/ompi/tools/ompi_info/ompi_info.c @@ -96,8 +96,8 @@ int main(int argc, char *argv[]) } if (opal_cmd_line_is_taken(ompi_info_cmd_line, "version")) { - fprintf(stdout, "Open MPI v%s\n\n%s\n", - OPAL_VERSION, PACKAGE_BUGREPORT); + fprintf(stdout, "%s v%s\n\n%s\n", + PACKAGE_NAME, OPAL_VERSION, PACKAGE_BUGREPORT); exit(0); } diff --git a/ompi/tools/ompi_info/param.c b/ompi/tools/ompi_info/param.c index 785bc65b6ac..e435b603a3d 100644 --- a/ompi/tools/ompi_info/param.c +++ b/ompi/tools/ompi_info/param.c @@ -14,7 +14,7 @@ * Copyright (c) 2014-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2019 Intel, Inc. 
All rights reserved. - * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * @@ -40,6 +40,7 @@ #include MCA_timer_IMPLEMENTATION_HEADER #include "opal/include/opal/version.h" +#include "opal/opal_portable_platform.h" #include "opal/class/opal_value_array.h" #include "opal/class/opal_pointer_array.h" #include "opal/util/printf.h" @@ -47,7 +48,6 @@ #include "opal/runtime/opal_info_support.h" #include "ompi/tools/ompi_info/ompi_info.h" -#include "ompi/include/mpi_portable_platform.h" const char *ompi_info_deprecated_value = "deprecated-ompi-info-value"; @@ -62,10 +62,10 @@ static void append(char *dest, size_t max, int *first, char *src) len = max - strlen(dest); if (!(*first)) { - strncat(dest, ", ", len); + strncat(dest, ", ", len - 1); len = max - strlen(dest); } - strncat(dest, src, len); + strncat(dest, src, len - 1); *first = 0; } @@ -287,7 +287,7 @@ void ompi_info_do_config(bool want_all) if (OMPI_FORTRAN_HAVE_IGNORE_TKR) { /* OMPI_FORTRAN_IGNORE_TKR_PREDECL is already in quotes; it - didn't work consistently to put it in _STRINGIFY because + didn't work consistently to put it in PLATFORM_STRINGIFY because sometimes the compiler would actually interpret the pragma in there before stringify-ing it. 
*/ (void)opal_asprintf(&fortran_have_ignore_tkr, "yes (%s)", @@ -336,9 +336,9 @@ void ompi_info_do_config(bool want_all) opal_info_out("C compiler absolute", "compiler:c:absolute", OPAL_CC_ABSOLUTE); opal_info_out("C compiler family name", "compiler:c:familyname", - _STRINGIFY(OPAL_BUILD_PLATFORM_COMPILER_FAMILYNAME)); + PLATFORM_STRINGIFY(PLATFORM_COMPILER_FAMILYNAME)); opal_info_out("C compiler version", "compiler:c:version", - _STRINGIFY(OPAL_BUILD_PLATFORM_COMPILER_VERSION_STR)); + PLATFORM_COMPILER_VERSION_STR); if (want_all) { opal_info_out_int("C char size", "compiler:c:sizeof:char", sizeof(char)); diff --git a/ompi/tools/wrappers/mpifort-wrapper-data.txt.in b/ompi/tools/wrappers/mpifort-wrapper-data.txt.in index 188fbac84b8..73e51b134bf 100644 --- a/ompi/tools/wrappers/mpifort-wrapper-data.txt.in +++ b/ompi/tools/wrappers/mpifort-wrapper-data.txt.in @@ -13,6 +13,7 @@ compiler_env=FC compiler_flags_env=FCFLAGS compiler=@FC@ preprocessor_flags= +compiler_flags_prefix=@OMPI_WRAPPER_EXTRA_FCFLAGS_PREFIX@ compiler_flags=@OMPI_WRAPPER_EXTRA_FCFLAGS@ linker_flags=@OMPI_WRAPPER_EXTRA_FC_LDFLAGS@ # Note that per https://svn.open-mpi.org/trac/ompi/ticket/3422, we diff --git a/ompi/util/timings.h b/ompi/util/timings.h index b582466661e..55188f6563e 100644 --- a/ompi/util/timings.h +++ b/ompi/util/timings.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2017-2018 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2021-2022 Cisco Systems, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -12,7 +13,6 @@ #define OMPI_UTIL_TIMING_H #include "opal/util/timings.h" -/* TODO: we need access to MPI_* functions */ #if (OPAL_ENABLE_TIMING) @@ -166,16 +166,17 @@ typedef struct ompi_timing_t { #define OMPI_TIMING_OUT \ do { \ if (OMPI_TIMING.enabled) { \ - int i, size, rank; \ - MPI_Comm_size(MPI_COMM_WORLD, &size); \ - MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ - int error = 0; \ + int i; \ + int size = ompi_comm_size(MPI_COMM_WORLD); \ + int rank = ompi_comm_rank(MPI_COMM_WORLD); \ + int timing_error = 0; \ int imported = 0; \ \ - MPI_Reduce(&OMPI_TIMING.error, &error, 1, \ - MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \ + MPI_COMM_WORLD->c_coll->coll_reduce(&OMPI_TIMING.error, &timing_error, 1, \ + MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD, \ + MPI_COMM_WORLD->c_coll->coll_reduce_module); \ \ - if (error) { \ + if (timing_error) { \ if (0 == rank) { \ printf("==OMPI_TIMING== error: something went wrong, timings doesn't work\n"); \ } \ @@ -196,12 +197,15 @@ typedef struct ompi_timing_t { do { \ int use; \ for (use = 0; use < timing->use; use++) { \ - MPI_Reduce(&timing->val[use].ts, avg + i, 1, \ - MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); \ - MPI_Reduce(&timing->val[use].ts, min + i, 1, \ - MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); \ - MPI_Reduce(&timing->val[use].ts, max + i, 1, \ - MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); \ + MPI_COMM_WORLD->c_coll->coll_reduce(&timing->val[use].ts, avg + i, 1, \ + MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD, \ + MPI_COMM_WORLD->c_coll->coll_reduce_module); \ + MPI_COMM_WORLD->c_coll->coll_reduce(&timing->val[use].ts, min + i, 1, \ + MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD, \ + MPI_COMM_WORLD->c_coll->coll_reduce_module); \ + MPI_COMM_WORLD->c_coll->coll_reduce(&timing->val[use].ts, max + i, 1, \ + MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD, \ + MPI_COMM_WORLD->c_coll->coll_reduce_module); \ desc[i] = timing->val[use].desc; \ prefix[i] = timing->val[use].prefix; \ file[i] = 
timing->val[use].file; \ diff --git a/ompi/win/win.c b/ompi/win/win.c index b4bc150d893..70e70c978e8 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -17,6 +17,8 @@ * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -52,13 +54,13 @@ ompi_predefined_win_t *ompi_mpi_win_null_addr = &ompi_mpi_win_null; mca_base_var_enum_t *ompi_win_accumulate_ops = NULL; mca_base_var_enum_flag_t *ompi_win_accumulate_order = NULL; -static mca_base_var_enum_value_t accumulate_ops_values[] = { +static const mca_base_var_enum_value_t accumulate_ops_values[] = { {.value = OMPI_WIN_ACCUMULATE_OPS_SAME_OP_NO_OP, .string = "same_op_no_op",}, {.value = OMPI_WIN_ACCUMULATE_OPS_SAME_OP, .string = "same_op",}, {.value = -1, .string = NULL}, }; -static mca_base_var_enum_value_flag_t accumulate_order_flags[] = { +static const mca_base_var_enum_value_flag_t accumulate_order_flags[] = { {.flag = OMPI_WIN_ACC_ORDER_NONE, .string = "none", .conflicting_flag = OMPI_WIN_ACC_ORDER_RAR | OMPI_WIN_ACC_ORDER_WAR | OMPI_WIN_ACC_ORDER_RAW | OMPI_WIN_ACC_ORDER_WAW}, {.flag = OMPI_WIN_ACC_ORDER_RAR, .string = "rar", .conflicting_flag = OMPI_WIN_ACC_ORDER_NONE}, @@ -74,8 +76,40 @@ static void ompi_win_destruct(ompi_win_t *win); OBJ_CLASS_INSTANCE(ompi_win_t, opal_infosubscriber_t, ompi_win_construct, ompi_win_destruct); -int -ompi_win_init(void) + +static void ompi_win_dump (ompi_win_t *win) +{ + opal_output(0, "Dumping information for window: %s\n", win->w_name); + opal_output(0," Fortran window handle: %d, window size: %d\n", + win->w_f_to_c_index, ompi_group_size (win->w_group)); +} + +static int ompi_win_finalize(void) +{ + size_t size = opal_pointer_array_get_size (&ompi_mpi_windows); + /* start at 1 to skip win null */ + for (size_t i = 1 ; 
i < size ; ++i) { + ompi_win_t *win = + (ompi_win_t *) opal_pointer_array_get_item (&ompi_mpi_windows, i); + if (NULL != win) { + if (ompi_debug_show_handle_leaks && !ompi_win_invalid(win)){ + opal_output(0,"WARNING: MPI_Win still allocated in MPI_Finalize\n"); + ompi_win_dump (win); + } + ompi_win_free (win); + } + } + + OBJ_DESTRUCT(&ompi_mpi_win_null.win); + OBJ_DESTRUCT(&ompi_mpi_windows); + OBJ_RELEASE(ompi_win_accumulate_ops); + OBJ_RELEASE(ompi_win_accumulate_order); + + /* release a reference to the attributes subsys */ + return ompi_attr_put_ref(); +} + +int ompi_win_init (void) { int ret; @@ -106,36 +140,12 @@ ompi_win_init(void) return ret; } - return OMPI_SUCCESS; -} - -static void ompi_win_dump (ompi_win_t *win) -{ - opal_output(0, "Dumping information for window: %s\n", win->w_name); - opal_output(0," Fortran window handle: %d, window size: %d\n", - win->w_f_to_c_index, ompi_group_size (win->w_group)); -} - -int ompi_win_finalize(void) -{ - size_t size = opal_pointer_array_get_size (&ompi_mpi_windows); - /* start at 1 to skip win null */ - for (size_t i = 1 ; i < size ; ++i) { - ompi_win_t *win = - (ompi_win_t *) opal_pointer_array_get_item (&ompi_mpi_windows, i); - if (NULL != win) { - if (ompi_debug_show_handle_leaks && !ompi_win_invalid(win)){ - opal_output(0,"WARNING: MPI_Win still allocated in MPI_Finalize\n"); - ompi_win_dump (win); - } - ompi_win_free (win); - } + ret = ompi_attr_get_ref(); + if (OMPI_SUCCESS != ret) { + return ret; } - OBJ_DESTRUCT(&ompi_mpi_win_null.win); - OBJ_DESTRUCT(&ompi_mpi_windows); - OBJ_RELEASE(ompi_win_accumulate_ops); - OBJ_RELEASE(ompi_win_accumulate_order); + ompi_mpi_instance_append_finalize (ompi_win_finalize); return OMPI_SUCCESS; } @@ -152,7 +162,14 @@ static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int return OMPI_ERR_OUT_OF_RESOURCE; } - ret = opal_info_get_value_enum (info, "accumulate_ops", &acc_ops, + /* Copy the info for the info layer */ + win->super.s_info = 
OBJ_NEW(opal_info_t); + if (info) { + opal_info_dup(info, &(win->super.s_info)); + } + + + ret = opal_info_get_value_enum (win->super.s_info, "accumulate_ops", &acc_ops, OMPI_WIN_ACCUMULATE_OPS_SAME_OP_NO_OP, ompi_win_accumulate_ops, &flag); if (OMPI_SUCCESS != ret) { @@ -162,7 +179,7 @@ static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int win->w_acc_ops = (ompi_win_accumulate_ops_t)acc_ops; - ret = opal_info_get_value_enum (info, "accumulate_order", &acc_order, + ret = opal_info_get_value_enum (win->super.s_info, "accumulate_order", &acc_order, OMPI_WIN_ACC_ORDER_RAR | OMPI_WIN_ACC_ORDER_WAR | OMPI_WIN_ACC_ORDER_RAW | OMPI_WIN_ACC_ORDER_WAW, &(ompi_win_accumulate_order->super), &flag); @@ -180,12 +197,6 @@ static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int OBJ_RETAIN(group); win->w_group = group; - /* Copy the info for the info layer */ - win->super.s_info = OBJ_NEW(opal_info_t); - if (info) { - opal_info_dup(info, &(win->super.s_info)); - } - *win_out = win; return OMPI_SUCCESS; @@ -243,7 +254,7 @@ ompi_win_create(void *base, size_t size, return ret; } - ret = ompi_osc_base_select(win, &base, size, disp_unit, comm, info, MPI_WIN_FLAVOR_CREATE, &model); + ret = ompi_osc_base_select(win, &base, size, disp_unit, comm, MPI_WIN_FLAVOR_CREATE, &model); if (OMPI_SUCCESS != ret) { OBJ_RELEASE(win); return ret; @@ -255,12 +266,14 @@ ompi_win_create(void *base, size_t size, return ret; } + /* MPI-4 §12.2.7 requires us to remove all unknown keys from the info object */ + opal_info_remove_unreferenced(win->super.s_info); + *newwin = win; return OMPI_SUCCESS; } - int ompi_win_allocate(size_t size, int disp_unit, opal_info_t *info, ompi_communicator_t *comm, void *baseptr, ompi_win_t **newwin) @@ -275,7 +288,7 @@ ompi_win_allocate(size_t size, int disp_unit, opal_info_t *info, return ret; } - ret = ompi_osc_base_select(win, &base, size, disp_unit, comm, info, MPI_WIN_FLAVOR_ALLOCATE, &model); + ret = 
ompi_osc_base_select(win, &base, size, disp_unit, comm, MPI_WIN_FLAVOR_ALLOCATE, &model); if (OMPI_SUCCESS != ret) { OBJ_RELEASE(win); return ret; @@ -287,13 +300,15 @@ ompi_win_allocate(size_t size, int disp_unit, opal_info_t *info, return ret; } + /* MPI-4 §12.2.7 requires us to remove all unknown keys from the info object */ + opal_info_remove_unreferenced(win->super.s_info); + *((void**) baseptr) = base; *newwin = win; return OMPI_SUCCESS; } - int ompi_win_allocate_shared(size_t size, int disp_unit, opal_info_t *info, ompi_communicator_t *comm, void *baseptr, ompi_win_t **newwin) @@ -308,7 +323,7 @@ ompi_win_allocate_shared(size_t size, int disp_unit, opal_info_t *info, return ret; } - ret = ompi_osc_base_select(win, &base, size, disp_unit, comm, info, MPI_WIN_FLAVOR_SHARED, &model); + ret = ompi_osc_base_select(win, &base, size, disp_unit, comm, MPI_WIN_FLAVOR_SHARED, &model); if (OMPI_SUCCESS != ret) { OBJ_RELEASE(win); return ret; @@ -320,13 +335,15 @@ ompi_win_allocate_shared(size_t size, int disp_unit, opal_info_t *info, return ret; } + /* MPI-4 §12.2.7 requires us to remove all unknown keys from the info object */ + opal_info_remove_unreferenced(win->super.s_info); + *((void**) baseptr) = base; *newwin = win; return OMPI_SUCCESS; } - int ompi_win_create_dynamic(opal_info_t *info, ompi_communicator_t *comm, ompi_win_t **newwin) { @@ -339,7 +356,7 @@ ompi_win_create_dynamic(opal_info_t *info, ompi_communicator_t *comm, ompi_win_t return ret; } - ret = ompi_osc_base_select(win, MPI_BOTTOM, 0, 1, comm, info, MPI_WIN_FLAVOR_DYNAMIC, &model); + ret = ompi_osc_base_select(win, MPI_BOTTOM, 0, 1, comm, MPI_WIN_FLAVOR_DYNAMIC, &model); if (OMPI_SUCCESS != ret) { OBJ_RELEASE(win); return ret; @@ -351,12 +368,14 @@ ompi_win_create_dynamic(opal_info_t *info, ompi_communicator_t *comm, ompi_win_t return ret; } + /* MPI-4 §12.2.7 requires us to remove all unknown keys from the info object */ + opal_info_remove_unreferenced(win->super.s_info); + *newwin = win; return 
OMPI_SUCCESS; } - int ompi_win_free(ompi_win_t *win) { diff --git a/ompi/win/win.h b/ompi/win/win.h index 63aec9de14a..33c0a48a873 100644 --- a/ompi/win/win.h +++ b/ompi/win/win.h @@ -15,6 +15,8 @@ * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -131,7 +133,6 @@ OMPI_DECLSPEC extern ompi_predefined_win_t ompi_mpi_win_null; OMPI_DECLSPEC extern ompi_predefined_win_t *ompi_mpi_win_null_addr; int ompi_win_init(void); -int ompi_win_finalize(void); int ompi_win_create(void *base, size_t size, int disp_unit, ompi_communicator_t *comm, opal_info_t *info, @@ -141,7 +142,6 @@ int ompi_win_allocate(size_t size, int disp_unit, opal_info_t *info, int ompi_win_allocate_shared(size_t size, int disp_unit, opal_info_t *info, ompi_communicator_t *comm, void *baseptr, ompi_win_t **newwin); int ompi_win_create_dynamic(opal_info_t *info, ompi_communicator_t *comm, ompi_win_t **newwin); - int ompi_win_free(ompi_win_t *win); OMPI_DECLSPEC int ompi_win_set_name(ompi_win_t *win, const char *win_name); diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index e08265b42bc..365ddc45a1e 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -455,29 +455,6 @@ int32_t opal_convertor_set_position_nocheck(opal_convertor_t *convertor, size_t return rc; } -static size_t opal_datatype_compute_remote_size(const opal_datatype_t *pData, const size_t *sizes) -{ - uint32_t typeMask = pData->bdt_used; - size_t length = 0; - - if (opal_datatype_is_predefined(pData)) { - return sizes[pData->desc.desc->elem.common.type]; - } - - if (OPAL_UNLIKELY(NULL == pData->ptypes)) { - /* Allocate and fill the array of types used in the datatype description */ - opal_datatype_compute_ptypes((opal_datatype_t *) pData); - } - - for (int i = 
OPAL_DATATYPE_FIRST_TYPE; typeMask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++) { - if (typeMask & ((uint32_t) 1 << i)) { - length += (pData->ptypes[i] * sizes[i]); - typeMask ^= ((uint32_t) 1 << i); - } - } - return length; -} - /** * Compute the remote size. If necessary remove the homogeneous flag * and redirect the convertor description toward the non-optimized @@ -496,9 +473,9 @@ size_t opal_convertor_compute_remote_size(opal_convertor_t *pConvertor) } if (0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE)) { /* This is for a single datatype, we must update it with the count */ - pConvertor->remote_size = opal_datatype_compute_remote_size(datatype, - pConvertor->master - ->remote_sizes); + pConvertor->remote_size = + opal_datatype_compute_remote_size(datatype, + pConvertor->master->remote_sizes); pConvertor->remote_size *= pConvertor->count; } } diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index 7dabd1742c0..5f7fc53fa7d 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -311,6 +311,15 @@ OPAL_DECLSPEC int32_t opal_datatype_copy_content_same_ddt(const opal_datatype_t OPAL_DECLSPEC int opal_datatype_compute_ptypes(opal_datatype_t *datatype); +/* + * Compute the size of the datatype using a specific set of predefined type sizes. + * This function allows computing the size of a packed buffer without creating + * a fully fledged specialized convertor for the remote peer. + */ +OPAL_DECLSPEC size_t +opal_datatype_compute_remote_size(const opal_datatype_t *pData, + const size_t *sizes); + /* Compute the span in memory of count datatypes. This function help with temporary * memory allocations for receiving already typed data (such as those used for reduce * operations).
This span is the distance between the minimum and the maximum byte diff --git a/opal/datatype/opal_datatype_get_count.c b/opal/datatype/opal_datatype_get_count.c index 202601d97a2..fed344d1bbd 100644 --- a/opal/datatype/opal_datatype_get_count.c +++ b/opal/datatype/opal_datatype_get_count.c @@ -223,3 +223,27 @@ int opal_datatype_compute_ptypes(opal_datatype_t *datatype) } } } + +size_t opal_datatype_compute_remote_size(const opal_datatype_t *pData, const size_t *sizes) +{ + uint32_t typeMask = pData->bdt_used; + size_t length = 0; + + if (opal_datatype_is_predefined(pData)) { + return sizes[pData->desc.desc->elem.common.type]; + } + + if (OPAL_UNLIKELY(NULL == pData->ptypes)) { + /* Allocate and fill the array of types used in the datatype description */ + opal_datatype_compute_ptypes((opal_datatype_t *) pData); + } + + for (int i = OPAL_DATATYPE_FIRST_TYPE; typeMask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++) { + if (typeMask & ((uint32_t) 1 << i)) { + length += (pData->ptypes[i] * sizes[i]); + typeMask ^= ((uint32_t) 1 << i); + } + } + return length; +} + diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index b97f2548b8a..b37ee9b31de 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -547,7 +547,6 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec dt_stack_t *pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ size_t count_desc; /* the number of items already done in the actual pos_desc */ - uint16_t type = OPAL_DATATYPE_MAX_PREDEFINED; /* type at current position */ size_t total_unpacked = 0; /* total size unpacked this time */ dt_elem_desc_t *description; dt_elem_desc_t *pElem; @@ -588,7 +587,6 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec while (1) { while (pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA) { /* now here we 
have a basic datatype */ - type = description[pos_desc].elem.common.type; OPAL_DATATYPE_SAFEGUARD_POINTER(conv_ptr + pElem->elem.disp, pData->size, pConvertor->pBaseBuf, pData, pConvertor->count); DO_DEBUG(opal_output(0, @@ -596,7 +594,7 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec (void *) iov_ptr, iov_len_local, (void *) pConvertor->pBaseBuf, conv_ptr + pElem->elem.disp - pConvertor->pBaseBuf, count_desc, description[pos_desc].elem.extent, - opal_datatype_basicDatatypes[type]->name);); + opal_datatype_basicDatatypes[description[pos_desc].elem.common.type]->name);); unpack_predefined_heterogeneous(pConvertor, pElem, &count_desc, &conv_ptr, &iov_ptr, &iov_len_local); if (0 == count_desc) { /* completed */ diff --git a/opal/include/opal/Makefile.am b/opal/include/opal/Makefile.am index f0389a25815..ed657307caf 100644 --- a/opal/include/opal/Makefile.am +++ b/opal/include/opal/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2021 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -27,7 +28,8 @@ headers += \ opal/prefetch.h \ opal/hash_string.h \ opal/frameworks.h \ - opal/opal_portable_platform.h + opal/opal_portable_platform.h \ + opal/opal_portable_platform_real.h nodist_headers += \ opal/version.h diff --git a/opal/include/opal/align.h b/opal/include/opal/align.h index 8351668bbee..5df9d9485ea 100644 --- a/opal/include/opal/align.h +++ b/opal/include/opal/align.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -22,11 +22,11 @@ #ifndef OPAL_ALIGN_H #define OPAL_ALIGN_H -#define OPAL_DOWN_ALIGN(x, a, t) ((x) & ~(((t)(a) -1))) -#define OPAL_DOWN_ALIGN_PTR(x, a, t) ((t) OPAL_DOWN_ALIGN((uintptr_t) x, a, uintptr_t)) -#define OPAL_ALIGN(x, a, t) (((x) + ((t)(a) -1)) & ~(((t)(a) -1))) -#define OPAL_ALIGN_PTR(x, a, t) ((t) OPAL_ALIGN((uintptr_t) x, a, uintptr_t)) -#define OPAL_ALIGN_PAD_AMOUNT(x, s) ((~((uintptr_t)(x)) + 1) & ((uintptr_t)(s) -1)) +#define OPAL_DOWN_ALIGN(x,a,t) ((x) & ~(((t)(a)-1))) +#define OPAL_DOWN_ALIGN_PTR(x,a,t) ((t)OPAL_DOWN_ALIGN((uintptr_t)x, a, uintptr_t)) +#define OPAL_ALIGN(x,a,t) (((x)+((t)(a)-1)) & ~(((t)(a)-1))) +#define OPAL_ALIGN_PTR(x,a,t) ((t)OPAL_ALIGN((uintptr_t)x, a, uintptr_t)) +#define OPAL_ALIGN_PAD_AMOUNT(x,s) ((~((uintptr_t)(x))+1) & ((uintptr_t)(s)+(!(uintptr_t)(s))-1)) #if __STDC_VERSION__ >= 201101L # include diff --git a/opal/include/opal/opal_portable_platform.h b/opal/include/opal/opal_portable_platform.h index 2cbc012697d..fc09d04fdaf 100644 --- a/opal/include/opal/opal_portable_platform.h +++ b/opal/include/opal/opal_portable_platform.h @@ -1,396 +1,26 @@ /* - * Header file with preprocessor magic to figure out, which compiler the user has been calling! + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. + * $COPYRIGHT$ * - * This code is adapted from the file other/portable_platform.h of GASnet-1.14.0: - * - Ripping out the required parts. - * - Get rid of brackets as it messes up autoconf - * - Delete version tests for older PGI versions (#include "omp.h" not acceptabe) - * - Indent ('#' should be in column 0) + * Additional copyrights may follow * - * External packages (i.e., romio) depend on top_build_dir/ompi/include, therefore - * although this is not changed in the configure process, this has to be set as - * a .in file... 
- * --------------------------------------------------------------------------- - */ -#ifndef OPAL_PORTABLE_PLATFORM_H -#define OPAL_PORTABLE_PLATFORM_H - -/* All files in this directory and all sub-directories (except where otherwise noted) - * are subject to the following licensing terms: - * - * --------------------------------------------------------------------------- - * "Copyright (c) 2000-2003 The Regents of the University of California. - * All rights reserved. - * - * Permission to use, copy, modify, and distribute this software and its - * documentation for any purpose, without fee, and without written agreement is - * hereby granted, provided that the above copyright notice and the following - * two paragraphs appear in all copies of this software. - * - * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT - * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF - * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS - * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO - * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS." - * --------------------------------------------------------------------------- - * - * Please see the license.txt files within the gm-conduit, lapi-conduit and - * vapi-conduit directories for the licensing terms governing those - * contributed components. 
- * - * The authors/contributors of GASNet include: - * - * Dan Bonachea : - * General infrastructure & documentation - * mpi-conduit - * elan-conduit - * smp-conduit - * udp-conduit - * extended-ref - * template-conduit - * Christian Bell : gm-conduit, shmem-conduit - * Mike Welcome : lapi-conduit, portals-conduit - * Paul H. Hargrove : vapi-conduit, ibv-conduit - * Rajesh Nishtala : collectives, dcmf-conduit - * Parry Husbands (PJRHusbands@lbl.gov): lapi-conduit - * - * For more information about GASNet, visit our home page at: - * http://gasnet.cs.berkeley.edu/ - * Or send email to: - * - * - * Source code contributions (fixes, patches, extensions etc.) should be - * sent to to be reviewed for acceptance into the primary - * distribution. Contributions are most likely to be accepted if they - * are provided as public domain, or under a BSD-style license such as - * the one above. + * $HEADER$ * + * Wrapper around GASNet's gasnet_portable_platform.h to avoid + * compiler warnings */ -#ifndef _STRINGIFY -# define _STRINGIFY_HELPER(x) # x -# define _STRINGIFY(x) _STRINGIFY_HELPER(x) -#endif - -#if defined(__INTEL_COMPILER) -# define PLATFORM_COMPILER_FAMILYNAME INTEL -# define PLATFORM_COMPILER_FAMILYID 2 -# ifdef __cplusplus -# define PLATFORM_COMPILER_INTEL_CXX 1 -# else -# define PLATFORM_COMPILER_INTEL_C 1 -# endif -# define _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE \ - 19700000 /* year 1970: predates most intel products :) */ -# ifdef __INTEL_COMPILER_BUILD_DATE -# define _PLATFORM_INTEL_COMPILER_BUILD_DATE __INTEL_COMPILER_BUILD_DATE -# else -# define _PLATFORM_INTEL_COMPILER_BUILD_DATE _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE -# endif -/* patch number is a decimal build date: YYYYMMDD */ -# define PLATFORM_COMPILER_VERSION_INT(maj, min, pat) \ - (((((maj) *10) | (min)) << 20) \ - | ((pat) < _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE \ - ? 
_PLATFORM_COMPILER_INTEL_MIN_BUILDDATE \ - : ((pat) -_PLATFORM_COMPILER_INTEL_MIN_BUILDDATE))) -# define PLATFORM_COMPILER_VERSION \ - PLATFORM_COMPILER_VERSION_INT(__INTEL_COMPILER / 10, __INTEL_COMPILER / 100, \ - _PLATFORM_INTEL_COMPILER_BUILD_DATE) -# define PLATFORM_COMPILER_VERSION_STR \ - _STRINGIFY(__INTEL_COMPILER) "." _STRINGIFY(_PLATFORM_INTEL_COMPILER_BUILD_DATE) - -#elif defined(__PATHSCALE__) -# define PLATFORM_COMPILER_PATHSCALE 1 -# define PLATFORM_COMPILER_FAMILYNAME PATHSCALE -# define PLATFORM_COMPILER_FAMILYID 3 -# ifdef __cplusplus -# define PLATFORM_COMPILER_PATHSCALE_CXX 1 -# else -# define PLATFORM_COMPILER_PATHSCALE_C 1 -# endif -# define PLATFORM_COMPILER_VERSION \ - PLATFORM_COMPILER_VERSION_INT(__PATHCC__, __PATHCC_MINOR__, __PATHCC_PATCHLEVEL__) -# define PLATFORM_COMPILER_VERSION_STR __PATHSCALE__ - -#elif defined(__PGI) -# define PLATFORM_COMPILER_PGI 1 -# define PLATFORM_COMPILER_FAMILYNAME PGI -# define PLATFORM_COMPILER_FAMILYID 4 -# ifdef __cplusplus -# define PLATFORM_COMPILER_PGI_CXX 1 -# else -# define PLATFORM_COMPILER_PGI_C 1 -# endif -# if __PGIC__ == 99 -/* bug 2230: PGI versioning was broken for some platforms in 7.0 - no way to know exact version, but provide something slightly more accurate */ -# define PLATFORM_COMPILER_VERSION 0x070000 -# define PLATFORM_COMPILER_VERSION_STR "7.?-?" -# elif defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define PLATFORM_COMPILER_VERSION \ - PLATFORM_COMPILER_VERSION_INT(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -# define PLATFORM_COMPILER_VERSION_STR \ - _STRINGIFY(__PGIC__) "." 
_STRINGIFY(__PGIC_MINOR__) "-" _STRINGIFY(__PGIC_PATCHLEVEL__) -# else -/* PGI before 6.1-4 lacks any version ID preprocessor macros - so use this filthy hack */ -/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - * We cannot do these within mpi.h.in, as we should not include ompi.h - * Hopefully, compilers with integrated preprocessors will not analyse code within the #if 0-block - * XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - */ -# if 0 -# ifdef PLATFORM_PGI_IS_ANCIENT - /* Include below might fail for ancient versions lacking this header, but testing shows it - works back to at least 5.1-3 (Nov 2003), and based on docs probably back to 3.2 (Sep 2000) */ -# define PLATFORM_COMPILER_VERSION 0 -# elif defined( \ - __x86_64__) /* bug 1753 - 64-bit omp.h upgrade happenned in <6.0-8,6.1-1) */ -# include "omp.h" -# if defined(_PGOMP_H) - /* 6.1.1 or newer */ -# define PLATFORM_COMPILER_VERSION 0x060101 -# define PLATFORM_COMPILER_VERSION_STR ">=6.1-1" -# else - /* 6.0.8 or older */ -# define PLATFORM_COMPILER_VERSION 0 -# define PLATFORM_COMPILER_VERSION_STR "<=6.0-8" -# endif -# else /* 32-bit omp.h upgrade happenned in <5.2-4,6.0-8 */ -# include "omp.h" -# if defined(_PGOMP_H) - /* 6.0-8 or newer */ -# define PLATFORM_COMPILER_VERSION 0x060008 -# define PLATFORM_COMPILER_VERSION_STR ">=6.0-8" -# else - /* 5.2-4 or older */ -# define PLATFORM_COMPILER_VERSION 0 -# define PLATFORM_COMPILER_VERSION_STR "<=5.2-4" -# endif -# endif -# endif /* 0 */ -/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -# endif - -#elif defined(__xlC__) || defined(__ibmxl__) || defined(__IBMC__) || defined(__IBMCPP__) -# define PLATFORM_COMPILER_XLC 1 -# define PLATFORM_COMPILER_FAMILYNAME XLC -# define PLATFORM_COMPILER_FAMILYID 5 -# ifdef __cplusplus -# define PLATFORM_COMPILER_XLC_CXX 1 -# else -# define PLATFORM_COMPILER_XLC_C 1 -# endif -# define PLATFORM_COMPILER_VERSION __xlC__ -# define PLATFORM_COMPILER_VERSION_INT(maj, min, pat) (((maj) << 8) | ((min) << 4) | (pat)) -#elif defined(__DECC) || 
defined(__DECCXX) -# define PLATFORM_COMPILER_COMPAQ 1 -# define PLATFORM_COMPILER_FAMILYNAME COMPAQ -# define PLATFORM_COMPILER_FAMILYID 6 -# ifdef __cplusplus -# define PLATFORM_COMPILER_COMPAQ_CXX 1 -# else -# define PLATFORM_COMPILER_COMPAQ_C 1 -# endif -# if defined(__DECC_VER) -# define PLATFORM_COMPILER_VERSION __DECC_VER -# elif defined(__DECCXX_VER) -# define PLATFORM_COMPILER_VERSION __DECCXX_VER -# endif - -# define PLATFORM_COMPILER_VERSION_INT(maj, min, pat) \ - (((maj) *10000000) + ((min) *100000) + (90000) + (pat)) -/* 90000 = official ver, 80000 = customer special ver, 60000 = field test ver */ - -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) -# define PLATFORM_COMPILER_SUN 1 -# define PLATFORM_COMPILER_FAMILYNAME SUN -# define PLATFORM_COMPILER_FAMILYID 7 -# ifdef __cplusplus -# define PLATFORM_COMPILER_SUN_CXX 1 -# else -# define PLATFORM_COMPILER_SUN_C 1 -# endif -# if defined(__SUNPRO_C) && __SUNPRO_C > 0 -# define PLATFORM_COMPILER_VERSION __SUNPRO_C -# elif defined(__SUNPRO_CC) && __SUNPRO_CC > 0 -# define PLATFORM_COMPILER_VERSION __SUNPRO_CC -# endif -# define PLATFORM_COMPILER_VERSION_INT(maj, min, pat) (((maj) << 8) | ((min) << 4) | (pat)) - -#elif defined(__HP_cc) || defined(__HP_aCC) -# define PLATFORM_COMPILER_HP 1 -# define PLATFORM_COMPILER_FAMILYNAME HP -# define PLATFORM_COMPILER_FAMILYID 8 -# ifdef __cplusplus -# define PLATFORM_COMPILER_HP_CXX 1 -# else -# define PLATFORM_COMPILER_HP_C 1 -# endif -# if defined(__HP_cc) && __HP_cc > 0 -# define PLATFORM_COMPILER_VERSION __HP_cc -# elif defined(__HP_aCC) && __HP_aCC > 0 -# define PLATFORM_COMPILER_VERSION __HP_aCC -# endif -# define PLATFORM_COMPILER_VERSION_INT(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) - -#elif defined(_SGI_COMPILER_VERSION) \ - || (defined(_COMPILER_VERSION) && defined(__sgi) \ - && !defined(__GNUC__)) /* 7.3.0 and earlier lack _SGI_COMPILER_VERSION */ -# define PLATFORM_COMPILER_SGI 1 -# define PLATFORM_COMPILER_FAMILYNAME SGI -# define 
PLATFORM_COMPILER_FAMILYID 9 -# ifdef __cplusplus -# define PLATFORM_COMPILER_SGI_CXX 1 -# else -# define PLATFORM_COMPILER_SGI_C 1 -# endif -# if defined(_SGI_COMPILER_VERSION) && _SGI_COMPILER_VERSION > 0 -# define PLATFORM_COMPILER_VERSION _SGI_COMPILER_VERSION -# elif defined(_COMPILER_VERSION) && _COMPILER_VERSION > 0 -# define PLATFORM_COMPILER_VERSION _COMPILER_VERSION -# endif -# define PLATFORM_COMPILER_VERSION_INT(maj, min, pat) (((maj) << 8) | ((min) << 4) | (pat)) - -#elif defined(_CRAYC) -# define PLATFORM_COMPILER_CRAY 1 -# define PLATFORM_COMPILER_FAMILYNAME CRAY -# define PLATFORM_COMPILER_FAMILYID 10 -# ifdef __cplusplus -# define PLATFORM_COMPILER_CRAY_CXX 1 -# else -# define PLATFORM_COMPILER_CRAY_C 1 -# endif -# if defined(_RELEASE) && defined(_RELEASE_MINOR) /* X1 and XT */ -# define PLATFORM_COMPILER_VERSION PLATFORM_COMPILER_VERSION_INT(_RELEASE, _RELEASE_MINOR, 0) -# elif defined(_RELEASE) /* T3E */ -# define PLATFORM_COMPILER_VERSION PLATFORM_COMPILER_VERSION_INT(_RELEASE, 0, 0) -# endif -# ifdef _RELEASE_STRING /* X1 and XT */ -# define PLATFORM_COMPILER_VERSION_STR _RELEASE_STRING -# endif - -#elif defined(__KCC) -# define PLATFORM_COMPILER_KAI 1 -# define PLATFORM_COMPILER_FAMILYNAME KAI -# define PLATFORM_COMPILER_FAMILYID 11 -# ifdef __cplusplus -# define PLATFORM_COMPILER_KAI_CXX 1 -# else -# define PLATFORM_COMPILER_KAI_C 1 -# endif - -#elif defined(__MTA__) -# define PLATFORM_COMPILER_MTA 1 -# define PLATFORM_COMPILER_FAMILYNAME MTA -# define PLATFORM_COMPILER_FAMILYID 12 -# ifdef __cplusplus -# define PLATFORM_COMPILER_MTA_CXX 1 -# else -# define PLATFORM_COMPILER_MTA_C 1 -# endif - -#elif defined(_SX) -# define PLATFORM_COMPILER_NECSX 1 -# define PLATFORM_COMPILER_FAMILYNAME NECSX -# define PLATFORM_COMPILER_FAMILYID 13 -# ifdef __cplusplus -# define PLATFORM_COMPILER_NECSX_CXX 1 -# else -# define PLATFORM_COMPILER_NECSX_C 1 -# endif - -#elif defined(_MSC_VER) -# define PLATFORM_COMPILER_MICROSOFT 1 -# define 
PLATFORM_COMPILER_FAMILYNAME MICROSOFT -# define PLATFORM_COMPILER_FAMILYID 14 -# ifdef __cplusplus -# define PLATFORM_COMPILER_MICROSOFT_CXX 1 -# else -# define PLATFORM_COMPILER_MICROSOFT_C 1 -# endif -# define PLATFORM_COMPILER_VERSION _MSC_VER - -#elif defined(__TINYC__) -# define PLATFORM_COMPILER_TINY 1 -# define PLATFORM_COMPILER_FAMILYNAME TINY -# define PLATFORM_COMPILER_FAMILYID 15 -# ifdef __cplusplus -# define PLATFORM_COMPILER_TINY_CXX 1 -# else -# define PLATFORM_COMPILER_TINY_C 1 -# endif - -#elif defined(__LCC__) -# define PLATFORM_COMPILER_LCC 1 -# define PLATFORM_COMPILER_FAMILYNAME LCC -# define PLATFORM_COMPILER_FAMILYID 16 -# ifdef __cplusplus -# define PLATFORM_COMPILER_LCC_CXX 1 -# else -# define PLATFORM_COMPILER_LCC_C 1 -# endif - -#else /* unknown compiler */ -# define PLATFORM_COMPILER_UNKNOWN 1 -#endif +#ifndef OPAL_PORTABLE_PLATFORM_H +#define OPAL_PORTABLE_PLATFORM_H 1 -/* this stanza comes last, because many vendor compilers lie and claim - to be GNU C for compatibility reasons and/or because they share a frontend */ -#if defined(__GNUC__) -# undef PLATFORM_COMPILER_UNKNOWN -# ifndef PLATFORM_COMPILER_FAMILYID -# define PLATFORM_COMPILER_GNU 1 -# define PLATFORM_COMPILER_FAMILYNAME GNU -# define PLATFORM_COMPILER_FAMILYID 1 -# ifdef __cplusplus -# define PLATFORM_COMPILER_GNU_CXX 1 -# else -# define PLATFORM_COMPILER_GNU_C 1 -# endif -# if defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) -# define PLATFORM_COMPILER_VERSION \ - PLATFORM_COMPILER_VERSION_INT(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -# elif defined(__GNUC_MINOR__) /* older versions of egcs lack __GNUC_PATCHLEVEL__ */ -# define PLATFORM_COMPILER_VERSION \ - PLATFORM_COMPILER_VERSION_INT(__GNUC__, __GNUC_MINOR__, 0) -# else -# define PLATFORM_COMPILER_VERSION PLATFORM_COMPILER_VERSION_INT(__GNUC__, 0, 0) -# endif -# define PLATFORM_COMPILER_VERSION_STR __PLATFORM_COMPILER_GNU_VERSION_STR -# else -# define _PLATFORM_COMPILER_GNU_VERSION_STR 
__PLATFORM_COMPILER_GNU_VERSION_STR -# endif -/* gather any advertised GNU version number info, even for non-gcc compilers */ -# if defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) -# define __PLATFORM_COMPILER_GNU_VERSION_STR \ - _STRINGIFY(__GNUC__) "." _STRINGIFY(__GNUC_MINOR__) "." _STRINGIFY(__GNUC_PATCHLEVEL__) -# elif defined(__GNUC_MINOR__) -# define __PLATFORM_COMPILER_GNU_VERSION_STR \ - _STRINGIFY(__GNUC__) "." _STRINGIFY(__GNUC_MINOR__) ".?" -# else -# define __PLATFORM_COMPILER_GNU_VERSION_STR _STRINGIFY(__GNUC__) ".?.?" -# endif -#elif defined(PLATFORM_COMPILER_UNKNOWN) /* unknown compiler */ -# define PLATFORM_COMPILER_FAMILYNAME UNKNOWN -# define PLATFORM_COMPILER_FAMILYID 0 +#ifndef _PORTABLE_PLATFORM_H +#define _PORTABLE_PLATFORM_H 0 #endif - -/* Default Values */ -#ifndef PLATFORM_COMPILER_VERSION -# define PLATFORM_COMPILER_VERSION 0 /* don't know */ +#ifndef PLATFORM_HEADER_VERSION +#define PLATFORM_HEADER_VERSION 0 #endif -#ifndef PLATFORM_COMPILER_VERSION_STR -# define PLATFORM_COMPILER_VERSION_STR _STRINGIFY(PLATFORM_COMPILER_VERSION) -#endif +#include "opal/opal_portable_platform_real.h" -#ifndef PLATFORM_COMPILER_VERSION_INT -# define PLATFORM_COMPILER_VERSION_INT(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) #endif - -#endif /* OPAL_PORTABLE_PLATFORM_H */ diff --git a/opal/include/opal/opal_portable_platform_real.h b/opal/include/opal/opal_portable_platform_real.h new file mode 100644 index 00000000000..70e73cebc7e --- /dev/null +++ b/opal/include/opal/opal_portable_platform_real.h @@ -0,0 +1,1161 @@ +/* $Source: bitbucket.org:berkeleylab/gasnet.git/other/portable_platform.h $ + * Description: Portable platform detection header + * Copyright 2006, Dan Bonachea + * Copyright 2018, The Regents of the University of California + * Terms of Use: In ADDITION to the license information in license.txt, + * anyone redistributing this header agrees not to change any part of this notice, or + * the version handshake in the header 
versioning section below. + * Furthermore, redistributed copies of any portion of this header must + * not appear within files named "portable_platform.h" or "gasnet_portable_platform.h", + * unless it is embedded within a complete copy of the GASNet distribution. + * These restrictions are designed to prevent conflicts for end users + * who compose multiple projects using the PLATFORM_ namespace. + * + * The canonical version of this header is hosted in the GASNet project at: + * https://bitbucket.org/berkeleylab/gasnet + * + * Developers who clone this header into their own project are HIGHLY encouraged to + * contribute any improvements (especially addition of new platforms) back to the + * canonical version, for the benefit of the community. + * Contributions and bug reports should be directed to: + * https://gasnet-bugs.lbl.gov or gasnet-staff@lbl.gov + */ +/* ------------------------------------------------------------------------------------ */ +/* Header versioning: DO NOT CHANGE ANYTHING IN THIS SECTION + * The license terms for this header prohibit modifying this section in ANY way. + Clones should continue to advertise a PLATFORM_HEADER_VERSION equal to the canonical version they cloned, + and should not modify the handshake logic which ensures the highest canonical header version is used. 
+ */ +/* Publish and enforce version number for the public interface to this header */ +/* YOU ARE NOT PERMITTED TO CHANGE THIS SECTION WITHOUT DIRECT APPROVAL FROM DAN BONACHEA */ +#if _PORTABLE_PLATFORM_H != PLATFORM_HEADER_VERSION \ + || PLATFORM_HEADER_VERSION < 16 +#undef PLATFORM_HEADER_VERSION +#define PLATFORM_HEADER_VERSION 16 +#undef _PORTABLE_PLATFORM_H +#define _PORTABLE_PLATFORM_H PLATFORM_HEADER_VERSION +/* End Header versioning handshake */ +/* ------------------------------------------------------------------------------------ */ + +/* make sure that previously-included older/broken clones of this header do not pollute our namespace */ +#undef PLATFORM_COMPILER_FAMILYNAME +#undef PLATFORM_COMPILER_FAMILYID +#undef PLATFORM_COMPILER_ID +#undef PLATFORM_COMPILER_VERSION +#undef PLATFORM_COMPILER_VERSION_STR +#undef PLATFORM_COMPILER_VERSION_INT +#undef PLATFORM_COMPILER_IDSTR +#undef PLATFORM_COMPILER_VERSION_GT +#undef PLATFORM_COMPILER_VERSION_GE +#undef PLATFORM_COMPILER_VERSION_EQ +#undef PLATFORM_COMPILER_VERSION_LE +#undef PLATFORM_COMPILER_VERSION_LT +#undef PLATFORM_COMPILER_C_LANGLVL +#undef PLATFORM_COMPILER_CXX_LANGLVL +#undef PLATFORM_COMPILER_INTEL +#undef PLATFORM_COMPILER_INTEL_C +#undef PLATFORM_COMPILER_INTEL_CXX +#undef PLATFORM_COMPILER_PATHSCALE +#undef PLATFORM_COMPILER_PATHSCALE_C +#undef PLATFORM_COMPILER_PATHSCALE_CXX +#undef PLATFORM_COMPILER_PGI +#undef PLATFORM_COMPILER_PGI_C +#undef PLATFORM_COMPILER_PGI_CXX +#undef PLATFORM_COMPILER_XLC +#undef PLATFORM_COMPILER_XLC_C +#undef PLATFORM_COMPILER_XLC_CXX +#undef PLATFORM_COMPILER_COMPAQ +#undef PLATFORM_COMPILER_COMPAQ_C +#undef PLATFORM_COMPILER_COMPAQ_CXX +#undef PLATFORM_COMPILER_SUN +#undef PLATFORM_COMPILER_SUN_C +#undef PLATFORM_COMPILER_SUN_CXX +#undef PLATFORM_COMPILER_HP +#undef PLATFORM_COMPILER_HP_C +#undef PLATFORM_COMPILER_HP_CXX +#undef PLATFORM_COMPILER_SGI +#undef PLATFORM_COMPILER_SGI_C +#undef PLATFORM_COMPILER_SGI_CXX +#undef PLATFORM_COMPILER_CRAY +#undef 
PLATFORM_COMPILER_CRAY_C +#undef PLATFORM_COMPILER_CRAY_CXX +#undef PLATFORM_COMPILER_KAI +#undef PLATFORM_COMPILER_KAI_C +#undef PLATFORM_COMPILER_KAI_CXX +#undef PLATFORM_COMPILER_MTA +#undef PLATFORM_COMPILER_MTA_C +#undef PLATFORM_COMPILER_MTA_CXX +#undef PLATFORM_COMPILER_NECSX +#undef PLATFORM_COMPILER_NECSX_C +#undef PLATFORM_COMPILER_NECSX_CXX +#undef PLATFORM_COMPILER_MICROSOFT +#undef PLATFORM_COMPILER_MICROSOFT_C +#undef PLATFORM_COMPILER_MICROSOFT_CXX +#undef PLATFORM_COMPILER_TINY +#undef PLATFORM_COMPILER_TINY_C +#undef PLATFORM_COMPILER_TINY_CXX +#undef PLATFORM_COMPILER_LCC +#undef PLATFORM_COMPILER_LCC_C +#undef PLATFORM_COMPILER_LCC_CXX +#undef PLATFORM_COMPILER_OPEN64 +#undef PLATFORM_COMPILER_OPEN64_C +#undef PLATFORM_COMPILER_OPEN64_CXX +#undef PLATFORM_COMPILER_PCC +#undef PLATFORM_COMPILER_PCC_C +#undef PLATFORM_COMPILER_PCC_CXX +#undef PLATFORM_COMPILER_CLANG +#undef PLATFORM_COMPILER_CLANG_C +#undef PLATFORM_COMPILER_CLANG_CXX +#undef PLATFORM_COMPILER_NVHPC +#undef PLATFORM_COMPILER_NVHPC_C +#undef PLATFORM_COMPILER_NVHPC_CXX +#undef PLATFORM_COMPILER_GNU +#undef PLATFORM_COMPILER_GNU_C +#undef PLATFORM_COMPILER_GNU_CXX +#undef PLATFORM_COMPILER_UNKNOWN + +#undef PLATFORM_OS_FAMILYNAME +#undef PLATFORM_OS_CATAMOUNT +#undef PLATFORM_OS_CNL +#undef PLATFORM_OS_BGP +#undef PLATFORM_OS_BGQ +#undef PLATFORM_OS_WSL +#undef PLATFORM_OS_K42 +#undef PLATFORM_OS_UCLINUX +#undef PLATFORM_OS_LINUX +#undef PLATFORM_OS_BLRTS +#undef PLATFORM_OS_CYGWIN +#undef PLATFORM_OS_MSWINDOWS +#undef PLATFORM_OS_AIX +#undef PLATFORM_OS_TRU64 +#undef PLATFORM_OS_FREEBSD +#undef PLATFORM_OS_NETBSD +#undef PLATFORM_OS_OPENBSD +#undef PLATFORM_OS_SOLARIS +#undef PLATFORM_OS_DARWIN +#undef PLATFORM_OS_IRIX +#undef PLATFORM_OS_HPUX +#undef PLATFORM_OS_UNICOS +#undef PLATFORM_OS_MTA +#undef PLATFORM_OS_SUPERUX +#undef PLATFORM_OS_UNKNOWN + +#undef PLATFORM_ARCH_FAMILYNAME +#undef PLATFORM_ARCH_32 +#undef _PLATFORM_ARCH_32 +#undef PLATFORM_ARCH_64 +#undef _PLATFORM_ARCH_64 
+#undef PLATFORM_ARCH_BIG_ENDIAN +#undef _PLATFORM_ARCH_BIG_ENDIAN +#undef PLATFORM_ARCH_LITTLE_ENDIAN +#undef _PLATFORM_ARCH_LITTLE_ENDIAN +#undef PLATFORM_ARCH_POWERPC +#undef PLATFORM_ARCH_MIC +#undef PLATFORM_ARCH_X86_64 +#undef PLATFORM_ARCH_IA64 +#undef PLATFORM_ARCH_X86 +#undef PLATFORM_ARCH_ALPHA +#undef PLATFORM_ARCH_MIPS +#undef PLATFORM_ARCH_SPARC +#undef PLATFORM_ARCH_PARISC +#undef PLATFORM_ARCH_CRAYX1 +#undef PLATFORM_ARCH_CRAYT3E +#undef PLATFORM_ARCH_MTA +#undef PLATFORM_ARCH_NECSX +#undef PLATFORM_ARCH_MICROBLAZE +#undef PLATFORM_ARCH_ARM +#undef PLATFORM_ARCH_AARCH64 +#undef PLATFORM_ARCH_TILE +#undef PLATFORM_ARCH_S390 +#undef PLATFORM_ARCH_UNKNOWN + +/* prevent known old/broken versions of this header from loading */ +#undef OMPI_PORTABLE_PLATFORM_H +#define OMPI_PORTABLE_PLATFORM_H +#undef OPAL_PORTABLE_PLATFORM_H +#define OPAL_PORTABLE_PLATFORM_H + +/* ------------------------------------------------------------------------------------ */ +/* most of this file was written based on information in vendor documents, system headers, + and inspecting verbose compiler output. 
Another useful source of information: http://predef.sourceforge.net/ +*/ + +/* ------------------------------------------------------------------------------------ */ +/* helpers */ + +#undef _PLATFORM_STRINGIFY_HELPER +#define _PLATFORM_STRINGIFY_HELPER(x) #x +#undef PLATFORM_STRINGIFY +#define PLATFORM_STRINGIFY(x) _PLATFORM_STRINGIFY_HELPER(x) + +/* ------------------------------------------------------------------------------------ */ +/* Compiler detection */ +/* + PLATFORM_COMPILER_<family>: + defined to 1 if compiler is a given family, undef otherwise + PLATFORM_COMPILER_<family>_C + PLATFORM_COMPILER_<family>_CXX + defined to 1 if compiler is a given family, and is the C or C++ compiler, respectively + PLATFORM_COMPILER_FAMILYNAME: + unquoted token which provides the compiler family name + PLATFORM_COMPILER_FAMILYID: + defined to a positive integral value which is unique to a given compiler family + or zero if the compiler is unrecognized + PLATFORM_COMPILER_ID: + same as PLATFORM_COMPILER_FAMILYID, except C and C++ compilers are differentiated + PLATFORM_COMPILER_VERSION: + defined to an integral expression which is guaranteed to be monotonically non-decreasing + with increasing compiler versions. Will be zero for unrecognized compilers. + The exact encoding of compiler version tuples into this constant may occasionally + change when this header is upgraded, so code should use the (in)equality macros below + to check against particular compiler versions, instead of embedding an encoded constant.
+ PLATFORM_COMPILER_VERSION_STR: + A string representation of the compiler version, which may contain additional info + PLATFORM_COMPILER_VERSION_[GT,GE,EQ,LE,LT](maj,min,pat): + evaluate to non-zero iff the compiler version in use is respectively + greater-than, greater-or-equal, equal, less-or-equal, less-than + the provided version components + PLATFORM_COMPILER_IDSTR: + a string which uniquely identifies recognized compilers + PLATFORM_COMPILER_C_LANGLVL and PLATFORM_COMPILER_CXX_LANGLVL: (in PLATFORM_HEADER_VERSION >= 5) + defined to a positive integral value corresponding to the C or C++ (respectively) + language standard to which the current compiler advertises conformance. + Otherwise undef (in particular at most one of these is defined in a given compilation). +*/ + +#if defined(__INTEL_COMPILER) + #define PLATFORM_COMPILER_INTEL 1 + #define PLATFORM_COMPILER_FAMILYNAME INTEL + #define PLATFORM_COMPILER_FAMILYID 2 + #ifdef __cplusplus + #define PLATFORM_COMPILER_INTEL_CXX 1 + #else + #define PLATFORM_COMPILER_INTEL_C 1 + #endif + /* Intel compiler version "patch number" + * ------------------------------------- + * Intel compiler versioning is unfortunately complicated by behavioral changes. + * Versions prior to Intel 14.0.0 (Sept 2013) lacked a preprocessor symbol to supply the "update" number. + * Version 14.0.0 and later supply a __INTEL_COMPILER_UPDATE symbol, but sadly several releases of Version 19 + * report the wrong value in this field (bug 3876). + * For now, the "patch" field of the PLATFORM_COMPILER_VERSION for Intel is the release package BUILD DATE, + * in the same decimal YYYYMMDD format as __INTEL_COMPILER_BUILD_DATE, as this is the only indicator that has + * remained reliably stable/correct across versions. 
+ * So for example to check for icc --version "19.0.1.144 20181018" or later, pass: + * PLATFORM_COMPILER_VERSION_GE(19, 0, 20181018) + * NOTE 1: this build-date is unfortunately OS-DEPENDENT, sometimes differing by several days or weeks + * between the Linux and OSX releases. For a complete mapping, see: + * https://software.intel.com/en-us/articles/intel-compiler-and-composer-update-version-numbers-to-compiler-version-number-mapping + * NOTE 2: some of the build-date entries in the table linked above have been observed to be incorrect, + * so when possible it's safest to verify the build-date from `icc --version` on both Linux and macOS. + */ + #undef _PLATFORM_INTEL_COMPILER_BUILD_DATE + #undef _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE + #define _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE 19900000 /* year 1990: corresponds roughly to Intel v4.5 (1992) */ + /* MIN_BUILDDATE is used to normalize build dates to a bit-saving range for the encoding + * Intel officially supports the current release and two prior (currently back to 2016) + * Our 1990 floor corresponds to Intel v4.x that only worked on MS-DOS and predates both Linux and BSD-based macOS + */ + #ifdef __INTEL_COMPILER_BUILD_DATE + #define _PLATFORM_INTEL_COMPILER_BUILD_DATE __INTEL_COMPILER_BUILD_DATE + #else + #define _PLATFORM_INTEL_COMPILER_BUILD_DATE _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE + #endif + /* Intel patch number is a decimal build date: YYYYMMDD - do NOT pass the "update number" */ + #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ + (((((maj) * 100) + (min)) << 19) | \ + ((pat) < _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE ? 
\ + 0 : ((pat)-_PLATFORM_COMPILER_INTEL_MIN_BUILDDATE))) + #undef _PLATFORM__INTEL_COMPILER + #if __INTEL_COMPILER == 9999 /* Seen in 20110811 release of 12.1.0 - overflows VERSION_INT() */ + #define _PLATFORM__INTEL_COMPILER 1201 + #else + #define _PLATFORM__INTEL_COMPILER __INTEL_COMPILER + #endif + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(_PLATFORM__INTEL_COMPILER/100, _PLATFORM__INTEL_COMPILER%100, _PLATFORM_INTEL_COMPILER_BUILD_DATE) + #define PLATFORM_COMPILER_VERSION_STR \ + PLATFORM_STRINGIFY(_PLATFORM__INTEL_COMPILER) "." PLATFORM_STRINGIFY(_PLATFORM_INTEL_COMPILER_BUILD_DATE) + +#elif defined(__PATHSCALE__) + #define PLATFORM_COMPILER_PATHSCALE 1 + #define PLATFORM_COMPILER_FAMILYNAME PATHSCALE + #define PLATFORM_COMPILER_FAMILYID 3 + #ifdef __cplusplus + #define PLATFORM_COMPILER_PATHSCALE_CXX 1 + #else + #define PLATFORM_COMPILER_PATHSCALE_C 1 + #endif + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(__PATHCC__,__PATHCC_MINOR__,__PATHCC_PATCHLEVEL__+0) + #define PLATFORM_COMPILER_VERSION_STR __PATHSCALE__ + +#elif defined(__NVCOMPILER) // Must occur prior to PGI and CLANG + #define PLATFORM_COMPILER_NVHPC 1 + #define PLATFORM_COMPILER_FAMILYNAME NVHPC + #define PLATFORM_COMPILER_FAMILYID 20 + #ifdef __cplusplus + #define PLATFORM_COMPILER_NVHPC_CXX 1 + #else + #define PLATFORM_COMPILER_NVHPC_C 1 + #endif + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(__NVCOMPILER_MAJOR__,__NVCOMPILER_MINOR__,__NVCOMPILER_PATCHLEVEL__) + #define PLATFORM_COMPILER_VERSION_STR \ + PLATFORM_STRINGIFY(__NVCOMPILER_MAJOR__) "." 
PLATFORM_STRINGIFY(__NVCOMPILER_MINOR__) "-" PLATFORM_STRINGIFY(__NVCOMPILER_PATCHLEVEL__) + +#elif defined(__PGI) + #define PLATFORM_COMPILER_PGI 1 + #define PLATFORM_COMPILER_FAMILYNAME PGI + #define PLATFORM_COMPILER_FAMILYID 4 + #ifdef __cplusplus + #define PLATFORM_COMPILER_PGI_CXX 1 + #else + #define PLATFORM_COMPILER_PGI_C 1 + #endif + #if __PGIC__ == 99 + /* bug 2230: PGI versioning was broken for some platforms in 7.0 + no way to know exact version, but provide something slightly more accurate */ + #define PLATFORM_COMPILER_VERSION 0x070000 + #define PLATFORM_COMPILER_VERSION_STR "7.?-?" + #elif defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(__PGIC__,__PGIC_MINOR__,__PGIC_PATCHLEVEL__) + #define PLATFORM_COMPILER_VERSION_STR \ + PLATFORM_STRINGIFY(__PGIC__) "." PLATFORM_STRINGIFY(__PGIC_MINOR__) "-" PLATFORM_STRINGIFY(__PGIC_PATCHLEVEL__) + #else + /* PGI before 6.1-4 lacks any version ID preprocessor macros - so use this filthy hack */ + #ifdef PLATFORM_PGI_IS_ANCIENT + /* Include below might fail for ancient versions lacking this header, but testing shows it + works back to at least 5.1-3 (Nov 2003), and based on docs probably back to 3.2 (Sep 2000) */ + #define PLATFORM_COMPILER_VERSION 0 + #elif defined(__x86_64__) /* bug 1753 - 64-bit omp.h upgrade happenned in <6.0-8,6.1-1] */ + #include "omp.h" + #if defined(_PGOMP_H) + /* 6.1.1 or newer */ + #define PLATFORM_COMPILER_VERSION 0x060101 + #define PLATFORM_COMPILER_VERSION_STR ">=6.1-1" + #else + /* 6.0.8 or older */ + #define PLATFORM_COMPILER_VERSION 0 + #define PLATFORM_COMPILER_VERSION_STR "<=6.0-8" + #endif + #else /* 32-bit omp.h upgrade happenned in <5.2-4,6.0-8] */ + #include "omp.h" + #if defined(_PGOMP_H) + /* 6.0-8 or newer */ + #define PLATFORM_COMPILER_VERSION 0x060008 + #define PLATFORM_COMPILER_VERSION_STR ">=6.0-8" + #else + /* 5.2-4 or older */ + #define PLATFORM_COMPILER_VERSION 0 + 
#define PLATFORM_COMPILER_VERSION_STR "<=5.2-4" + #endif + #endif + #endif + +#elif defined(__xlC__) || defined(__ibmxl__) + #define PLATFORM_COMPILER_XLC 1 + #define PLATFORM_COMPILER_FAMILYNAME XLC + #define PLATFORM_COMPILER_FAMILYID 5 + #ifdef __cplusplus + #define PLATFORM_COMPILER_XLC_CXX 1 + #else + #define PLATFORM_COMPILER_XLC_C 1 + #endif + #ifdef __ibmxl_version__ + #define PLATFORM_COMPILER_VERSION \ + (__ibmxl_version__ << 24 | __ibmxl_release__ << 16 | \ + __ibmxl_modification__ << 8 | __ibmxl_ptf_fix_level__) + #define PLATFORM_COMPILER_VERSION_STR \ + PLATFORM_STRINGIFY(__ibmxl_version__) "." PLATFORM_STRINGIFY(__ibmxl_release__) "." PLATFORM_STRINGIFY(__ibmxl_modification__) "." PLATFORM_STRINGIFY(__ibmxl_ptf_fix_level__) + #else + #ifdef __xlC_ver__ + #define PLATFORM_COMPILER_VERSION (__xlC__ << 16 | __xlC_ver__) + #else + #define PLATFORM_COMPILER_VERSION (__xlC__ << 16) + #endif + #ifdef __xlc__ + #define PLATFORM_COMPILER_VERSION_STR __xlc__ + #else + #define PLATFORM_COMPILER_VERSION_STR PLATFORM_STRINGIFY(__xlC__) + #endif + #endif + #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ + ( ((maj) << 24) | ((min) << 16) | ((pat) << 8) ) + +#elif defined(__DECC) || defined(__DECCXX) + #define PLATFORM_COMPILER_COMPAQ 1 + #define PLATFORM_COMPILER_FAMILYNAME COMPAQ + #define PLATFORM_COMPILER_FAMILYID 6 + #ifdef __cplusplus + #define PLATFORM_COMPILER_COMPAQ_CXX 1 + #else + #define PLATFORM_COMPILER_COMPAQ_C 1 + #endif + #if defined(__DECC_VER) + #define PLATFORM_COMPILER_VERSION __DECC_VER + #elif defined(__DECCXX_VER) + #define PLATFORM_COMPILER_VERSION __DECCXX_VER + #endif + + #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ + ( ((maj) * 10000000) + ((min) * 100000) + (90000) + (pat) ) + /* 90000 = official ver, 80000 = customer special ver, 60000 = field test ver */ + +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) + #define PLATFORM_COMPILER_SUN 1 + #define PLATFORM_COMPILER_FAMILYNAME SUN + #define PLATFORM_COMPILER_FAMILYID 7 + 
#ifdef __cplusplus + #define PLATFORM_COMPILER_SUN_CXX 1 + #else + #define PLATFORM_COMPILER_SUN_C 1 + #endif + #if defined(__SUNPRO_C) && __SUNPRO_C > 0 + #define PLATFORM_COMPILER_VERSION __SUNPRO_C + #elif defined(__SUNPRO_CC) && __SUNPRO_CC > 0 + #define PLATFORM_COMPILER_VERSION __SUNPRO_CC + #endif + /* Sun version numbers look like hex but are actually a sloppy concatenation of decimal version numbers + * leading to weird discontinuities in the version space, luckily it remains monotonic (so far) + */ + #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) ( \ + (min) < 10 ? \ + ( ((maj) << 8) | ((min) << 4) | (pat) ) : \ + ( ((maj) << 12) | (((min)/10) << 8) | (((min)%10) << 4) | (pat) ) ) + +#elif defined(__HP_cc) || defined(__HP_aCC) + #define PLATFORM_COMPILER_HP 1 + #define PLATFORM_COMPILER_FAMILYNAME HP + #define PLATFORM_COMPILER_FAMILYID 8 + #ifdef __cplusplus + #define PLATFORM_COMPILER_HP_CXX 1 + #else + #define PLATFORM_COMPILER_HP_C 1 + #endif + #if defined(__HP_cc) && __HP_cc > 0 + #define PLATFORM_COMPILER_VERSION __HP_cc + #elif defined(__HP_aCC) && __HP_aCC > 0 + #define PLATFORM_COMPILER_VERSION __HP_aCC + #endif + #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ + ( ((maj) << 16) | ((min) << 8) | (pat) ) + +#elif defined(_SGI_COMPILER_VERSION) || \ + (defined(_COMPILER_VERSION) && defined(__sgi) && !defined(__GNUC__)) /* 7.3.0 and earlier lack _SGI_COMPILER_VERSION */ + #define PLATFORM_COMPILER_SGI 1 + #define PLATFORM_COMPILER_FAMILYNAME SGI + #define PLATFORM_COMPILER_FAMILYID 9 + #ifdef __cplusplus + #define PLATFORM_COMPILER_SGI_CXX 1 + #else + #define PLATFORM_COMPILER_SGI_C 1 + #endif + #if defined(_SGI_COMPILER_VERSION) && _SGI_COMPILER_VERSION > 0 + #define PLATFORM_COMPILER_VERSION _SGI_COMPILER_VERSION + #elif defined(_COMPILER_VERSION) && _COMPILER_VERSION > 0 + #define PLATFORM_COMPILER_VERSION _COMPILER_VERSION + #endif + #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ + ( ((maj) << 8) | ((min) << 4) | (pat) ) + +#elif 
defined(_CRAYC) + #define PLATFORM_COMPILER_CRAY 1 + #define PLATFORM_COMPILER_FAMILYNAME CRAY + #define PLATFORM_COMPILER_FAMILYID 10 + #ifdef __cplusplus + #define PLATFORM_COMPILER_CRAY_CXX 1 + #else + #define PLATFORM_COMPILER_CRAY_C 1 + #endif + #if defined(_RELEASE_MAJOR) && defined(_RELEASE_MINOR) /* XE, XK, XC */ + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(_RELEASE_MAJOR,_RELEASE_MINOR,0) + #elif defined(_RELEASE) && defined(_RELEASE_MINOR) /* X1 and XT */ + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(_RELEASE,_RELEASE_MINOR,0) + #elif defined(_RELEASE) /* T3E */ + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(_RELEASE,0,0) + #endif + #ifdef _RELEASE_STRING /* X1 and XT, XK, XC */ + #define PLATFORM_COMPILER_VERSION_STR _RELEASE_STRING + #endif + +#elif defined(__KCC) + #define PLATFORM_COMPILER_KAI 1 + #define PLATFORM_COMPILER_FAMILYNAME KAI + #define PLATFORM_COMPILER_FAMILYID 11 + #ifdef __cplusplus + #define PLATFORM_COMPILER_KAI_CXX 1 + #else + #define PLATFORM_COMPILER_KAI_C 1 + #endif + +#elif defined(__MTA__) + #define PLATFORM_COMPILER_MTA 1 + #define PLATFORM_COMPILER_FAMILYNAME MTA + #define PLATFORM_COMPILER_FAMILYID 12 + #ifdef __cplusplus + #define PLATFORM_COMPILER_MTA_CXX 1 + #else + #define PLATFORM_COMPILER_MTA_C 1 + #endif + +#elif defined(_SX) + #define PLATFORM_COMPILER_NECSX 1 + #define PLATFORM_COMPILER_FAMILYNAME NECSX + #define PLATFORM_COMPILER_FAMILYID 13 + #ifdef __cplusplus + #define PLATFORM_COMPILER_NECSX_CXX 1 + #else + #define PLATFORM_COMPILER_NECSX_C 1 + #endif + +#elif defined(_MSC_VER) + #define PLATFORM_COMPILER_MICROSOFT 1 + #define PLATFORM_COMPILER_FAMILYNAME MICROSOFT + #define PLATFORM_COMPILER_FAMILYID 14 + #ifdef __cplusplus + #define PLATFORM_COMPILER_MICROSOFT_CXX 1 + #else + #define PLATFORM_COMPILER_MICROSOFT_C 1 + #endif + #define PLATFORM_COMPILER_VERSION _MSC_VER + +#elif defined(__TINYC__) + #define PLATFORM_COMPILER_TINY 1 + #define 
PLATFORM_COMPILER_FAMILYNAME TINY + #define PLATFORM_COMPILER_FAMILYID 15 + #ifdef __cplusplus + #define PLATFORM_COMPILER_TINY_CXX 1 + #else + #define PLATFORM_COMPILER_TINY_C 1 + #endif + +#elif defined(__LCC__) + #define PLATFORM_COMPILER_LCC 1 + #define PLATFORM_COMPILER_FAMILYNAME LCC + #define PLATFORM_COMPILER_FAMILYID 16 + #ifdef __cplusplus + #define PLATFORM_COMPILER_LCC_CXX 1 + #else + #define PLATFORM_COMPILER_LCC_C 1 + #endif + +#elif defined(__OPENCC__) + #define PLATFORM_COMPILER_OPEN64 1 + #define PLATFORM_COMPILER_FAMILYNAME OPEN64 + #define PLATFORM_COMPILER_FAMILYID 17 + #ifdef __cplusplus + #define PLATFORM_COMPILER_OPEN64_CXX 1 + #else + #define PLATFORM_COMPILER_OPEN64_C 1 + #endif + /* Note: can't use __OPENCC_PATCHLEVEL__ because it is sometimes non-integer (eg 3.2). + Adding a cast would not result in a preprocessor constant expression. */ + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(__OPENCC__,__OPENCC_MINOR__,0) + #define PLATFORM_COMPILER_VERSION_STR __OPEN64__ + +#elif defined(__PCC__) + #define PLATFORM_COMPILER_PCC 1 + #define PLATFORM_COMPILER_FAMILYNAME PCC + #define PLATFORM_COMPILER_FAMILYID 18 + #ifdef __cplusplus + #define PLATFORM_COMPILER_PCC_CXX 1 + #else + #define PLATFORM_COMPILER_PCC_C 1 + #endif + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(__PCC__,__PCC_MINOR__,__PCC_MINORMINOR__) + #define PLATFORM_COMPILER_VERSION_STR \ + PLATFORM_STRINGIFY(__PCC__) "." PLATFORM_STRINGIFY(__PCC_MINOR__) "." 
PLATFORM_STRINGIFY(__PCC_MINORMINOR__) + +#elif defined(__clang__) + #define PLATFORM_COMPILER_CLANG 1 + #define PLATFORM_COMPILER_FAMILYNAME CLANG + #define PLATFORM_COMPILER_FAMILYID 19 + #ifdef __cplusplus + #define PLATFORM_COMPILER_CLANG_CXX 1 + #else + #define PLATFORM_COMPILER_CLANG_C 1 + #endif + #ifdef __clang_version__ + /* clang 2.7 (gcc 4.2.1 compliant) and earlier lacked specific version identification */ + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(__clang_major__,__clang_minor__,__clang_patchlevel__) + #define PLATFORM_COMPILER_VERSION_STR __clang_version__ + #endif + +// NOTE: PLATFORM_COMPILER_FAMILYID "20" is allocted to NVHPC, appearing earlier + +#else /* unknown compiler */ + #define PLATFORM_COMPILER_UNKNOWN 1 +#endif + +/* this stanza comes last, because many vendor compilers lie and claim + to be GNU C for compatibility reasons and/or because they share a frontend */ +#undef _PLATFORM_COMPILER_GNU_VERSION_STR +#undef __PLATFORM_COMPILER_GNU_VERSION_STR +#if defined(__GNUC__) + #undef PLATFORM_COMPILER_UNKNOWN + #ifndef PLATFORM_COMPILER_FAMILYID + #define PLATFORM_COMPILER_GNU 1 + #define PLATFORM_COMPILER_FAMILYNAME GNU + #define PLATFORM_COMPILER_FAMILYID 1 + #ifdef __cplusplus + #define PLATFORM_COMPILER_GNU_CXX 1 + #else + #define PLATFORM_COMPILER_GNU_C 1 + #endif + #if defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__) + #elif defined(__GNUC_MINOR__) /* older versions of egcs lack __GNUC_PATCHLEVEL__ */ + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(__GNUC__,__GNUC_MINOR__,0) + #else + #define PLATFORM_COMPILER_VERSION \ + PLATFORM_COMPILER_VERSION_INT(__GNUC__,0,0) + #endif + #define PLATFORM_COMPILER_VERSION_STR __PLATFORM_COMPILER_GNU_VERSION_STR + #else + #define _PLATFORM_COMPILER_GNU_VERSION_STR __PLATFORM_COMPILER_GNU_VERSION_STR + #endif + /* gather any 
advertised GNU version number info, even for non-gcc compilers */ + #if defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) + #define __PLATFORM_COMPILER_GNU_VERSION_STR \ + PLATFORM_STRINGIFY(__GNUC__) "." PLATFORM_STRINGIFY(__GNUC_MINOR__) "." PLATFORM_STRINGIFY(__GNUC_PATCHLEVEL__) + #elif defined(__GNUC_MINOR__) + #define __PLATFORM_COMPILER_GNU_VERSION_STR \ + PLATFORM_STRINGIFY(__GNUC__) "." PLATFORM_STRINGIFY(__GNUC_MINOR__) ".?" + #else + #define __PLATFORM_COMPILER_GNU_VERSION_STR \ + PLATFORM_STRINGIFY(__GNUC__) ".?.?" + #endif +#elif defined(PLATFORM_COMPILER_UNKNOWN) /* unknown compiler */ + #define PLATFORM_COMPILER_FAMILYNAME UNKNOWN + #define PLATFORM_COMPILER_FAMILYID 0 +#endif + +/* defaulting */ + +#ifndef PLATFORM_COMPILER_VERSION +#define PLATFORM_COMPILER_VERSION 0 /* don't know */ +#endif + +#ifndef PLATFORM_COMPILER_VERSION_STR +#define PLATFORM_COMPILER_VERSION_STR PLATFORM_STRINGIFY(PLATFORM_COMPILER_VERSION) +#endif + +#ifndef PLATFORM_COMPILER_VERSION_INT +#define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ + (((maj) << 16) | ((min) << 8) | (pat)) +#endif + +/* version check macros */ + +#define PLATFORM_COMPILER_VERSION_GT(maj,min,pat) \ + PLATFORM_COMPILER_VERSION > PLATFORM_COMPILER_VERSION_INT(maj,min,pat) +#define PLATFORM_COMPILER_VERSION_GE(maj,min,pat) \ + PLATFORM_COMPILER_VERSION >= PLATFORM_COMPILER_VERSION_INT(maj,min,pat) +#define PLATFORM_COMPILER_VERSION_EQ(maj,min,pat) \ + PLATFORM_COMPILER_VERSION == PLATFORM_COMPILER_VERSION_INT(maj,min,pat) +#define PLATFORM_COMPILER_VERSION_LE(maj,min,pat) \ + PLATFORM_COMPILER_VERSION <= PLATFORM_COMPILER_VERSION_INT(maj,min,pat) +#define PLATFORM_COMPILER_VERSION_LT(maj,min,pat) \ + PLATFORM_COMPILER_VERSION < PLATFORM_COMPILER_VERSION_INT(maj,min,pat) + +/* misc feature detection */ + +#ifdef __cplusplus + #define PLATFORM_COMPILER_ID (10000+PLATFORM_COMPILER_FAMILYID) +#else + #define PLATFORM_COMPILER_ID PLATFORM_COMPILER_FAMILYID +#endif + +/* default language spec 
conformance detection */ +#if !defined(PLATFORM_COMPILER_C_LANGLVL) && !defined(PLATFORM_COMPILER_CXX_LANGLVL) + #if defined(__cplusplus) && (__cplusplus > 0) /* C++98 or newer */ + #define PLATFORM_COMPILER_CXX_LANGLVL __cplusplus + #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ > 0) /* C95 or newer */ + #define PLATFORM_COMPILER_C_LANGLVL __STDC_VERSION__ + #elif defined(__STDC__) && !defined(__cplusplus) && !defined(__STDC_VERSION__) /* C89/C90 */ + #define PLATFORM_COMPILER_C_LANGLVL 199000L + #else + /* unknown - leave both undef */ + #endif +#endif + +#undef _PLATFORM_COMPILER_STD_STDC +#ifdef __STDC__ + #define _PLATFORM_COMPILER_STD_STDC "__STDC__" +#else + #define _PLATFORM_COMPILER_STD_STDC "-" +#endif +#undef _PLATFORM_COMPILER_STD_STDC_VERSION +#ifdef __STDC_VERSION__ + #define _PLATFORM_COMPILER_STD_STDC_VERSION ",__STDC_VERSION__=" PLATFORM_STRINGIFY(__STDC_VERSION__) +#else + #define _PLATFORM_COMPILER_STD_STDC_VERSION +#endif +#undef _PLATFORM_COMPILER_STD_STDC_EXT +#ifdef __STDC_EXT__ + #define _PLATFORM_COMPILER_STD_STDC_EXT ",__STDC_EXT__=" PLATFORM_STRINGIFY(__STDC_EXT__) +#else + #define _PLATFORM_COMPILER_STD_STDC_EXT +#endif +#undef _PLATFORM_COMPILER_STD_CPLUSPLUS +#ifdef __cplusplus + #define _PLATFORM_COMPILER_STD_CPLUSPLUS ",__cplusplus=" PLATFORM_STRINGIFY(__cplusplus) +#else + #define _PLATFORM_COMPILER_STD_CPLUSPLUS +#endif + +#undef _PLATFORM_COMPILER_MISC_VERSION_STR +#ifndef _PLATFORM_COMPILER_MISC_VERSION_STR + #ifdef __VERSION__ + #define _PLATFORM_COMPILER_MISC_VERSION_STR "|misc:" __VERSION__ + #else + #define _PLATFORM_COMPILER_MISC_VERSION_STR + #endif +#endif +#undef _PLATFORM_COMPILER_GNU_VERSION_STR_HELP +#ifdef _PLATFORM_COMPILER_GNU_VERSION_STR + #define _PLATFORM_COMPILER_GNU_VERSION_STR_HELP "|GNU:" _PLATFORM_COMPILER_GNU_VERSION_STR +#else + #define _PLATFORM_COMPILER_GNU_VERSION_STR_HELP +#endif + +#define PLATFORM_COMPILER_IDSTR \ + "|COMPILER_FAMILY:" \ + PLATFORM_STRINGIFY(PLATFORM_COMPILER_FAMILYNAME) \ +
"|COMPILER_VERSION:" PLATFORM_COMPILER_VERSION_STR \ + "|COMPILER_FAMILYID:" \ + PLATFORM_STRINGIFY(PLATFORM_COMPILER_FAMILYID) \ + _PLATFORM_COMPILER_GNU_VERSION_STR_HELP \ + "|STD:" _PLATFORM_COMPILER_STD_STDC \ + _PLATFORM_COMPILER_STD_STDC_VERSION \ + _PLATFORM_COMPILER_STD_STDC_EXT \ + _PLATFORM_COMPILER_STD_CPLUSPLUS \ + _PLATFORM_COMPILER_MISC_VERSION_STR \ + "|" + +/* ------------------------------------------------------------------------------------ */ +/* OS detection */ +/* + PLATFORM_OS_<family>: + defined to a positive value if OS belongs to a given family, undef otherwise + PLATFORM_OS_FAMILYNAME: + unquoted token which provides the OS family name +*/ + +#if defined(__LIBCATAMOUNT__) || defined(__QK_USER__) + #define PLATFORM_OS_CATAMOUNT 1 + #define PLATFORM_OS_FAMILYNAME CATAMOUNT + +#elif defined(__CRAYXT_COMPUTE_LINUX_TARGET) + #define PLATFORM_OS_CNL 1 + #define PLATFORM_OS_FAMILYNAME CNL + +#elif defined(GASNETI_ARCH_BGP) || defined(__bgp__) + #define PLATFORM_OS_BGP 1 + #define PLATFORM_OS_FAMILYNAME BGP + +#elif defined(GASNETI_ARCH_BGQ) || defined(__bgq__) + #define PLATFORM_OS_BGQ 1 + #define PLATFORM_OS_FAMILYNAME BGQ + +#elif defined(GASNETI_ARCH_WSL) + #define PLATFORM_OS_WSL 1 + #define PLATFORM_OS_FAMILYNAME WSL + +#elif defined(__K42) + #define PLATFORM_OS_K42 1 + #define PLATFORM_OS_FAMILYNAME K42 + +#elif defined(__uClinux__) + #define PLATFORM_OS_UCLINUX 1 + #define PLATFORM_OS_FAMILYNAME UCLINUX + +#elif defined(__linux) || defined(__linux__) || defined(__gnu_linux__) + #define PLATFORM_OS_LINUX 1 + #define PLATFORM_OS_FAMILYNAME LINUX + +#elif defined(__blrts) || defined(__blrts__) || defined(__gnu_blrts__) + #define PLATFORM_OS_BLRTS 1 + #define PLATFORM_OS_FAMILYNAME BLRTS + +#elif defined(__CYGWIN__) + #define PLATFORM_OS_CYGWIN 1 + #define PLATFORM_OS_FAMILYNAME CYGWIN + +#elif defined(_WIN32) + #define PLATFORM_OS_MSWINDOWS 1 + #define PLATFORM_OS_FAMILYNAME MSWINDOWS + +#elif defined(_AIX) + #define PLATFORM_OS_AIX 1 +
#define PLATFORM_OS_FAMILYNAME AIX + +#elif defined(__osf__) || defined(__digital__) + #define PLATFORM_OS_TRU64 1 + #define PLATFORM_OS_FAMILYNAME TRU64 + +#elif defined(__FreeBSD) || defined(__FreeBSD__) + #define PLATFORM_OS_FREEBSD 1 + #define PLATFORM_OS_FAMILYNAME FREEBSD + +#elif defined(__NetBSD) || defined(__NetBSD__) + #define PLATFORM_OS_NETBSD 1 + #define PLATFORM_OS_FAMILYNAME NETBSD + +#elif defined(__OpenBSD__) + #define PLATFORM_OS_OPENBSD 1 + #define PLATFORM_OS_FAMILYNAME OPENBSD + +#elif defined(__sun) || defined(__sun__) + #define PLATFORM_OS_SOLARIS 1 + #define PLATFORM_OS_FAMILYNAME SOLARIS + +#elif (defined(__APPLE__) && defined(__MACH__)) || \ + defined(__osx86__) /* PGI on OSX */ + #define PLATFORM_OS_DARWIN 1 + #define PLATFORM_OS_FAMILYNAME DARWIN + +#elif defined(__sgi) || defined(__sgi__) + #define PLATFORM_OS_IRIX 1 + #define PLATFORM_OS_FAMILYNAME IRIX + +#elif defined(__hpux) || defined(__hpux__) + #define PLATFORM_OS_HPUX 1 + #define PLATFORM_OS_FAMILYNAME HPUX + +#elif defined(_CRAY) || defined(_UNICOSMP) + #define PLATFORM_OS_UNICOS 1 + #define PLATFORM_OS_FAMILYNAME UNICOS + +#elif defined(__MTA__) + #define PLATFORM_OS_MTA 1 + #define PLATFORM_OS_FAMILYNAME MTA + +#elif defined(_SX) + #define PLATFORM_OS_SUPERUX 1 + #define PLATFORM_OS_FAMILYNAME SUPERUX + +#else + #define PLATFORM_OS_UNKNOWN 1 + #define PLATFORM_OS_FAMILYNAME UNKNOWN +#endif + +/* ------------------------------------------------------------------------------------ */ +/* Architecture detection */ +/* + PLATFORM_ARCH_<family>: + defined to positive value if CPU belongs to a given family, undef otherwise + PLATFORM_ARCH_FAMILYNAME: + unquoted token which provides the CPU family name + + PLATFORM_ARCH_32 - 32-bit pointers + PLATFORM_ARCH_64 - 64-bit pointers + PLATFORM_ARCH_BIG_ENDIAN - big-endian word order + PLATFORM_ARCH_LITTLE_ENDIAN - little-endian word order + defined to positive value if CPU is known to have the indicated property, undef otherwise + */ + +#if
defined(__ppc64) || defined(__ppc64__) || \ + defined(__PPC64) || defined(__PPC64__) || \ + defined(__powerpc64) || defined(__powerpc64__) + #define PLATFORM_ARCH_POWERPC 1 + #define PLATFORM_ARCH_FAMILYNAME POWERPC + #define _PLATFORM_ARCH_64 1 + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + +#elif defined(_POWER) || \ + defined(__PPC) || defined(__PPC__) || \ + defined(__powerpc) || defined(__powerpc__) || \ + defined(__ppc) || defined(__ppc__) || \ + defined(__POWERPC__) + #define PLATFORM_ARCH_POWERPC 1 + #define PLATFORM_ARCH_FAMILYNAME POWERPC + #define _PLATFORM_ARCH_32 1 + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + +#elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) + #define PLATFORM_ARCH_POWERPC 1 + #define PLATFORM_ARCH_FAMILYNAME POWERPC + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + +#elif defined(__KNC__) || defined(__MIC__) + #define PLATFORM_ARCH_MIC 1 + #define PLATFORM_ARCH_FAMILYNAME MIC + #define _PLATFORM_ARCH_64 1 + #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 + +#elif defined(__x86_64) || defined(__x86_64__) || \ + defined(__athlon) || defined(__athlon__) || \ + defined(__amd64) || defined(__amd64__) + #define PLATFORM_ARCH_X86_64 1 + #define PLATFORM_ARCH_FAMILYNAME X86_64 + #define _PLATFORM_ARCH_64 1 + #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 + +#elif defined(__ia64__) || defined(__ia64) + #define PLATFORM_ARCH_IA64 1 + #define PLATFORM_ARCH_FAMILYNAME IA64 + #define _PLATFORM_ARCH_64 1 + #if defined(PLATFORM_OS_LINUX) || defined(PLATFORM_OS_FREEBSD) + #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 + #elif defined(PLATFORM_OS_HPUX) + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + #else + /* Unknown. Hope one of the other mechanisms can sort it out. 
*/ + #endif + +#elif defined(__i386__) || defined(__i386) || \ + defined(__i486__) || defined(__i486) || \ + defined(__i586__) || defined(__i586) || \ + defined(__i686__) || defined(__i686) || \ + defined(__pentiumpro) || defined(__pentiumpro__) || \ + defined(_M_IX86) + #define PLATFORM_ARCH_X86 1 + #define PLATFORM_ARCH_FAMILYNAME X86 + #define _PLATFORM_ARCH_32 1 + #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 + +#elif defined(__alpha) || defined(__alpha__) + #define PLATFORM_ARCH_ALPHA 1 + #define PLATFORM_ARCH_FAMILYNAME ALPHA + #define _PLATFORM_ARCH_64 1 + #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 + +#elif defined(_mips) || defined(__mips) || defined(__mips__) || \ + defined(__host_mips) || defined(__host_mips__) || \ + defined(_MIPS_ARCH) || defined(__R4000) + #define PLATFORM_ARCH_MIPS 1 + #define PLATFORM_ARCH_FAMILYNAME MIPS + #ifdef _MIPSEL /* MIPS cores support both little and big endian modes */ + /* SiCortex */ + #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 + #else + /* IRIX */ + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + #endif + #ifdef _MIPS_SZPTR + #if _MIPS_SZPTR == 32 + #define _PLATFORM_ARCH_32 1 + #elif _MIPS_SZPTR == 64 + #define _PLATFORM_ARCH_64 1 + #endif + #endif + +#elif defined(__sparc) || defined(__sparc__) || \ + defined(__sparclet__) || defined(__sparclite__) || \ + defined(__sparcv8) || defined(__sparcv9) + #define PLATFORM_ARCH_SPARC 1 + #define PLATFORM_ARCH_FAMILYNAME SPARC + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + +#elif defined(__hppa) || defined(__hppa__) || \ + defined(__parisc) || defined(__parisc__) || \ + defined(_PA_RISC1_1) || defined(_PA_RISC2_0) + #define PLATFORM_ARCH_PARISC 1 + #define PLATFORM_ARCH_FAMILYNAME PARISC + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + +#elif defined(__crayx1) + #define PLATFORM_ARCH_CRAYX1 1 + #define PLATFORM_ARCH_FAMILYNAME CRAYX1 + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + #define _PLATFORM_ARCH_64 1 + +#elif defined(_CRAYT3E) + #define PLATFORM_ARCH_CRAYT3E 1 + #define PLATFORM_ARCH_FAMILYNAME CRAYT3E + #define 
_PLATFORM_ARCH_BIG_ENDIAN 1 + #define _PLATFORM_ARCH_64 1 + +#elif defined(__MTA__) + #define PLATFORM_ARCH_MTA 1 + #define PLATFORM_ARCH_FAMILYNAME MTA + +#elif defined(_SX) + #define PLATFORM_ARCH_NECSX 1 + #define PLATFORM_ARCH_FAMILYNAME NECSX + +#elif defined(__MICROBLAZE__) + #define PLATFORM_ARCH_MICROBLAZE 1 + #define PLATFORM_ARCH_FAMILYNAME MICROBLAZE + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + #define _PLATFORM_ARCH_32 1 + +#elif defined(__arm__) + #define PLATFORM_ARCH_ARM 1 + #define PLATFORM_ARCH_FAMILYNAME ARM + #define _PLATFORM_ARCH_32 1 + #if defined(__ARMEB__) + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + #elif defined(__ARMEL__) + #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 + #endif + +#elif defined(__aarch64__) + #define PLATFORM_ARCH_AARCH64 1 + #define PLATFORM_ARCH_FAMILYNAME AARCH64 + #if defined(__AARCH64EB__) + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + #elif defined(__AARCH64EL__) + #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 + #endif + +#elif defined(__tile__) + #define PLATFORM_ARCH_TILE 1 + #define PLATFORM_ARCH_FAMILYNAME TILE + #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 + #if defined(__tilegx__) + #define _PLATFORM_ARCH_64 1 + #else + #define _PLATFORM_ARCH_32 1 + #endif + +#elif defined(__s390__) + #define PLATFORM_ARCH_S390 1 + #define PLATFORM_ARCH_FAMILYNAME S390 + #define _PLATFORM_ARCH_BIG_ENDIAN 1 + #if defined(__s390x__) + #define _PLATFORM_ARCH_64 1 + #else + #define _PLATFORM_ARCH_32 1 + #endif + +#else /* unknown CPU */ + #define PLATFORM_ARCH_UNKNOWN 1 + #define PLATFORM_ARCH_FAMILYNAME UNKNOWN +#endif + +/* generic chip properties */ + +#if defined(PLATFORM_ARCH_BIG_ENDIAN) || defined(PLATFORM_ARCH_LITTLE_ENDIAN) + #error internal error in endianness configuration +#endif + +/* PLATFORM_ARCH_{BIG,LITTLE}_ENDIAN: + first detect common preprocessor defines + then default to any arch-specific value provided + */ + +#if defined(__BIG_ENDIAN__) || defined(WORDS_BIGENDIAN) || \ + ( __BYTE_ORDER__ > 0 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ) + #define 
PLATFORM_ARCH_BIG_ENDIAN 1 +#elif defined(__LITTLE_ENDIAN__) || defined(WORDS_LITTLEENDIAN) || \ + ( __BYTE_ORDER__ > 0 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ) + #define PLATFORM_ARCH_LITTLE_ENDIAN 1 +#elif _PLATFORM_ARCH_BIG_ENDIAN + #define PLATFORM_ARCH_BIG_ENDIAN 1 +#elif _PLATFORM_ARCH_LITTLE_ENDIAN + #define PLATFORM_ARCH_LITTLE_ENDIAN 1 +#endif +#undef _PLATFORM_ARCH_BIG_ENDIAN +#undef _PLATFORM_ARCH_LITTLE_ENDIAN + +#if defined(PLATFORM_ARCH_BIG_ENDIAN) && defined(PLATFORM_ARCH_LITTLE_ENDIAN) + #error conflicting endianness information +#endif + +/* PLATFORM_ARCH_{32,64}: + first trust SIZEOF_VOID_P, which is most likely to be accurate + next, detect common 32/64 preprocessor defines + finally default to any arch-specific value provided + */ +#if defined(PLATFORM_ARCH_64) || defined(PLATFORM_ARCH_32) + #error internal error in bit width configuration +#endif + +#if SIZEOF_VOID_P == 8 + #define PLATFORM_ARCH_64 1 +#elif SIZEOF_VOID_P == 4 + #define PLATFORM_ARCH_32 1 +#elif defined(_LP64) || defined(__LP64__) || \ + defined(__arch64__) || defined(__64BIT__) || \ + __INTPTR_MAX__ > 2147483647 + #define PLATFORM_ARCH_64 1 +#elif defined(_ILP32) || defined(__ILP32__) || \ + defined(__arch32__) || defined(__32BIT__) || \ + __INTPTR_MAX__ == 2147483647 + #define PLATFORM_ARCH_32 1 +#elif _PLATFORM_ARCH_64 + #define PLATFORM_ARCH_64 1 +#elif _PLATFORM_ARCH_32 + #define PLATFORM_ARCH_32 1 +#endif +#undef _PLATFORM_ARCH_64 +#undef _PLATFORM_ARCH_32 + +#if defined(PLATFORM_ARCH_64) && defined(PLATFORM_ARCH_32) + #error conflicting bit width information +#elif !defined(PLATFORM_ARCH_64) && !defined(PLATFORM_ARCH_32) + #error missing bit width information +#endif + +/* ------------------------------------------------------------------------------------ */ +/* handy test code that can be parsed after preprocess or executed to show platform results */ +#ifdef PLATFORM_SHOW +#include <stdio.h> +#include <assert.h> +const char * +COMPILER_FAMILYNAME =
PLATFORM_STRINGIFY(PLATFORM_COMPILER_FAMILYNAME) +, * +COMPILER_FAMILYID = PLATFORM_STRINGIFY(PLATFORM_COMPILER_FAMILYID) +, * +COMPILER_VERSION_STR = PLATFORM_COMPILER_VERSION_STR +, * +COMPILER_IDSTR = PLATFORM_COMPILER_IDSTR +, * +OS_FAMILYNAME = PLATFORM_STRINGIFY(PLATFORM_OS_FAMILYNAME) +, * +ARCH_FAMILYNAME = PLATFORM_STRINGIFY(PLATFORM_ARCH_FAMILYNAME) +; +int main(void) { + #define PLATFORM_DISP(x) printf("PLATFORM_"#x"=%s\n",x) + #define PLATFORM_DISPI(x) printf("PLATFORM_"#x"=%li\n",(long int)PLATFORM_##x) + #define PLATFORM_DISPX(x) printf("PLATFORM_"#x"=0x%lx\n",(long int)PLATFORM_##x) + PLATFORM_DISP(COMPILER_FAMILYNAME); + PLATFORM_DISP(COMPILER_FAMILYID); + PLATFORM_DISPI(COMPILER_ID); + PLATFORM_DISPX(COMPILER_VERSION); + PLATFORM_DISP(COMPILER_VERSION_STR); + PLATFORM_DISP(COMPILER_IDSTR); + #if PLATFORM_COMPILER_C_LANGLVL + PLATFORM_DISPI(COMPILER_C_LANGLVL); + #elif PLATFORM_COMPILER_CXX_LANGLVL + PLATFORM_DISPI(COMPILER_CXX_LANGLVL); + #else + printf("WARNING: Missing PLATFORM_COMPILER_C(XX)_LANGLVL!"); + #endif + PLATFORM_DISP(OS_FAMILYNAME); + PLATFORM_DISP(ARCH_FAMILYNAME); + #if PLATFORM_ARCH_32 + PLATFORM_DISPI(ARCH_32); + assert(sizeof(void *) == 4); + #else + PLATFORM_DISPI(ARCH_64); + assert(sizeof(void *) == 8); + #endif + { int x = 0x00FF; + unsigned char *p = (unsigned char *)&x; + #if PLATFORM_ARCH_BIG_ENDIAN + PLATFORM_DISPI(ARCH_BIG_ENDIAN); + assert(*p == 0); + #else + PLATFORM_DISPI(ARCH_LITTLE_ENDIAN); + assert(*p == 0xFF); + #endif + } + return 0; +} +#endif +/* ------------------------------------------------------------------------------------ */ +#endif diff --git a/opal/include/opal/sys/Makefile.am b/opal/include/opal/sys/Makefile.am index cdceeb21fc0..b56e909b376 100644 --- a/opal/include/opal/sys/Makefile.am +++ b/opal/include/opal/sys/Makefile.am @@ -16,6 +16,8 @@ # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2020-2021 Google, LLC. 
All rights reserved. +# Copyright (c) 2022 Amazon.com, Inc. or its affiliates. +# All Rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -26,10 +28,14 @@ # This makefile.am does not stand on its own - it is included from opal/Makefile.am headers += \ - opal/sys/architecture.h \ opal/sys/atomic.h \ opal/sys/atomic_stdc.h \ - opal/sys/atomic_impl.h \ + opal/sys/atomic_impl_minmax_math.h \ + opal/sys/atomic_impl_ptr_cswap.h \ + opal/sys/atomic_impl_ptr_llsc.h \ + opal/sys/atomic_impl_ptr_swap.h \ + opal/sys/atomic_impl_size_t_math.h \ + opal/sys/atomic_impl_spinlock.h \ opal/sys/timer.h \ opal/sys/cma.h diff --git a/opal/include/opal/sys/architecture.h b/opal/include/opal/sys/architecture.h deleted file mode 100644 index 35e7cad7886..00000000000 --- a/opal/include/opal/sys/architecture.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2020 Google, LLC. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * List of supported architectures - */ - -#ifndef OPAL_SYS_ARCHITECTURE_H -#define OPAL_SYS_ARCHITECTURE_H - -/* Architectures */ -#define OPAL_UNSUPPORTED 0000 -#define OPAL_IA32 0010 -#define OPAL_X86_64 0030 -#define OPAL_POWERPC32 0050 -#define OPAL_POWERPC64 0051 -#define OPAL_ARM 0100 -#define OPAL_ARM64 0101 -#define OPAL_BUILTIN_GCC 0202 -#define OPAL_BUILTIN_NO 0203 -#define OPAL_BUILTIN_C11 0204 - -/* Formats */ -#define OPAL_DEFAULT 1000 /* standard for given architecture */ - -#endif /* #ifndef OPAL_SYS_ARCHITECTURE_H */ diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index 944b7d2577e..6fb7b6db268 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -16,6 +16,8 @@ * reserved. * Copyright (c) 2021 Triad National Security, LLC. All rights reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,31 +27,9 @@ #include "atomic_llsc.h" -#if !defined(OPAL_SYS_ARCH_ATOMIC_H) +#ifndef OPAL_SYS_ARCH_ATOMIC_H +#define OPAL_SYS_ARCH_ATOMIC_H 1 -# define OPAL_SYS_ARCH_ATOMIC_H 1 - -# if OPAL_GCC_INLINE_ASSEMBLY - -# define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -# define OPAL_HAVE_ATOMIC_SWAP_32 1 -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -# define OPAL_HAVE_ATOMIC_SWAP_64 1 -# define OPAL_HAVE_ATOMIC_ADD_32 1 -# define OPAL_HAVE_ATOMIC_AND_32 1 -# define OPAL_HAVE_ATOMIC_OR_32 1 -# define OPAL_HAVE_ATOMIC_XOR_32 1 -# define OPAL_HAVE_ATOMIC_SUB_32 1 -# define OPAL_HAVE_ATOMIC_ADD_64 1 -# define OPAL_HAVE_ATOMIC_AND_64 1 -# define OPAL_HAVE_ATOMIC_OR_64 1 -# define OPAL_HAVE_ATOMIC_XOR_64 1 -# define OPAL_HAVE_ATOMIC_SUB_64 1 - -# define MB() __asm__ __volatile__("dmb sy" : : : "memory") -# define RMB() __asm__ __volatile__("dmb ld" : : : "memory") -# define WMB() __asm__ __volatile__("dmb st" : : : "memory") /********************************************************************** * @@ -59,17 +39,17 @@ static inline void opal_atomic_mb(void) { - MB(); + __asm__ __volatile__("dmb sy" : : : "memory"); } static inline void opal_atomic_rmb(void) { - RMB(); + __asm__ __volatile__("dmb ld" : : : "memory"); } static inline void opal_atomic_wmb(void) { - WMB(); + __asm__ __volatile__("dmb st" : : : "memory"); } static inline void opal_atomic_isync(void) @@ -77,9 +57,10 @@ static inline void opal_atomic_isync(void) __asm__ __volatile__("isb"); } + /********************************************************************** * - * Atomic math operations + * Compare and Swap * *********************************************************************/ @@ -104,20 +85,6 @@ static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *a return ret; } -static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) -{ - int32_t ret, tmp; - - __asm__ 
__volatile__("1: ldaxr %w0, [%2] \n" - " stlxr %w1, %w3, [%2] \n" - " cbnz %w1, 1b \n" - : "=&r"(ret), "=&r"(tmp) - : "r"(addr), "r"(newval) - : "cc", "memory"); - - return ret; -} - /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_32 nor atomic_?mb can be inlined). Instead, we "inline" them by hand in @@ -187,21 +154,6 @@ static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *a return ret; } -static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) -{ - int64_t ret; - int tmp; - - __asm__ __volatile__("1: ldaxr %0, [%2] \n" - " stlxr %w1, %3, [%2] \n" - " cbnz %w1, 1b \n" - : "=&r"(ret), "=&r"(tmp) - : "r"(addr), "r"(newval) - : "cc", "memory"); - - return ret; -} - /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_64 nor atomic_?mb can be inlined). Instead, we "inline" them by hand in @@ -251,35 +203,110 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_ return ret; } -# define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ - static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, \ - type value) \ - { \ - type newval, old; \ - int32_t tmp; \ - \ - __asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \ - " " inst " %" reg "0, %" reg "1, %" reg "4 \n" \ - " stxr %w2, %" reg "0, [%3] \n" \ - " cbnz %w2, 1b \n" \ - : "=&r"(newval), "=&r"(old), "=&r"(tmp) \ - : "r"(addr), "r"(value) \ - : "cc", "memory"); \ - \ - return old; \ - } +#include "opal/sys/atomic_impl_ptr_cswap.h" + + +/********************************************************************** + * + * Swap + * + *********************************************************************/ + +static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) +{ + int32_t ret, tmp; + + __asm__ __volatile__("1: ldaxr %w0, [%2] \n" + " stlxr 
%w1, %w3, [%2] \n" + " cbnz %w1, 1b \n" + : "=&r"(ret), "=&r"(tmp) + : "r"(addr), "r"(newval) + : "cc", "memory"); + + return ret; +} + +static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) +{ + int64_t ret; + int tmp; + + __asm__ __volatile__("1: ldaxr %0, [%2] \n" + " stlxr %w1, %3, [%2] \n" + " cbnz %w1, 1b \n" + : "=&r"(ret), "=&r"(tmp) + : "r"(addr), "r"(newval) + : "cc", "memory"); + + return ret; +} + +#include "opal/sys/atomic_impl_ptr_swap.h" + + +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ + +#include "opal/sys/atomic_impl_spinlock.h" + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + +#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ + static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, \ + type value) \ + { \ + type newval, old; \ + int32_t tmp; \ + \ + __asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \ + " " inst " %" reg "0, %" reg "1, %" reg "4 \n" \ + " stxr %w2, %" reg "0, [%3] \n" \ + " cbnz %w2, 1b \n" \ + : "=&r"(newval), "=&r"(old), "=&r"(tmp) \ + : "r"(addr), "r"(value) \ + : "cc", "memory"); \ + \ + return old; \ + } \ + static inline type opal_atomic_##name##_fetch_##bits(opal_atomic_##type *addr, \ + type value) \ + { \ + type newval, old; \ + int32_t tmp; \ + \ + __asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \ + " " inst " %" reg "0, %" reg "1, %" reg "4 \n" \ + " stxr %w2, %" reg "0, [%3] \n" \ + " cbnz %w2, 1b \n" \ + : "=&r"(newval), "=&r"(old), "=&r"(tmp) \ + : "r"(addr), "r"(value) \ + : "cc", "memory"); \ + \ + return newval; \ + } OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w") OPAL_ASM_MAKE_ATOMIC(int32_t, 32, and, "and", "w") OPAL_ASM_MAKE_ATOMIC(int32_t, 32, or, "orr", "w") 
OPAL_ASM_MAKE_ATOMIC(int32_t, 32, xor, "eor", "w") OPAL_ASM_MAKE_ATOMIC(int32_t, 32, sub, "sub", "w") + OPAL_ASM_MAKE_ATOMIC(int64_t, 64, add, "add", "") OPAL_ASM_MAKE_ATOMIC(int64_t, 64, and, "and", "") OPAL_ASM_MAKE_ATOMIC(int64_t, 64, or, "orr", "") OPAL_ASM_MAKE_ATOMIC(int64_t, 64, xor, "eor", "") OPAL_ASM_MAKE_ATOMIC(int64_t, 64, sub, "sub", "") -# endif /* OPAL_GCC_INLINE_ASSEMBLY */ +#include "opal/sys/atomic_impl_minmax_math.h" +#include "opal/sys/atomic_impl_size_t_math.h" + #endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/arm64/atomic_llsc.h b/opal/include/opal/sys/arm64/atomic_llsc.h index 807ff526a2c..f51ab4a3481 100644 --- a/opal/include/opal/sys/arm64/atomic_llsc.h +++ b/opal/include/opal/sys/arm64/atomic_llsc.h @@ -16,6 +16,8 @@ * reserved. * Copyright (c) 2021 Triad National Security, LLC. All rights reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,26 +25,24 @@ * $HEADER$ */ -#if !defined(OPAL_SYS_ARCH_ATOMIC_LLSC_H) +#ifndef OPAL_SYS_ARCH_ATOMIC_LLSC_H +#define OPAL_SYS_ARCH_ATOMIC_LLSC_H 1 -# define OPAL_SYS_ARCH_ATOMIC_LLSC_H +/* + * this file is included even when C11 or GCC built-in atomics are + * used, which is why we must check for gcc inline assembly support. 
+ */ # if OPAL_C_GCC_INLINE_ASSEMBLY -# undef OPAL_HAVE_ATOMIC_LLSC_32 -# undef OPAL_HAVE_ATOMIC_LLSC_64 - # define OPAL_HAVE_ATOMIC_LLSC_32 1 # define OPAL_HAVE_ATOMIC_LLSC_64 1 # define opal_atomic_ll_32(addr, ret) \ do { \ opal_atomic_int32_t *_addr = (addr); \ - int32_t _ret; \ \ - __asm__ __volatile__("ldaxr %w0, [%1] \n" : "=&r"(_ret) : "r"(_addr)); \ - \ - ret = (typeof(ret)) _ret; \ + __asm__ __volatile__("ldaxr %w0, [%1] \n" : "=&r"(ret) : "r"(_addr)); \ } while (0) # define opal_atomic_sc_32(addr, newval, ret) \ @@ -62,11 +62,8 @@ # define opal_atomic_ll_64(addr, ret) \ do { \ opal_atomic_int64_t *_addr = (addr); \ - int64_t _ret; \ - \ - __asm__ __volatile__("ldaxr %0, [%1] \n" : "=&r"(_ret) : "r"(_addr)); \ \ - ret = (typeof(ret)) _ret; \ + __asm__ __volatile__("ldaxr %0, [%1] \n" : "=&r"(ret) : "r"(_addr)); \ } while (0) # define opal_atomic_sc_64(addr, newval, ret) \ @@ -83,6 +80,8 @@ ret = (_ret == 0); \ } while (0) -# endif /* OPAL_GCC_INLINE_ASSEMBLY */ +#include "opal/sys/atomic_impl_ptr_llsc.h" + +# endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ #endif /* ! OPAL_SYS_ARCH_ATOMIC_LLSC_H */ diff --git a/opal/include/opal/sys/arm64/timer.h b/opal/include/opal/sys/arm64/timer.h index 257f3782cb1..0f237e81506 100644 --- a/opal/include/opal/sys/arm64/timer.h +++ b/opal/include/opal/sys/arm64/timer.h @@ -7,6 +7,8 @@ * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -17,18 +19,20 @@ #ifndef OPAL_SYS_ARCH_TIMER_H #define OPAL_SYS_ARCH_TIMER_H 1 -#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +#if defined(PLATFORM_ARCH_AARCH64) typedef uint64_t opal_timer_t; #else typedef uint32_t opal_timer_t; #endif +#if OPAL_C_GCC_INLINE_ASSEMBLY + static inline opal_timer_t opal_sys_timer_get_cycles(void) { opal_timer_t ret; __asm__ __volatile__("isb" ::: "memory"); -#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +#if defined(PLATFORM_ARCH_AARCH64) __asm__ __volatile__("mrs %0, CNTVCT_EL0" : "=r"(ret)); #else __asm__ __volatile__("mrs %0, CNTVCT" : "=r"(ret)); @@ -40,7 +44,7 @@ static inline opal_timer_t opal_sys_timer_get_cycles(void) static inline opal_timer_t opal_sys_timer_get_freq(void) { opal_timer_t freq; -#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +#if defined(PLATFORM_ARCH_AARCH64) __asm__ __volatile__("mrs %0, CNTFRQ_EL0" : "=r"(freq)); #else __asm__ __volatile__("mrs %0, CNTFRQ" : "=r"(freq)); @@ -51,4 +55,6 @@ static inline opal_timer_t opal_sys_timer_get_freq(void) #define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1 #define OPAL_HAVE_SYS_TIMER_GET_FREQ 1 +#endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ + #endif /* ! OPAL_SYS_ARCH_TIMER_H */ diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 01c4ba514b7..be647260b73 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -17,6 +17,8 @@ * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020-2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -39,9 +41,6 @@ * The following #defines will be true / false based on * assembly support: * - * - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers - * - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks - * * Note that for the Atomic math, atomic add/sub may be implemented as * C code using opal_atomic_compare_exchange. The appearance of atomic * operation will be upheld in these cases. @@ -50,166 +49,18 @@ #ifndef OPAL_SYS_ATOMIC_H #define OPAL_SYS_ATOMIC_H 1 -#include "opal_config.h" - #include -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #include "opal_stdatomic.h" -/* do some quick #define cleanup in cases where we are doing - testing... */ -#ifdef OPAL_DISABLE_INLINE_ASM -# undef OPAL_C_GCC_INLINE_ASSEMBLY -# define OPAL_C_GCC_INLINE_ASSEMBLY 0 -#endif - -#if OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER) - -# include "atomic_stdc.h" - -#else /* !OPAL_C_HAVE__ATOMIC */ - -/* define OPAL_{GCC,DEC,XLC}_INLINE_ASSEMBLY based on the - OPAL_C_{GCC,DEC,XLC}_INLINE_ASSEMBLY defines and whether we - are in C or C++ */ -# if defined(c_plusplus) || defined(__cplusplus) -/* We no longer support inline assembly for C++ as OPAL is a C-only interface */ -# define OPAL_GCC_INLINE_ASSEMBLY 0 -# else -# define OPAL_GCC_INLINE_ASSEMBLY OPAL_C_GCC_INLINE_ASSEMBLY -# endif - BEGIN_C_DECLS -/********************************************************************** - * - * Data structures for atomic ops - * - *********************************************************************/ -/** - * Volatile lock object (with optional padding). - * - * \note The internals of the lock are included here, but should be - * considered private. The implementation currently in use may choose - * to use an int or unsigned char as the lock value - the user is not - * informed either way. 
- */ -struct opal_atomic_lock_t { - union { - opal_atomic_int32_t lock; /**< The lock address (an integer) */ - volatile unsigned char sparc_lock; /**< The lock address on sparc */ - char padding[sizeof(int)]; /**< Array for optional padding */ - } u; -}; -typedef struct opal_atomic_lock_t opal_atomic_lock_t; /********************************************************************** * - * Set or unset these macros in the architecture-specific atomic.h - * files if we need to specify them as inline or non-inline + * Memory Barriers * *********************************************************************/ -# if !OPAL_GCC_INLINE_ASSEMBLY -# define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 0 -# define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_ADD_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_AND_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_OR_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_XOR_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_SUB_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_ADD_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_AND_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_OR_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_XOR_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_SUB_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_SWAP_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 0 -# else -# define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 1 -# define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_AND_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_OR_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_XOR_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_SUB_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_ADD_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_AND_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_OR_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_XOR_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_SUB_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_SWAP_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 1 
-# endif - -/** - * Enumeration of lock states - */ -enum { OPAL_ATOMIC_LOCK_UNLOCKED = 0, OPAL_ATOMIC_LOCK_LOCKED = 1 }; - -# define OPAL_ATOMIC_LOCK_INIT \ - { \ - .u = {.lock = OPAL_ATOMIC_LOCK_UNLOCKED } \ - } - -/********************************************************************** - * - * Load the appropriate architecture files and set some reasonable - * default values for our support - * - *********************************************************************/ -# if defined(DOXYGEN) -/* don't include system-level gorp when generating doxygen files */ -# elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_GCC -# include "opal/sys/gcc_builtin/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 -# include "opal/sys/x86_64/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_ARM -# include "opal/sys/arm/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 -# include "opal/sys/arm64/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 -# include "opal/sys/ia32/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32 -# include "opal/sys/powerpc/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 -# include "opal/sys/powerpc/atomic.h" -# endif - -# ifndef DOXYGEN -/* compare and set operations can't really be emulated from software, - so if these defines aren't already set, they should be set to 0 - now */ -# ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 0 -# endif -# ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0 -# endif -# ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 -# endif -# ifndef OPAL_HAVE_ATOMIC_LLSC_32 -# define OPAL_HAVE_ATOMIC_LLSC_32 0 -# endif -# ifndef OPAL_HAVE_ATOMIC_LLSC_64 -# define OPAL_HAVE_ATOMIC_LLSC_64 0 -# endif -# endif /* DOXYGEN */ - -/********************************************************************** - * - * Memory Barriers - defined here if running doxygen or have barriers - * but can't inline - * - 
*********************************************************************/ -# if !defined(OPAL_HAVE_ATOMIC_MEM_BARRIER) && !defined(DOXYGEN) -/* no way to emulate in C code */ -# define OPAL_HAVE_ATOMIC_MEM_BARRIER 0 -# endif - -# if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MEM_BARRIER /** * Memory barrier * @@ -223,12 +74,7 @@ enum { OPAL_ATOMIC_LOCK_UNLOCKED = 0, OPAL_ATOMIC_LOCK_LOCKED = 1 }; * generally grinding the memory controller's performance. Use only * if you need *both* read and write barriers. */ - -# if OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER -static inline -# endif - void - opal_atomic_mb(void); +static inline void opal_atomic_mb(void); /** * Read memory barrier @@ -239,12 +85,7 @@ static inline * next read. Nothing is said about the ordering of writes when using * \c opal_atomic_rmb(). */ - -# if OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER -static inline -# endif - void - opal_atomic_rmb(void); +static inline void opal_atomic_rmb(void); /** * Write memory barrier. @@ -255,41 +96,208 @@ static inline * next write. Nothing is said about the ordering of reads when using * \c opal_atomic_wmb(). */ +static inline void opal_atomic_wmb(void); -# if OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER -static inline -# endif - void - opal_atomic_wmb(void); -# endif /* defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MEM_BARRIER */ +/********************************************************************** + * + * Compare and Swap + * + * Implementations must provide 32 and 64 bit compare-and-swap + * operations, but may provide the ptr implementation by including + * atomic_impl_ptr_cswap.h (which implements the ptr implementation + * over the 32 and 64 bit implementations). + * + *********************************************************************/ + +/* + * The stdc implementation is implemented as macros around the C11 + * atomic interface (which is a type-independent interface).
While it + * would be better to have type checking so developers using the C11 + * interface didn't accidentally munge something that broke on other + * implementations, there are a ton of warnings due to volatile casting + * in the opal_lifo code. Don't enforce the types of the function + * calls on C11 until we can sort that out. + */ +#if OPAL_USE_C11_ATOMICS == 0 + +/** + * Atomic compare and set of 32 bit integers with acquire and release semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, int32_t *oldval, + int32_t newval); + +/** + * Atomic compare and set of 32 bit integers with acquire semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_acq_32(opal_atomic_int32_t *addr, int32_t *oldval, + int32_t newval); + +/** + * Atomic compare and set of 32 bit integers with release semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval.
+ */ +static inline bool opal_atomic_compare_exchange_strong_rel_32(opal_atomic_int32_t *addr, int32_t *oldval, + int32_t newval); + +/** + * Atomic compare and set of 64 bit integers with acquire and release semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, int64_t *oldval, + int64_t newval); + +/** + * Atomic compare and set of 64 bit integers with acquire semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_acq_64(opal_atomic_int64_t *addr, int64_t *oldval, + int64_t newval); + +/** + * Atomic compare and set of 64 bit integers with release semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_t *addr, int64_t *oldval, + int64_t newval); + +/** + * Atomic compare and set of pointer-sized integers with acquire and release semantics.
+ * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval); + +/** + * Atomic compare and set of pointer-sized integers with acquire semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_acq_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval); + +/** + * Atomic compare and set of pointer-sized integers with release semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_rel_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval); /********************************************************************** * - * Atomic spinlocks - always inlined, if have atomic compare-and-swap + * Swap + * + * Implementations may provide a native implementation of these + * operations or include atomic_swap_impl.h, which provides + * implementations over compare-and-swap.
* *********************************************************************/ +/** + * Atomic swap of 32 bit value + * @param addr Address of value to be swapped + * @param newval New value to set in addr + * + * @returns Value in addr before swap + */ +static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval); -# if !defined(OPAL_HAVE_ATOMIC_SPINLOCKS) && !defined(DOXYGEN) -/* 0 is more like "pending" - we'll fix up at the end after all - the static inline functions are declared */ -# define OPAL_HAVE_ATOMIC_SPINLOCKS 0 -# endif +/** + * Atomic swap of 64 bit value + * @param addr Address of value to be swapped + * @param newval New value to set in addr + * + * @returns Value in addr before swap + */ +static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval); -# if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS \ - || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) +/** + * Atomic swap of pointer-sized value + * @param addr Address of value to be swapped + * @param newval New value to set in addr + * + * @returns Value in addr before swap + */ +static inline intptr_t opal_atomic_swap_ptr(opal_atomic_intptr_t *addr, intptr_t newval); + +#endif /* OPAL_USE_C11_ATOMICS == 0 */ + +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ /** * Initialize a lock to value * * @param lock Address of the lock * @param value Initial value to set lock to */ -# if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 -static inline -# endif - void - opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value); +static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value); /** * Try to acquire a lock. @@ -297,102 +305,28 @@ static inline * @param lock Address of the lock. * @return 0 if the lock was acquired, 1 otherwise.
*/ -# if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 -static inline -# endif - int - opal_atomic_trylock(opal_atomic_lock_t *lock); +static inline int opal_atomic_trylock(opal_atomic_lock_t *lock); /** * Acquire a lock by spinning. * * @param lock Address of the lock. */ -# if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 -static inline -# endif - void - opal_atomic_lock(opal_atomic_lock_t *lock); +static inline void opal_atomic_lock(opal_atomic_lock_t *lock); /** * Release a lock. * * @param lock Address of the lock. */ -# if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 -static inline -# endif - void - opal_atomic_unlock(opal_atomic_lock_t *lock); - -# if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 -# undef OPAL_HAVE_ATOMIC_SPINLOCKS -# define OPAL_HAVE_ATOMIC_SPINLOCKS \ - (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) -# define OPAL_NEED_INLINE_ATOMIC_SPINLOCKS 1 -# endif +static inline void opal_atomic_unlock(opal_atomic_lock_t *lock); -# endif /* OPAL_HAVE_ATOMIC_SPINLOCKS */ /********************************************************************** * * Atomic math operations * *********************************************************************/ -# if !defined(OPAL_HAVE_ATOMIC_CMPSET_32) && !defined(DOXYGEN) -# define OPAL_HAVE_ATOMIC_CMPSET_32 0 -# endif -# if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_32 - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, int32_t *oldval, - int32_t newval); - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_acq_32(opal_atomic_int32_t *addr, int32_t *oldval, - int32_t newval); - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_rel_32(opal_atomic_int32_t *addr, int32_t *oldval, - int32_t newval); -# endif - -# if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) && !defined(DOXYGEN) -# define 
OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0 -# endif -# if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, int64_t *oldval, - int64_t newval); - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_acq_64(opal_atomic_int64_t *addr, int64_t *oldval, - int64_t newval); - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_t *addr, int64_t *oldval, - int64_t newval); - -# endif static inline int32_t opal_atomic_add_fetch_32(opal_atomic_int32_t *addr, int delta); static inline int32_t opal_atomic_fetch_add_32(opal_atomic_int32_t *addr, int delta); @@ -409,7 +343,6 @@ static inline int32_t opal_atomic_fetch_min_32(opal_atomic_int32_t *addr, int32_ static inline int32_t opal_atomic_max_fetch_32(opal_atomic_int32_t *addr, int32_t value); static inline int32_t opal_atomic_fetch_max_32(opal_atomic_int32_t *addr, int32_t value); - static inline int64_t opal_atomic_add_fetch_64(opal_atomic_int64_t *addr, int64_t delta); static inline int64_t opal_atomic_fetch_add_64(opal_atomic_int64_t *addr, int64_t delta); static inline int64_t opal_atomic_and_fetch_64(opal_atomic_int64_t *addr, int64_t value); @@ -424,245 +357,111 @@ static inline int64_t opal_atomic_fetch_min_64(opal_atomic_int64_t *addr, int64_ static inline int64_t opal_atomic_max_fetch_64(opal_atomic_int64_t *addr, int64_t value); static inline int64_t opal_atomic_fetch_max_64(opal_atomic_int64_t *addr, int64_t value); +static inline size_t opal_atomic_add_fetch_size_t(opal_atomic_size_t *addr, size_t delta); +static inline size_t opal_atomic_fetch_add_size_t(opal_atomic_size_t *addr, size_t delta); -/* provide a size_t add/subtract. 
When in debug mode, make it an - * inline function so that we don't have any casts in the - * interface and can catch type errors. When not in debug mode, - * just make it a macro, so that there's no performance penalty - */ -# if defined(DOXYGEN) || OPAL_ENABLE_DEBUG -static inline size_t opal_atomic_add_fetch_size_t(opal_atomic_size_t *addr, size_t delta) -{ -# if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_add_fetch_32((int32_t *) addr, delta); -# elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_add_fetch_64((int64_t *) addr, delta); -# else -# error "Unknown size_t size" -# endif -} - -static inline size_t opal_atomic_fetch_add_size_t(opal_atomic_size_t *addr, size_t delta) -{ -# if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_fetch_add_32((int32_t *) addr, delta); -# elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_fetch_add_64((int64_t *) addr, delta); -# else -# error "Unknown size_t size" -# endif -} - -static inline size_t opal_atomic_sub_fetch_size_t(opal_atomic_size_t *addr, size_t delta) -{ -# if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_sub_fetch_32((int32_t *) addr, delta); -# elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_sub_fetch_64((int64_t *) addr, delta); -# else -# error "Unknown size_t size" -# endif -} - -static inline size_t opal_atomic_fetch_sub_size_t(opal_atomic_size_t *addr, size_t delta) -{ -# if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_fetch_sub_32((int32_t *) addr, delta); -# elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_fetch_sub_64((int64_t *) addr, delta); -# else -# error "Unknown size_t size" -# endif -} - -# else -# if SIZEOF_SIZE_T == 4 -# define opal_atomic_add_fetch_size_t(addr, delta) \ - ((size_t) opal_atomic_add_fetch_32((opal_atomic_int32_t *) addr, delta)) -# define opal_atomic_fetch_add_size_t(addr, delta) \ - ((size_t) opal_atomic_fetch_add_32((opal_atomic_int32_t *) addr, delta)) -# define opal_atomic_sub_fetch_size_t(addr, delta) \ - ((size_t) 
opal_atomic_sub_fetch_32((opal_atomic_int32_t *) addr, delta)) -# define opal_atomic_fetch_sub_size_t(addr, delta) \ - ((size_t) opal_atomic_fetch_sub_32((opal_atomic_int32_t *) addr, delta)) -# elif SIZEOF_SIZE_T == 8 -# define opal_atomic_add_fetch_size_t(addr, delta) \ - ((size_t) opal_atomic_add_fetch_64((opal_atomic_int64_t *) addr, delta)) -# define opal_atomic_fetch_add_size_t(addr, delta) \ - ((size_t) opal_atomic_fetch_add_64((opal_atomic_int64_t *) addr, delta)) -# define opal_atomic_sub_fetch_size_t(addr, delta) \ - ((size_t) opal_atomic_sub_fetch_64((opal_atomic_int64_t *) addr, delta)) -# define opal_atomic_fetch_sub_size_t(addr, delta) \ - ((size_t) opal_atomic_fetch_sub_64((opal_atomic_int64_t *) addr, delta)) -# else -# error "Unknown size_t size" -# endif -# endif - -# if defined(DOXYGEN) \ - || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) -/* these are always done with inline functions, so always mark as - static inline */ - -static inline bool opal_atomic_compare_exchange_strong_xx(opal_atomic_intptr_t *addr, - intptr_t *oldval, int64_t newval, - size_t length); -static inline bool opal_atomic_compare_exchange_strong_acq_xx(opal_atomic_intptr_t *addr, - intptr_t *oldval, int64_t newval, - size_t length); -static inline bool opal_atomic_compare_exchange_strong_rel_xx(opal_atomic_intptr_t *addr, - intptr_t *oldval, int64_t newval, - size_t length); - -static inline bool opal_atomic_compare_exchange_strong_ptr(opal_atomic_intptr_t *addr, - intptr_t *oldval, intptr_t newval); -static inline bool opal_atomic_compare_exchange_strong_acq_ptr(opal_atomic_intptr_t *addr, - intptr_t *oldval, intptr_t newval); -static inline bool opal_atomic_compare_exchange_strong_rel_ptr(opal_atomic_intptr_t *addr, - intptr_t *oldval, intptr_t newval); - +#ifdef DOXYGEN /* because this isn't a proper C prototype */ /** - * Atomic compare and set of generic type with relaxed semantics. 
This - * macro detect at compile time the type of the first argument and - * choose the correct function to be called. - * - * \note This macro should only be used for integer types. + * Atomically add delta to addr, type independent * - * @param addr Address of . - * @param oldval Comparison value address of . - * @param newval New value to set if comparision is true . + * @param addr Address of value to update + * @param delta Value by which to change the value in addr * - * See opal_atomic_compare_exchange_* for pseudo-code. + * Generally implemented as a macro (except for when implemented as a + * compiler built-in), this function provides a type-independent math + * operator. */ -# define opal_atomic_compare_exchange_strong(ADDR, OLDVAL, NEWVAL) \ - opal_atomic_compare_exchange_strong_xx((opal_atomic_intptr_t *) (ADDR), \ - (intptr_t *) (OLDVAL), (intptr_t)(NEWVAL), \ - sizeof(*(ADDR))) +static inline void opal_atomic_add(type *addr, type delta); +#endif -/** - * Atomic compare and set of generic type with acquire semantics. This - * macro detect at compile time the type of the first argument and - * choose the correct function to be called. - * - * \note This macro should only be used for integer types. - * - * @param addr Address of . - * @param oldval Comparison value address of . - * @param newval New value to set if comparision is true . - * - * See opal_atomic_compare_exchange_acq_* for pseudo-code. - */ -# define opal_atomic_compare_exchange_strong_acq(ADDR, OLDVAL, NEWVAL) \ - opal_atomic_compare_exchange_strong_acq_xx((opal_atomic_intptr_t *) (ADDR), \ - (intptr_t *) (OLDVAL), (intptr_t)(NEWVAL), \ - sizeof(*(ADDR))) -/** - * Atomic compare and set of generic type with release semantics. This - * macro detect at compile time the type of the first argument and - * choose the correct function to be called. +/********************************************************************** * - * \note This macro should only be used for integer types. 
+ * Load-linked, Store Conditional * - * @param addr Address of . - * @param oldval Comparison value address of . - * @param newval New value to set if comparision is true . + * Optional. Check OPAL_HAVE_ATOMIC_LLSC_32, + * OPAL_HAVE_ATOMIC_LLSC_64, or OPAL_HAVE_ATOMIC_LLSC_PTR before + * using. Implemented as macros due to function call behaviors; + * prototyped here as C++-style functions for readability. * - * See opal_atomic_compare_exchange_rel_* for pseudo-code. - */ -# define opal_atomic_compare_exchange_strong_rel(ADDR, OLDVAL, NEWVAL) \ - opal_atomic_compare_exchange_strong_rel_xx((opal_atomic_intptr_t *) (ADDR), \ - (intptr_t *) (OLDVAL), (intptr_t)(NEWVAL), \ - sizeof(*(ADDR))) - -# endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ - -static inline void opal_atomic_add_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length); -static inline void opal_atomic_sub_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length); - -static inline intptr_t opal_atomic_add_fetch_ptr(opal_atomic_intptr_t *addr, void *delta); -static inline intptr_t opal_atomic_fetch_add_ptr(opal_atomic_intptr_t *addr, void *delta); -static inline intptr_t opal_atomic_sub_fetch_ptr(opal_atomic_intptr_t *addr, void *delta); -static inline intptr_t opal_atomic_fetch_sub_ptr(opal_atomic_intptr_t *addr, void *delta); - -/** - * Atomically increment the content depending on the type. This - * macro detect at compile time the type of the first argument - * and choose the correct function to be called. + * C11 and GCC built-in atomics don't provide native LL/SC support, so + * if there is an architectural implementation, we use it even if + * we are using the C11 or GCC built-in atomics. * - * \note This macro should only be used for integer types. - * - * @param addr Address of - * @param delta Value to add (converted to ).
- */ -# define opal_atomic_add(ADDR, VALUE) \ - opal_atomic_add_xx((opal_atomic_intptr_t *) (ADDR), (int32_t)(VALUE), sizeof(*(ADDR))) + *********************************************************************/ -/** - * Atomically decrement the content depending on the type. This - * macro detect at compile time the type of the first argument - * and choose the correct function to be called. - * - * \note This macro should only be used for integer types. - * - * @param addr Address of - * @param delta Value to substract (converted to ). - */ -# define opal_atomic_sub(ADDR, VALUE) \ - opal_atomic_sub_xx((opal_atomic_intptr_t *) (ADDR), (int32_t)(VALUE), sizeof(*(ADDR))) +#ifdef DOXYGEN +static inline void opal_atomic_ll_32(opal_atomic_int32_t *addr, int32_t &ret); -/* - * Include inline implementations of everything not defined directly - * in assembly - */ -# include "opal/sys/atomic_impl.h" +static inline void opal_atomic_sc_32(opal_atomic_int32_t *addr, int32_t newval, int &ret); -#endif /* !OPAL_C_HAVE__ATOMIC */ +static inline void opal_atomic_ll_64(opal_atomic_int64_t *addr, int64_t &ret); -/****** load-linked, store-conditional atomic implementations ******/ +static inline void opal_atomic_sc_64(opal_atomic_int64_t *addr, int64_t newval, int &ret); -/* C11 atomics do not expose the low-level load-linked, store-conditional - * instructions. Open MPI can use these instructions to implement a more - * efficient version of the lock-free lifo and fifo. On Apple Silicon the - * LL/SC fifo and lifo are ~ 2-20x faster than the CAS128 implementation. 
*/ -#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 -# include "opal/sys/arm64/atomic_llsc.h" -#endif +static inline void opal_atomic_ll_ptr(opal_atomic_intptr_t *addr, intptr_t &ret); -#if !defined(OPAL_HAVE_ATOMIC_LLSC_32) -# define OPAL_HAVE_ATOMIC_LLSC_32 0 -#endif +static inline void opal_atomic_sc_ptr(opal_atomic_intptr_t *addr, intptr_t newval, int &ret); -#if !defined(OPAL_HAVE_ATOMIC_LLSC_64) -# define OPAL_HAVE_ATOMIC_LLSC_64 0 #endif -#if (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64) -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32 +/********************************************************************** + * + * Load the appropriate architecture files and set some reasonable + * default values for our support + * + *********************************************************************/ -# define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((opal_atomic_int32_t *) (addr), ret) -# define opal_atomic_sc_ptr(addr, value, ret) \ - opal_atomic_sc_32((opal_atomic_int32_t *) (addr), (intptr_t)(value), ret) +#if defined(DOXYGEN) +/* don't include system-level gorp when generating doxygen files */ +#elif OPAL_USE_C11_ATOMICS == 1 +# include "opal/sys/atomic_stdc.h" +#elif OPAL_USE_GCC_BUILTIN_ATOMICS == 1 +# include "opal/sys/gcc_builtin/atomic.h" +#elif OPAL_USE_ASM_ATOMICS == 1 +# if defined(PLATFORM_ARCH_X86_64) +# include "opal/sys/x86_64/atomic.h" +# elif defined(PLATFORM_ARCH_AARCH64) +# include "opal/sys/arm64/atomic.h" +# elif defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) +# include "opal/sys/powerpc/atomic.h" +# else +# error "No asm support found." +# endif +#else +#error "No atomics support found." 
+#endif -# define OPAL_HAVE_ATOMIC_LLSC_PTR 1 +#if defined(PLATFORM_ARCH_AARCH64) +# include "opal/sys/arm64/atomic_llsc.h" +#elif defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) +# include "opal/sys/powerpc/atomic_llsc.h" +#endif -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64 -# define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((opal_atomic_int64_t *) (addr), ret) -# define opal_atomic_sc_ptr(addr, value, ret) \ - opal_atomic_sc_64((opal_atomic_int64_t *) (addr), (intptr_t)(value), ret) +/********************************************************************** + * + * Ensure defines for the few optional features are always defined + * + *********************************************************************/ -# define OPAL_HAVE_ATOMIC_LLSC_PTR 1 +#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 +# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 +#endif -# endif +#ifndef OPAL_HAVE_ATOMIC_LLSC_32 +# define OPAL_HAVE_ATOMIC_LLSC_32 0 +#endif -#else +#ifndef OPAL_HAVE_ATOMIC_LLSC_64 +# define OPAL_HAVE_ATOMIC_LLSC_64 0 +#endif +#ifndef OPAL_HAVE_ATOMIC_LLSC_PTR # define OPAL_HAVE_ATOMIC_LLSC_PTR 0 - -#endif /* (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)*/ +#endif END_C_DECLS diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h deleted file mode 100644 index ef522daad43..00000000000 --- a/opal/include/opal/sys/atomic_impl.h +++ /dev/null @@ -1,486 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. 
- * All rights reserved. - * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* Inline C implementation of the functions defined in atomic.h */ - -#include - -/********************************************************************** - * - * Atomic math operations - * - * All the architectures provide a compare_and_set atomic operations. If - * they dont provide atomic additions and/or substractions then we can - * define these operations using the atomic compare_and_set. - * - * Some architectures do not provide support for the 64 bits - * atomic operations. Until we find a better solution let's just - * undefine all those functions if there is no 64 bit compare-exchange - * - *********************************************************************/ -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 - -# if !defined(OPAL_HAVE_ATOMIC_MIN_32) -static inline int32_t opal_atomic_fetch_min_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = *addr; - do { - if (old <= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); - - return old; -} - -# define OPAL_HAVE_ATOMIC_MIN_32 1 - -# endif /* OPAL_HAVE_ATOMIC_MIN_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_MAX_32) -static inline int32_t opal_atomic_fetch_max_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = *addr; - do { - if (old >= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); - - return old; -} - -# define OPAL_HAVE_ATOMIC_MAX_32 1 -# endif /* OPAL_HAVE_ATOMIC_MAX_32 */ - -# define OPAL_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operation, name) \ - static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, type value) \ - { \ - type oldval; \ - do { \ - oldval = *addr; \ - } while 
(!opal_atomic_compare_exchange_strong_##bits(addr, &oldval, \ - oldval operation value)); \ - \ - return oldval; \ - } - -# if !defined(OPAL_HAVE_ATOMIC_SWAP_32) -# define OPAL_HAVE_ATOMIC_SWAP_32 1 -static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) -{ - int32_t old = *addr; - do { - } while (!opal_atomic_compare_exchange_strong_32(addr, &old, newval)); - - return old; -} -# endif /* OPAL_HAVE_ATOMIC_SWAP_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_ADD_32) -# define OPAL_HAVE_ATOMIC_ADD_32 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, +, add) - -# endif /* OPAL_HAVE_ATOMIC_ADD_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_AND_32) -# define OPAL_HAVE_ATOMIC_AND_32 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, &, and) - -# endif /* OPAL_HAVE_ATOMIC_AND_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_OR_32) -# define OPAL_HAVE_ATOMIC_OR_32 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, |, or) - -# endif /* OPAL_HAVE_ATOMIC_OR_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_XOR_32) -# define OPAL_HAVE_ATOMIC_XOR_32 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, ^, xor) - -# endif /* OPAL_HAVE_ATOMIC_XOR_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_SUB_32) -# define OPAL_HAVE_ATOMIC_SUB_32 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, -, sub) - -# endif /* OPAL_HAVE_ATOMIC_SUB_32 */ - -#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ - -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 - -# if !defined(OPAL_HAVE_ATOMIC_MIN_64) -static inline int64_t opal_atomic_fetch_min_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = *addr; - do { - if (old <= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_64(addr, &old, value)); - - return old; -} - -# define OPAL_HAVE_ATOMIC_MIN_64 1 - -# endif /* OPAL_HAVE_ATOMIC_MIN_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_MAX_64) -static inline int64_t opal_atomic_fetch_max_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = *addr; - do { - if (old >= value) { - break; - } - } while 
(!opal_atomic_compare_exchange_strong_64(addr, &old, value)); - - return old; -} - -# define OPAL_HAVE_ATOMIC_MAX_64 1 -# endif /* OPAL_HAVE_ATOMIC_MAX_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_SWAP_64) -# define OPAL_HAVE_ATOMIC_SWAP_64 1 -static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) -{ - int64_t old = *addr; - do { - } while (!opal_atomic_compare_exchange_strong_64(addr, &old, newval)); - - return old; -} -# endif /* OPAL_HAVE_ATOMIC_SWAP_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_ADD_64) -# define OPAL_HAVE_ATOMIC_ADD_64 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, +, add) - -# endif /* OPAL_HAVE_ATOMIC_ADD_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_AND_64) -# define OPAL_HAVE_ATOMIC_AND_64 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, &, and) - -# endif /* OPAL_HAVE_ATOMIC_AND_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_OR_64) -# define OPAL_HAVE_ATOMIC_OR_64 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, |, or) - -# endif /* OPAL_HAVE_ATOMIC_OR_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_XOR_64) -# define OPAL_HAVE_ATOMIC_XOR_64 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, ^, xor) - -# endif /* OPAL_HAVE_ATOMIC_XOR_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_SUB_64) -# define OPAL_HAVE_ATOMIC_SUB_64 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, -, sub) - -# endif /* OPAL_HAVE_ATOMIC_SUB_64 */ - -#else - -# if !defined(OPAL_HAVE_ATOMIC_ADD_64) -# define OPAL_HAVE_ATOMIC_ADD_64 0 -# endif - -# if !defined(OPAL_HAVE_ATOMIC_SUB_64) -# define OPAL_HAVE_ATOMIC_SUB_64 0 -# endif - -#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */ - -#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) - -# if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -# define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \ - static inline bool opal_atomic_compare_exchange_strong##semantics##xx( \ - opal_atomic_intptr_t *addr, intptr_t *oldval, int64_t newval, const size_t length) \ - { \ - switch (length) { \ - 
case 4: \ - return opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t *) addr, \ - (int32_t *) oldval, \ - (int32_t) newval); \ - case 8: \ - return opal_atomic_compare_exchange_strong_64((opal_atomic_int64_t *) addr, \ - (int64_t *) oldval, \ - (int64_t) newval); \ - } \ - abort(); \ - } -# elif OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -# define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \ - static inline bool opal_atomic_compare_exchange_strong##semantics##xx( \ - opal_atomic_intptr_t *addr, intptr_t *oldval, int64_t newval, const size_t length) \ - { \ - switch (length) { \ - case 4: \ - return opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t *) addr, \ - (int32_t *) oldval, \ - (int32_t) newval); \ - } \ - abort(); \ - } -# else -# error "Platform does not have required atomic compare-and-swap functionality" -# endif - -OPAL_ATOMIC_DEFINE_CMPXCG_XX(_) -OPAL_ATOMIC_DEFINE_CMPXCG_XX(_acq_) -OPAL_ATOMIC_DEFINE_CMPXCG_XX(_rel_) - -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -# define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \ - static inline bool \ - opal_atomic_compare_exchange_strong##semantics##ptr(opal_atomic_intptr_t *addr, \ - intptr_t *oldval, \ - intptr_t newval) \ - { \ - return opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t *) addr, \ - (int32_t *) oldval, \ - (int32_t) newval); \ - } -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -# define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \ - static inline bool \ - opal_atomic_compare_exchange_strong##semantics##ptr(opal_atomic_intptr_t *addr, \ - intptr_t *oldval, \ - intptr_t newval) \ - { \ - return opal_atomic_compare_exchange_strong_64((opal_atomic_int64_t *) addr, \ - (int64_t *) oldval, \ - (int64_t) newval); \ - } -# else -# error "Can not define opal_atomic_compare_exchange_strong_ptr with existing atomics" -# endif - -OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_) -OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_acq_) 
-OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) - -#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ - -#if (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) - -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SWAP_32 -# define opal_atomic_swap_ptr(addr, value) \ - (intptr_t) opal_atomic_swap_32((opal_atomic_int32_t *) addr, (int32_t) value) -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SWAP_64 -# define opal_atomic_swap_ptr(addr, value) \ - (intptr_t) opal_atomic_swap_64((opal_atomic_int64_t *) addr, (int64_t) value) -# endif - -#endif /* (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) */ - -static inline void opal_atomic_add_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length) -{ - switch (length) { -# if OPAL_HAVE_ATOMIC_ADD_32 - case 4: - (void) opal_atomic_fetch_add_32((opal_atomic_int32_t *) addr, (int32_t) value); - break; -# endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ - -# if OPAL_HAVE_ATOMIC_ADD_64 - case 8: - (void) opal_atomic_fetch_add_64((opal_atomic_int64_t *) addr, (int64_t) value); - break; -# endif /* OPAL_HAVE_ATOMIC_ADD_64 */ - default: - /* This should never happen, so deliberately abort (hopefully - leaving a corefile for analysis) */ - abort(); - } -} - -static inline void opal_atomic_sub_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length) -{ - switch (length) { -# if OPAL_HAVE_ATOMIC_SUB_32 - case 4: - (void) opal_atomic_fetch_sub_32((opal_atomic_int32_t *) addr, (int32_t) value); - break; -# endif /* OPAL_HAVE_ATOMIC_SUB_32 */ - -# if OPAL_HAVE_ATOMIC_SUB_64 - case 8: - (void) opal_atomic_fetch_sub_64((opal_atomic_int64_t *) addr, (int64_t) value); - break; -# endif /* OPAL_HAVE_ATOMIC_SUB_64 */ - default: - /* This should never happen, so deliberately abort (hopefully - leaving a corefile for analysis) */ - abort(); - } -} - -# define OPAL_ATOMIC_DEFINE_OP_FETCH(op, operation, type, ptr_type, suffix) \ - static inline type 
opal_atomic_##op##_fetch_##suffix(opal_atomic_##ptr_type *addr, \ - type value) \ - { \ - return opal_atomic_fetch_##op##_##suffix(addr, value) operation value; \ - } - -OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int32_t, int32_t, 32) -OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int32_t, int32_t, 32) -OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int32_t, int32_t, 32) -OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int32_t, int32_t, 32) -OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int32_t, int32_t, 32) - -static inline int32_t opal_atomic_min_fetch_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = opal_atomic_fetch_min_32(addr, value); - return old <= value ? old : value; -} - -static inline int32_t opal_atomic_max_fetch_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = opal_atomic_fetch_max_32(addr, value); - return old >= value ? old : value; -} - -OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int64_t, int64_t, 64) -OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int64_t, int64_t, 64) -OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int64_t, int64_t, 64) -OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int64_t, int64_t, 64) -OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int64_t, int64_t, 64) - -static inline int64_t opal_atomic_min_fetch_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = opal_atomic_fetch_min_64(addr, value); - return old <= value ? old : value; -} - -static inline int64_t opal_atomic_max_fetch_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = opal_atomic_fetch_max_64(addr, value); - return old >= value ? 
old : value; -} - -static inline intptr_t opal_atomic_fetch_add_ptr(opal_atomic_intptr_t *addr, void *delta) -{ -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 - return opal_atomic_fetch_add_32((opal_atomic_int32_t *) addr, (unsigned long) delta); -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64 - return opal_atomic_fetch_add_64((opal_atomic_int64_t *) addr, (unsigned long) delta); -# else - abort(); - return 0; -# endif -} - -static inline intptr_t opal_atomic_add_fetch_ptr(opal_atomic_intptr_t *addr, void *delta) -{ -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 - return opal_atomic_add_fetch_32((opal_atomic_int32_t *) addr, (unsigned long) delta); -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64 - return opal_atomic_add_fetch_64((opal_atomic_int64_t *) addr, (unsigned long) delta); -# else - abort(); - return 0; -# endif -} - -static inline intptr_t opal_atomic_fetch_sub_ptr(opal_atomic_intptr_t *addr, void *delta) -{ -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 - return opal_atomic_fetch_sub_32((opal_atomic_int32_t *) addr, (unsigned long) delta); -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32 - return opal_atomic_fetch_sub_64((opal_atomic_int64_t *) addr, (unsigned long) delta); -# else - abort(); - return 0; -# endif -} - -static inline intptr_t opal_atomic_sub_fetch_ptr(opal_atomic_intptr_t *addr, void *delta) -{ -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 - return opal_atomic_sub_fetch_32((opal_atomic_int32_t *) addr, (unsigned long) delta); -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32 - return opal_atomic_sub_fetch_64((opal_atomic_int64_t *) addr, (unsigned long) delta); -# else - abort(); - return 0; -# endif -} - - -/********************************************************************** - * - * Atomic spinlocks - * - *********************************************************************/ -#ifdef OPAL_NEED_INLINE_ATOMIC_SPINLOCKS - -/* - * Lock initialization function. It set the lock to UNLOCKED. 
- */ -static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value) -{ - lock->u.lock = value; -} - -static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) -{ - int32_t unlocked = OPAL_ATOMIC_LOCK_UNLOCKED; - bool ret = opal_atomic_compare_exchange_strong_acq_32(&lock->u.lock, &unlocked, - OPAL_ATOMIC_LOCK_LOCKED); - return (ret == false) ? 1 : 0; -} - -static inline void opal_atomic_lock(opal_atomic_lock_t *lock) -{ - while (opal_atomic_trylock(lock)) { - while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) { - /* spin */; - } - } -} - -static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) -{ - opal_atomic_wmb(); - lock->u.lock = OPAL_ATOMIC_LOCK_UNLOCKED; -} - -#endif /* OPAL_HAVE_ATOMIC_SPINLOCKS */ diff --git a/opal/include/opal/sys/atomic_impl_math.h b/opal/include/opal/sys/atomic_impl_math.h new file mode 100644 index 00000000000..7f48e50a23b --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_math.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * Implementation of the required atomic math functions in terms of + * compare and swap operators. 
+ */ + +#ifndef ATOMIC_IMPL_MATH_H +#define ATOMIC_IMPL_MATH_H 1 + +#define OPAL_ATOMIC_DEFINE_OP(type, bits, operation, name) \ + static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, type value) \ + { \ + type oldval; \ + do { \ + oldval = *addr; \ + } while (!opal_atomic_compare_exchange_strong_##bits(addr, &oldval, \ + oldval operation value)); \ + \ + return oldval; \ + } \ + \ + static inline type opal_atomic_##name##_fetch_##bits(opal_atomic_##type *addr, type value) \ + { \ + type oldval, newval; \ + do { \ + oldval = *addr; \ + newval = oldval operation value; \ + } while (!opal_atomic_compare_exchange_strong_##bits(addr, &oldval, newval)); \ + \ + return newval; \ + } + +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, +, add) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, &, and) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, |, or) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, ^, xor) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, -, sub) + +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, +, add) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, &, and) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, |, or) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, ^, xor) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, -, sub) + +#include "opal/sys/atomic_impl_minmax_math.h" + +#endif /* #ifndef ATOMIC_IMPL_MATH_H */ diff --git a/opal/include/opal/sys/atomic_impl_minmax_math.h b/opal/include/opal/sys/atomic_impl_minmax_math.h new file mode 100644 index 00000000000..7ec5920ad46 --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_minmax_math.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * Implementation of the min/max atomic functions in terms of compare + * and swap. These are broken out from the basic atomic_impl_math.h + * functions because most atomic implementations do not provide native + * min/max interfaces. + */ + +#ifndef ATOMIC_IMPL_MINMAX_MATH_H +#define ATOMIC_IMPL_MINMAX_MATH_H 1 + +static inline int32_t opal_atomic_fetch_min_32(opal_atomic_int32_t *addr, int32_t value) +{ + int32_t old = *addr; + do { + if (old <= value) { + break; + } + } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); + + return old; +} + +static inline int32_t opal_atomic_min_fetch_32(opal_atomic_int32_t *addr, int32_t value) +{ + int32_t old = opal_atomic_fetch_min_32(addr, value); + return old <= value ? old : value; +} + +static inline int32_t opal_atomic_fetch_max_32(opal_atomic_int32_t *addr, int32_t value) +{ + int32_t old = *addr; + do { + if (old >= value) { + break; + } + } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); + + return old; +} + +static inline int32_t opal_atomic_max_fetch_32(opal_atomic_int32_t *addr, int32_t value) +{ + int32_t old = opal_atomic_fetch_max_32(addr, value); + return old >= value ? 
old : value; +} + +static inline int64_t opal_atomic_fetch_min_64(opal_atomic_int64_t *addr, int64_t value) +{ + int64_t old = *addr; + do { + if (old <= value) { + break; + } + } while (!opal_atomic_compare_exchange_strong_64(addr, &old, value)); + + return old; +} + +static inline int64_t opal_atomic_fetch_max_64(opal_atomic_int64_t *addr, int64_t value) +{ + int64_t old = *addr; + do { + if (old >= value) { + break; + } + } while (!opal_atomic_compare_exchange_strong_64(addr, &old, value)); + + return old; +} + +static inline int64_t opal_atomic_min_fetch_64(opal_atomic_int64_t *addr, int64_t value) +{ + int64_t old = opal_atomic_fetch_min_64(addr, value); + return old <= value ? old : value; +} + +static inline int64_t opal_atomic_max_fetch_64(opal_atomic_int64_t *addr, int64_t value) +{ + int64_t old = opal_atomic_fetch_max_64(addr, value); + return old >= value ? old : value; +} + +#endif /* #ifndef ATOMIC_IMPL_MINMAX_MATH_H */ diff --git a/opal/include/opal/sys/atomic_impl_ptr_cswap.h b/opal/include/opal/sys/atomic_impl_ptr_cswap.h new file mode 100644 index 00000000000..a7246c04ee3 --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_ptr_cswap.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights + * reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ATOMIC_IMPL_PTR_CSWAP_H +#define ATOMIC_IMPL_PTR_CSWAP_H 1 + +static inline bool opal_atomic_compare_exchange_strong_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval) +{ +#if SIZEOF_VOID_P == 4 + return opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t*)addr, + (int32_t*)oldval, + (int32_t)newval); +#elif SIZEOF_VOID_P == 8 + return opal_atomic_compare_exchange_strong_64((opal_atomic_int64_t*)addr, + (int64_t*)oldval, + (int64_t)newval); +#else +#error "No implementation of opal_atomic_compare_exchange_strong_ptr" +#endif +} + +static inline bool opal_atomic_compare_exchange_strong_acq_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval) +{ +#if SIZEOF_VOID_P == 4 + return opal_atomic_compare_exchange_strong_acq_32((opal_atomic_int32_t*)addr, + (int32_t*)oldval, + (int32_t)newval); +#elif SIZEOF_VOID_P == 8 + return opal_atomic_compare_exchange_strong_acq_64((opal_atomic_int64_t*)addr, + (int64_t*)oldval, + (int64_t)newval); +#else +#error "No implementation of opal_atomic_compare_exchange_strong_acq_ptr" +#endif +} + +static inline bool opal_atomic_compare_exchange_strong_rel_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval) +{ +#if SIZEOF_VOID_P == 4 + return opal_atomic_compare_exchange_strong_rel_32((opal_atomic_int32_t*)addr, + (int32_t*)oldval, + (int32_t)newval); +#elif SIZEOF_VOID_P == 8 + return opal_atomic_compare_exchange_strong_rel_64((opal_atomic_int64_t*)addr, + (int64_t*)oldval, + (int64_t)newval); +#else +#error "No implementation of opal_atomic_compare_exchange_strong_rel_ptr" +#endif +} + +#endif /* #ifndef ATOMIC_IMPL_PTR_CSWAP_H */ diff --git a/opal/include/opal/sys/atomic_impl_ptr_llsc.h b/opal/include/opal/sys/atomic_impl_ptr_llsc.h new file mode 100644 index 00000000000..076e768cba5 --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_ptr_llsc.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 
2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2020-2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * Pointer-sized wrapper for LL/SC calls, wrappers around size-defined + * calls. Note that these must be macros, as LL/SC may not work + * across function calls. 
+ */ + + +#ifndef ATOMIC_IMPL_PTR_LLSC_H +#define ATOMIC_IMPL_PTR_LLSC_H 1 + +#if SIZEOF_VOID_P == 4 && defined(OPAL_HAVE_ATOMIC_LLSC_32) && OPAL_HAVE_ATOMIC_LLSC_32 + +# define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((opal_atomic_int32_t *) (addr), ret) +# define opal_atomic_sc_ptr(addr, value, ret) \ + opal_atomic_sc_32((opal_atomic_int32_t *) (addr), (intptr_t)(value), ret) + +# define OPAL_HAVE_ATOMIC_LLSC_PTR 1 + +#elif SIZEOF_VOID_P == 8 && defined(OPAL_HAVE_ATOMIC_LLSC_64) && OPAL_HAVE_ATOMIC_LLSC_64 + +# define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((opal_atomic_int64_t *) (addr), ret) +# define opal_atomic_sc_ptr(addr, value, ret) \ + opal_atomic_sc_64((opal_atomic_int64_t *) (addr), (intptr_t)(value), ret) + +# define OPAL_HAVE_ATOMIC_LLSC_PTR 1 + +#endif /* SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64 */ + +#endif /* ATOMIC_IMPL_PTR_LLSC_H */ diff --git a/opal/mca/btl/sm/btl_sm_endpoint.h b/opal/include/opal/sys/atomic_impl_ptr_swap.h similarity index 50% rename from opal/mca/btl/sm/btl_sm_endpoint.h rename to opal/include/opal/sys/atomic_impl_ptr_swap.h index f5670289183..97615961fc7 100644 --- a/opal/mca/btl/sm/btl_sm_endpoint.h +++ b/opal/include/opal/sys/atomic_impl_ptr_swap.h @@ -1,34 +1,36 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2007 Voltaire. All rights reserved. 
- * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Triad National Security, LLC. All rights - * reserved. - * Copyright (c) 2020 Google, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ -/** - * @file - */ -#ifndef MCA_BTL_SM_ENDPOINT_H -#define MCA_BTL_SM_ENDPOINT_H +#ifndef ATOMIC_SWAP_PTR_IMPL_H +#define ATOMIC_SWAP_PTR_IMPL_H -#include "opal_config.h" +static inline intptr_t opal_atomic_swap_ptr(opal_atomic_intptr_t *addr, intptr_t newval) +{ +#if SIZEOF_VOID_P == 4 + return (intptr_t)opal_atomic_swap_32((opal_atomic_int32_t *) addr, (int32_t) newval); +#elif SIZEOF_VOID_P == 8 + return (intptr_t)opal_atomic_swap_64((opal_atomic_int64_t *) addr, (int64_t) newval); +#else +#error "No implementation of opal_atomic_swap_ptr" +#endif +} -#endif /* MCA_BTL_SM_ENDPOINT_H */ +#endif /* #ifndef ATOMIC_SWAP_PTR_IMPL_H */ diff --git a/opal/include/opal/sys/atomic_impl_size_t_math.h b/opal/include/opal/sys/atomic_impl_size_t_math.h new file mode 100644 index 00000000000..4316bd9a52c --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_size_t_math.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights + * reserved. 
+ * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * Implementation of size_t atomic add functions as wrappers around + * sized implementations. + */ + +#ifndef ATOMIC_IMPL_SIZE_T_MATH_H +#define ATOMIC_IMPL_SIZE_T_MATH_H 1 + +#include <stdlib.h> + +static inline size_t opal_atomic_add_fetch_size_t(opal_atomic_size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t)opal_atomic_add_fetch_32((opal_atomic_int32_t *) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t)opal_atomic_add_fetch_64((opal_atomic_int64_t *) addr, delta); +#else +#error "No implementation of opal_atomic_add_fetch_size_t" +#endif +} + +static inline size_t opal_atomic_fetch_add_size_t(opal_atomic_size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t)opal_atomic_fetch_add_32((opal_atomic_int32_t *) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t)opal_atomic_fetch_add_64((opal_atomic_int64_t *) addr, delta); +#else +#error "No implementation of opal_atomic_fetch_add_size_t" +#endif +} + +static inline size_t opal_atomic_sub_fetch_size_t(opal_atomic_size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t)opal_atomic_sub_fetch_32((opal_atomic_int32_t *) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t)opal_atomic_sub_fetch_64((opal_atomic_int64_t *) addr, delta); +#else +#error "No implementation of opal_atomic_sub_fetch_size_t" +#endif +} + +static inline size_t opal_atomic_fetch_sub_size_t(opal_atomic_size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t)opal_atomic_fetch_sub_32((opal_atomic_int32_t *) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t)opal_atomic_fetch_sub_64((opal_atomic_int64_t *) addr, delta); +#else +#error "No implementation of opal_atomic_fetch_sub_size_t" +#endif +} + +/** + * Atomically increment the content depending on the type.
This + * macro detects at compile time the type of the first argument + * and chooses the correct function to be called. + * + * \note This macro should only be used for integer types. + * + * @param addr Address of <TYPE> + * @param delta Value to add (converted to <TYPE>). + */ +#define opal_atomic_add(ADDR, VALUE) \ + opal_atomic_add_xx((opal_atomic_intptr_t *) (ADDR), (int32_t)(VALUE), sizeof(*(ADDR))) + +static inline void opal_atomic_add_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length) +{ + switch (length) { + case 4: + (void)opal_atomic_fetch_add_32((opal_atomic_int32_t*)addr, value); + break; + case 8: + (void)opal_atomic_fetch_add_64((opal_atomic_int64_t*)addr, value); + break; + default: + abort(); + } +} + +#endif diff --git a/opal/include/opal/sys/atomic_impl_spinlock.h b/opal/include/opal/sys/atomic_impl_spinlock.h new file mode 100644 index 00000000000..1f92f5ae72b --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_spinlock.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * Compare-and-swap based implementation of the atomic interface + */ + +#ifndef ATOMIC_SPINLOCK_IMPL_H +#define ATOMIC_SPINLOCK_IMPL_H + +static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value) +{ + *lock = value; + opal_atomic_wmb(); +} + +static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) +{ + int32_t unlocked = OPAL_ATOMIC_LOCK_UNLOCKED; + bool ret = opal_atomic_compare_exchange_strong_acq_32(lock, &unlocked, + OPAL_ATOMIC_LOCK_LOCKED); + return (ret == false) ? 1 : 0; +} + +static inline void opal_atomic_lock(opal_atomic_lock_t *lock) +{ + while (opal_atomic_trylock(lock)) { + while (*lock == OPAL_ATOMIC_LOCK_LOCKED) { + /* spin */; + } + } +} + +static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) +{ + opal_atomic_wmb(); + *lock = OPAL_ATOMIC_LOCK_UNLOCKED; +} + +#endif /* #ifndef ATOMIC_SPINLOCK_IMPL_H */ diff --git a/opal/include/opal/sys/atomic_stdc.h b/opal/include/opal/sys/atomic_stdc.h index c42573c2ddb..4f5bceaa922 100644 --- a/opal/include/opal/sys/atomic_stdc.h +++ b/opal/include/opal/sys/atomic_stdc.h @@ -7,6 +7,8 @@ * Copyright (c) 2019-2021 Google, LLC. All rights reserved. * Copyright (c) 2019 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,33 +32,11 @@ # include # include -# define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 - -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -# define OPAL_HAVE_ATOMIC_SWAP_32 1 - -# define OPAL_HAVE_ATOMIC_ADD_32 1 -# define OPAL_HAVE_ATOMIC_AND_32 1 -# define OPAL_HAVE_ATOMIC_OR_32 1 -# define OPAL_HAVE_ATOMIC_XOR_32 1 -# define OPAL_HAVE_ATOMIC_SUB_32 1 - -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -# define OPAL_HAVE_ATOMIC_SWAP_64 1 - -# define OPAL_HAVE_ATOMIC_ADD_64 1 -# define OPAL_HAVE_ATOMIC_AND_64 1 -# define OPAL_HAVE_ATOMIC_OR_64 1 -# define OPAL_HAVE_ATOMIC_XOR_64 1 -# define OPAL_HAVE_ATOMIC_SUB_64 1 - -# define OPAL_HAVE_ATOMIC_MIN_32 1 -# define OPAL_HAVE_ATOMIC_MAX_32 1 - -# define OPAL_HAVE_ATOMIC_MIN_64 1 -# define OPAL_HAVE_ATOMIC_MAX_64 1 - -# define OPAL_HAVE_ATOMIC_SPINLOCKS 1 +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ static inline void opal_atomic_mb(void) { @@ -70,7 +50,7 @@ static inline void opal_atomic_wmb(void) static inline void opal_atomic_rmb(void) { -# if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 +# if defined(PLATFORM_ARCH_X86_64) /* work around a bug in older gcc versions (observed in gcc 6.x) * where acquire seems to get treated as a no-op instead of being * equivalent to __asm__ __volatile__("": : :"memory") on x86_64 */ @@ -80,168 +60,98 @@ static inline void opal_atomic_rmb(void) # endif } + +/********************************************************************** + * + * Compare and Swap + * + *********************************************************************/ + # define opal_atomic_compare_exchange_strong_32(addr, compare, value) \ atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_relaxed, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_64(addr, compare, value) \ - 
atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_relaxed, \ - memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_ptr(addr, compare, value) \ - atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_relaxed, \ - memory_order_relaxed) # define opal_atomic_compare_exchange_strong_acq_32(addr, compare, value) \ atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_acquire, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_acq_64(addr, compare, value) \ - atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_acquire, \ - memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_acq_ptr(addr, compare, value) \ - atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_acquire, \ - memory_order_relaxed) - # define opal_atomic_compare_exchange_strong_rel_32(addr, compare, value) \ atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_release, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_rel_64(addr, compare, value) \ - atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_release, \ + +# define opal_atomic_compare_exchange_strong_64(addr, compare, value) \ + atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_relaxed, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_rel_ptr(addr, compare, value) \ +# define opal_atomic_compare_exchange_strong_acq_64(addr, compare, value) \ + atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_acquire, \ + memory_order_relaxed) +# define opal_atomic_compare_exchange_strong_rel_64(addr, compare, value) \ atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_release, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong(addr, oldval, newval) \ - atomic_compare_exchange_strong_explicit(addr, oldval, newval, 
memory_order_relaxed, \ +# define opal_atomic_compare_exchange_strong_ptr(addr, compare, value) \ + atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_relaxed, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_acq(addr, oldval, newval) \ - atomic_compare_exchange_strong_explicit(addr, oldval, newval, memory_order_acquire, \ +# define opal_atomic_compare_exchange_strong_acq_ptr(addr, compare, value) \ + atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_acquire, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_rel(addr, oldval, newval) \ - atomic_compare_exchange_strong_explicit(addr, oldval, newval, memory_order_release, \ +# define opal_atomic_compare_exchange_strong_rel_ptr(addr, compare, value) \ + atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_release, \ memory_order_relaxed) -# define opal_atomic_swap_32(addr, value) \ - atomic_exchange_explicit((_Atomic unsigned int *) addr, value, memory_order_relaxed) -# define opal_atomic_swap_64(addr, value) \ - atomic_exchange_explicit((_Atomic unsigned long *) addr, value, memory_order_relaxed) -# define opal_atomic_swap_ptr(addr, value) \ - atomic_exchange_explicit((_Atomic unsigned long *) addr, value, memory_order_relaxed) - -# define OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(op, bits, type, operator) \ - static inline type opal_atomic_fetch_##op##_##bits(opal_atomic_##type *addr, type value) \ - { \ - return atomic_fetch_##op##_explicit(addr, value, memory_order_relaxed); \ - } \ - \ - static inline type opal_atomic_##op##_fetch_##bits(opal_atomic_##type *addr, type value) \ - { \ - return atomic_fetch_##op##_explicit(addr, value, memory_order_relaxed) operator value; \ - } - -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 32, int32_t, +) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 64, int64_t, +) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, size_t, size_t, +) - -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, 32, int32_t, -) 
-OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, 64, int64_t, -) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, size_t, size_t, -) - -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(or, 32, int32_t, |) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(or, 64, int64_t, |) - -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(xor, 32, int32_t, ^) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(xor, 64, int64_t, ^) - -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(and, 32, int32_t, &) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(and, 64, int64_t, &) - -# define opal_atomic_add(addr, value) \ - (void) atomic_fetch_add_explicit(addr, value, memory_order_relaxed) - -static inline int32_t opal_atomic_fetch_min_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = *addr; - do { - if (old <= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); +# if OPAL_HAVE_C11_CSWAP_INT128 - return old; -} +/* the C11 atomic compare-exchange is lock free so use it */ +# define opal_atomic_compare_exchange_strong_128 atomic_compare_exchange_strong -static inline int32_t opal_atomic_fetch_max_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = *addr; - do { - if (old >= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); +# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 - return old; -} +# elif OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 -static inline int64_t opal_atomic_fetch_min_64(opal_atomic_int64_t *addr, int64_t value) +/* fall back on the __sync builtin if available since it will emit the expected instruction on + * x86_64 (cmpxchng16b) */ +__opal_attribute_always_inline__ static inline bool +opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, opal_int128_t *oldval, + opal_int128_t newval) { - int64_t old = *addr; - do { - if (old <= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_64(addr, &old, value)); - - return old; + opal_int128_t prev = __sync_val_compare_and_swap(addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return 
ret; } -static inline int64_t opal_atomic_fetch_max_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = *addr; - do { - if (old >= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_64(addr, &old, value)); - - return old; -} +# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 -static inline int32_t opal_atomic_min_fetch_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = opal_atomic_fetch_min_32(addr, value); - return old <= value ? old : value; -} +# else -static inline int32_t opal_atomic_max_fetch_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = opal_atomic_fetch_max_32(addr, value); - return old >= value ? old : value; -} +# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 -static inline int64_t opal_atomic_min_fetch_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = opal_atomic_fetch_min_64(addr, value); - return old <= value ? old : value; -} +# endif -static inline int64_t opal_atomic_max_fetch_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = opal_atomic_fetch_max_64(addr, value); - return old >= value ? 
old : value; -} -# define OPAL_ATOMIC_LOCK_UNLOCKED false -# define OPAL_ATOMIC_LOCK_LOCKED true +/********************************************************************** + * + * Swap + * + *********************************************************************/ -# define OPAL_ATOMIC_LOCK_INIT ATOMIC_FLAG_INIT +# define opal_atomic_swap_32(addr, value) \ + atomic_exchange_explicit((_Atomic unsigned int *) addr, value, memory_order_relaxed) +# define opal_atomic_swap_64(addr, value) \ + atomic_exchange_explicit((_Atomic unsigned long *) addr, value, memory_order_relaxed) +# define opal_atomic_swap_ptr(addr, value) \ + atomic_exchange_explicit((_Atomic unsigned long *) addr, value, memory_order_relaxed) -typedef atomic_flag opal_atomic_lock_t; +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ /* * Lock initialization function. It set the lock to UNLOCKED. */ -static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, bool value) +static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value) { - atomic_flag_clear(lock); + atomic_flag_clear_explicit(lock, memory_order_relaxed); } static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) @@ -260,33 +170,42 @@ static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) atomic_flag_clear(lock); } -# if OPAL_HAVE_C11_CSWAP_INT128 - -/* the C11 atomic compare-exchange is lock free so use it */ -# define opal_atomic_compare_exchange_strong_128 atomic_compare_exchange_strong -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ -# elif OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 +# define OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(op, bits, type, operator) \ + static inline type 
opal_atomic_fetch_##op##_##bits(opal_atomic_##type *addr, type value) \ + { \ + return atomic_fetch_##op##_explicit(addr, value, memory_order_relaxed); \ + } \ + \ + static inline type opal_atomic_##op##_fetch_##bits(opal_atomic_##type *addr, type value) \ + { \ + return atomic_fetch_##op##_explicit(addr, value, memory_order_relaxed) operator value; \ + } -/* fall back on the __sync builtin if available since it will emit the expected instruction on - * x86_64 (cmpxchng16b) */ -__opal_attribute_always_inline__ static inline bool -opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, opal_int128_t *oldval, - opal_int128_t newval) -{ - opal_int128_t prev = __sync_val_compare_and_swap(addr, *oldval, newval); - bool ret = prev == *oldval; - *oldval = prev; - return ret; -} +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 32, int32_t, +) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(and, 32, int32_t, &) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(or, 32, int32_t, |) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(xor, 32, int32_t, ^) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, 32, int32_t, -) -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 64, int64_t, +) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(and, 64, int64_t, &) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(or, 64, int64_t, |) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(xor, 64, int64_t, ^) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, 64, int64_t, -) -# else +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, size_t, size_t, +) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, size_t, size_t, -) -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 +# define opal_atomic_add(addr, value) \ + (void) atomic_fetch_add_explicit(addr, value, memory_order_relaxed) -# endif +#include "opal/sys/atomic_impl_minmax_math.h" #endif /* !defined(OPAL_ATOMIC_STDC_H) */ diff --git a/opal/include/opal/sys/cma.h b/opal/include/opal/sys/cma.h index e5b4961613e..b1db5f7700c 100644 --- a/opal/include/opal/sys/cma.h +++ b/opal/include/opal/sys/cma.h @@ -5,6 +5,8 @@ * Copyright (c) 2017 Research 
Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ */ @@ -19,12 +21,7 @@ #ifndef OPAL_SYS_CMA_H #define OPAL_SYS_CMA_H 1 -#if !defined(OPAL_ASSEMBLY_ARCH) -/* need opal_config.h for the assembly architecture */ -# include "opal_config.h" -#endif - -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #ifdef HAVE_SYS_TYPES_H # include @@ -34,28 +31,28 @@ # include #endif -#ifdef __linux__ +#ifdef PLATFORM_OS_LINUX /* Cross Memory Attach is so far only supported under linux */ -# if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 +# if defined(PLATFORM_ARCH_X86_64) # define __NR_process_vm_readv 310 # define __NR_process_vm_writev 311 -# elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 +# elif defined(PLATFORM_ARCH_X86) # define __NR_process_vm_readv 347 # define __NR_process_vm_writev 348 -# elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32 +# elif defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_32) # define __NR_process_vm_readv 351 # define __NR_process_vm_writev 352 -# elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 +# elif defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) # define __NR_process_vm_readv 351 # define __NR_process_vm_writev 352 -# elif OPAL_ASSEMBLY_ARCH == OPAL_ARM +# elif defined(PLATFORM_ARCH_ARM) # define __NR_process_vm_readv 376 # define __NR_process_vm_writev 377 -# elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +# elif defined(PLATFORM_ARCH_AARCH64) /* ARM64 uses the asm-generic syscall numbers */ diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index 80f5254f144..4615f9fb4f3 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -18,6 +18,8 @@ * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2021 Google, LLC.
All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,24 +35,8 @@ * Memory Barriers * *********************************************************************/ -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 - -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_ADD_32 1 -#define OPAL_HAVE_ATOMIC_AND_32 1 -#define OPAL_HAVE_ATOMIC_OR_32 1 -#define OPAL_HAVE_ATOMIC_XOR_32 1 -#define OPAL_HAVE_ATOMIC_SUB_32 1 -#define OPAL_HAVE_ATOMIC_SWAP_32 1 -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -#define OPAL_HAVE_ATOMIC_ADD_64 1 -#define OPAL_HAVE_ATOMIC_AND_64 1 -#define OPAL_HAVE_ATOMIC_OR_64 1 -#define OPAL_HAVE_ATOMIC_XOR_64 1 -#define OPAL_HAVE_ATOMIC_SUB_64 1 -#define OPAL_HAVE_ATOMIC_SWAP_64 1 - -#if (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) && defined (__GNUC__) && !defined(__llvm) && (__GNUC__ < 6) + +#if defined(PLATFORM_ARCH_X86_64) && defined (__GNUC__) && !defined(__llvm) && (__GNUC__ < 6) /* work around a bug in older gcc versions where ACQUIRE seems to get * treated as a no-op instead */ #define OPAL_BUSTED_ATOMIC_MB 1 @@ -77,11 +63,10 @@ static inline void opal_atomic_wmb(void) __atomic_thread_fence(__ATOMIC_RELEASE); } -#define MB() opal_atomic_mb() /********************************************************************** * - * Atomic math operations + * Compare and Swap * *********************************************************************/ @@ -93,6 +78,13 @@ static inline void opal_atomic_wmb(void) # pragma error_messages(off, E_ARG_INCOMPATIBLE_WITH_ARG_L) #endif +static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, + int32_t *oldval, int32_t newval) +{ + return __atomic_compare_exchange_n(addr, oldval, newval, false, __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED); +} + static inline bool opal_atomic_compare_exchange_strong_acq_32(opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { @@ -107,45 +99,13 @@ 
static inline bool opal_atomic_compare_exchange_strong_rel_32(opal_atomic_int32_ __ATOMIC_RELAXED); } -static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, - int32_t *oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, + int64_t *oldval, int64_t newval) { return __atomic_compare_exchange_n(addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) -{ - int32_t oldval; - __atomic_exchange(addr, &newval, &oldval, __ATOMIC_RELAXED); - return oldval; -} - -static inline int32_t opal_atomic_fetch_add_32(opal_atomic_int32_t *addr, int32_t delta) -{ - return __atomic_fetch_add(addr, delta, __ATOMIC_RELAXED); -} - -static inline int32_t opal_atomic_fetch_and_32(opal_atomic_int32_t *addr, int32_t value) -{ - return __atomic_fetch_and(addr, value, __ATOMIC_RELAXED); -} - -static inline int32_t opal_atomic_fetch_or_32(opal_atomic_int32_t *addr, int32_t value) -{ - return __atomic_fetch_or(addr, value, __ATOMIC_RELAXED); -} - -static inline int32_t opal_atomic_fetch_xor_32(opal_atomic_int32_t *addr, int32_t value) -{ - return __atomic_fetch_xor(addr, value, __ATOMIC_RELAXED); -} - -static inline int32_t opal_atomic_fetch_sub_32(opal_atomic_int32_t *addr, int32_t delta) -{ - return __atomic_fetch_sub(addr, delta, __ATOMIC_RELAXED); -} - static inline bool opal_atomic_compare_exchange_strong_acq_64(opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { @@ -160,44 +120,7 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_ __ATOMIC_RELAXED); } -static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, - int64_t *oldval, int64_t newval) -{ - return __atomic_compare_exchange_n(addr, oldval, newval, false, __ATOMIC_ACQUIRE, - __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) -{ 
- int64_t oldval; - __atomic_exchange(addr, &newval, &oldval, __ATOMIC_RELAXED); - return oldval; -} - -static inline int64_t opal_atomic_fetch_add_64(opal_atomic_int64_t *addr, int64_t delta) -{ - return __atomic_fetch_add(addr, delta, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_and_64(opal_atomic_int64_t *addr, int64_t value) -{ - return __atomic_fetch_and(addr, value, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_or_64(opal_atomic_int64_t *addr, int64_t value) -{ - return __atomic_fetch_or(addr, value, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_xor_64(opal_atomic_int64_t *addr, int64_t value) -{ - return __atomic_fetch_xor(addr, value, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_sub_64(opal_atomic_int64_t *addr, int64_t delta) -{ - return __atomic_fetch_sub(addr, delta, __ATOMIC_RELAXED); -} +#include "opal/sys/atomic_impl_ptr_cswap.h" #if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128 @@ -217,7 +140,7 @@ static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t /* __atomic version is not lock-free so use legacy __sync version */ -static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_opal_int128_t *addr, +static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, opal_int128_t *oldval, opal_int128_t newval) { @@ -229,20 +152,53 @@ static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_opal_int1 #endif + +/********************************************************************** + * + * Swap + * + *********************************************************************/ + +static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) +{ + int32_t oldval; + __atomic_exchange(addr, &newval, &oldval, __ATOMIC_RELAXED); + return oldval; +} + +static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) +{ + int64_t oldval; + __atomic_exchange(addr, &newval, &oldval, 
__ATOMIC_RELAXED); + return oldval; +} + +static inline intptr_t opal_atomic_swap_ptr(opal_atomic_intptr_t *addr, intptr_t newval) +{ + intptr_t oldval; + __atomic_exchange(addr, &newval, &oldval, __ATOMIC_RELAXED); + return oldval; +} + + +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ + #if defined(__HLE__) # include -# define OPAL_HAVE_ATOMIC_SPINLOCKS 1 - static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value) { - lock->u.lock = value; + lock = value; } static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) { - int ret = __atomic_exchange_n(&lock->u.lock, OPAL_ATOMIC_LOCK_LOCKED, + int ret = __atomic_exchange_n(&lock, OPAL_ATOMIC_LOCK_LOCKED, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE); if (OPAL_ATOMIC_LOCK_LOCKED == ret) { /* abort the transaction */ @@ -256,7 +212,7 @@ static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) static inline void opal_atomic_lock(opal_atomic_lock_t *lock) { while (OPAL_ATOMIC_LOCK_LOCKED - == __atomic_exchange_n(&lock->u.lock, OPAL_ATOMIC_LOCK_LOCKED, + == __atomic_exchange_n(&lock, OPAL_ATOMIC_LOCK_LOCKED, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE)) { /* abort the transaction */ _mm_pause(); @@ -265,12 +221,54 @@ static inline void opal_atomic_lock(opal_atomic_lock_t *lock) static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) { - __atomic_store_n(&lock->u.lock, OPAL_ATOMIC_LOCK_UNLOCKED, + __atomic_store_n(&lock, OPAL_ATOMIC_LOCK_UNLOCKED, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE); } +#else /* #if defined(__HLE__) */ + +#include "opal/sys/atomic_impl_spinlock.h" + #endif + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + +#define OPAL_ATOMIC_DEFINE_OP(type, bits, operator, name) \ + static inline type 
opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, type value) \ + { \ + return __atomic_fetch_##name(addr, value, __ATOMIC_RELAXED); \ + } \ + \ + static inline type opal_atomic_##name##_fetch_##bits(opal_atomic_##type *addr, type value) \ + { \ + return __atomic_##name##_fetch(addr, value, __ATOMIC_RELAXED); \ + } + +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, +, add) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, &, and) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, |, or) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, ^, xor) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, -, sub) + +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, +, add) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, &, and) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, |, or) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, ^, xor) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, -, sub) + +OPAL_ATOMIC_DEFINE_OP(size_t, size_t, +, add) +OPAL_ATOMIC_DEFINE_OP(size_t, size_t, -, sub) + +#define opal_atomic_add(ADDR, VALUE) \ + (void) __atomic_fetch_add(ADDR, VALUE, __ATOMIC_RELAXED) + +#include "opal/sys/atomic_impl_minmax_math.h" + #if defined(__SUNPRO_C) || defined(__SUNPRO_CC) # pragma error_messages(default, E_ARG_INCOMPATIBLE_WITH_ARG_L) #endif diff --git a/opal/include/opal/sys/powerpc/Makefile.am b/opal/include/opal/sys/powerpc/Makefile.am index 612dd2e4d7f..d2dbeeba0a5 100644 --- a/opal/include/opal/sys/powerpc/Makefile.am +++ b/opal/include/opal/sys/powerpc/Makefile.am @@ -20,4 +20,6 @@ headers += \ opal/sys/powerpc/atomic.h \ + opal/sys/powerpc/atomic_helper.h \ + opal/sys/powerpc/atomic_llsc.h \ opal/sys/powerpc/timer.h diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 6e7d3593655..a92457c6da1 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -14,6 +14,8 @@ * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,88 +30,40 @@ * On powerpc ... */ -#define MB() __asm__ __volatile__("sync" : : : "memory") -#define RMB() __asm__ __volatile__("lwsync" : : : "memory") -#define WMB() __asm__ __volatile__("lwsync" : : : "memory") -#define ISYNC() __asm__ __volatile__("isync" : : : "memory") - -/********************************************************************** - * - * Define constants for PowerPC 64 - * - *********************************************************************/ -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 - -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_SWAP_32 1 -#define OPAL_HAVE_ATOMIC_LLSC_32 1 - -#define OPAL_HAVE_ATOMIC_ADD_32 1 -#define OPAL_HAVE_ATOMIC_AND_32 1 -#define OPAL_HAVE_ATOMIC_OR_32 1 -#define OPAL_HAVE_ATOMIC_XOR_32 1 -#define OPAL_HAVE_ATOMIC_SUB_32 1 -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -#define OPAL_HAVE_ATOMIC_SWAP_64 1 -#define OPAL_HAVE_ATOMIC_LLSC_64 1 -#define OPAL_HAVE_ATOMIC_ADD_64 1 -#define OPAL_HAVE_ATOMIC_AND_64 1 -#define OPAL_HAVE_ATOMIC_OR_64 1 -#define OPAL_HAVE_ATOMIC_XOR_64 1 -#define OPAL_HAVE_ATOMIC_SUB_64 1 +#include "opal/sys/powerpc/atomic_helper.h" /********************************************************************** * * Memory Barriers * *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY static inline void opal_atomic_mb(void) { - MB(); + __asm__ __volatile__("sync" : : : "memory"); } static inline void opal_atomic_rmb(void) { - RMB(); + __asm__ __volatile__ ("isync" : : : "memory"); } static inline void opal_atomic_wmb(void) { - WMB(); + __asm__ __volatile__("lwsync" : : : "memory"); } static inline void opal_atomic_isync(void) { - ISYNC(); + __asm__ __volatile__("isync" : : : "memory"); } -#endif /* end OPAL_GCC_INLINE_ASSEMBLY */ /********************************************************************** * - * Atomic math operations + * Compare and Swap * 
*********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY - -# if defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) || defined(__ibmxl__) -/* work-around bizzare xlc bug in which it sign-extends - a pointer to a 32-bit signed integer */ -# define OPAL_ASM_ADDR(a) ((uintptr_t) a) -# else -# define OPAL_ASM_ADDR(a) (a) -# endif - -# if defined(__PGI) -/* work-around for bug in PGI 16.5-16.7 where the compiler fails to - * correctly emit load instructions for 64-bit operands. without this - * it will emit lwz instead of ld to load the 64-bit operand. */ -# define OPAL_ASM_VALUE64(x) (void *) (intptr_t)(x) -# else -# define OPAL_ASM_VALUE64(x) x -# endif static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) @@ -132,39 +86,6 @@ static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *a return ret; } -/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason - * is that even with an always_inline attribute the compiler may still emit instructions to store - * then load the arguments to/from the stack. This sequence may cause the ll reservation to be - * cancelled. */ -# define opal_atomic_ll_32(addr, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - int32_t _ret; \ - __asm__ __volatile__("lwarx %0, 0, %1 \n\t" : "=&r"(_ret) : "r"(_addr)); \ - ret = (typeof(ret)) _ret; \ - } while (0) - -# define opal_atomic_sc_32(addr, value, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - int32_t _ret, _foo, _newval = (int32_t) value; \ - \ - __asm__ __volatile__(" stwcx. 
%4, 0, %3 \n\t" \ - " li %0,0 \n\t" \ - " bne- 1f \n\t" \ - " ori %0,%0,1 \n\t" \ - "1:" \ - : "=r"(_ret), "=m"(*_addr), "=r"(_foo) \ - : "r"(_addr), "r"(_newval) \ - : "cc", "memory"); \ - ret = _ret; \ - } while (0) - -/* these two functions aren't inlined in the non-gcc case because then - there would be two function calls (since neither cmpset_32 nor - atomic_?mb can be inlined). Instead, we "inline" them by hand in - the assembly, meaning there is one function call overhead instead - of two */ static inline bool opal_atomic_compare_exchange_strong_acq_32(opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { @@ -183,45 +104,6 @@ static inline bool opal_atomic_compare_exchange_strong_rel_32(opal_atomic_int32_ return opal_atomic_compare_exchange_strong_32(addr, oldval, newval); } -static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) -{ - int32_t ret; - - __asm__ __volatile__("1: lwarx %0, 0, %2 \n\t" - " stwcx. %3, 0, %2 \n\t" - " bne- 1b \n\t" - : "=&r"(ret), "=m"(*addr) - : "r"(addr), "r"(newval) - : "cc", "memory"); - - return ret; -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#if OPAL_GCC_INLINE_ASSEMBLY - -# define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ - static inline int64_t opal_atomic_fetch_##type##_64(opal_atomic_int64_t *v, int64_t val) \ - { \ - int64_t t, old; \ - \ - __asm__ __volatile__("1: ldarx %1, 0, %4 \n\t" \ - " " #instr " %0, %3, %1 \n\t" \ - " stdcx. 
%0, 0, %4 \n\t" \ - " bne- 1b \n\t" \ - : "=&r"(t), "=&r"(old), "=m"(*v) \ - : "r"(OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m"(*v) \ - : "cc"); \ - \ - return old; \ - } - -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(and, and) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) @@ -245,30 +127,46 @@ static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *a return ret; } -# define opal_atomic_ll_64(addr, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - int64_t _ret; \ - __asm__ __volatile__("ldarx %0, 0, %1 \n\t" : "=&r"(_ret) : "r"(_addr)); \ - ret = (typeof(ret)) _ret; \ - } while (0) - -# define opal_atomic_sc_64(addr, value, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - int64_t _newval = (int64_t) value; \ - int32_t _ret; \ - \ - __asm__ __volatile__(" stdcx. 
%2, 0, %1 \n\t" \ - " li %0,0 \n\t" \ - " bne- 1f \n\t" \ - " ori %0,%0,1 \n\t" \ - "1:" \ - : "=r"(_ret) \ - : "r"(_addr), "r"(OPAL_ASM_VALUE64(_newval)) \ - : "cc", "memory"); \ - ret = _ret; \ - } while (0) +static inline bool opal_atomic_compare_exchange_strong_acq_64(opal_atomic_int64_t *addr, + int64_t *oldval, int64_t newval) +{ + bool rc; + + rc = opal_atomic_compare_exchange_strong_64(addr, oldval, newval); + opal_atomic_rmb(); + + return rc; +} + +static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_t *addr, + int64_t *oldval, int64_t newval) +{ + opal_atomic_wmb(); + return opal_atomic_compare_exchange_strong_64(addr, oldval, newval); +} + +#include "opal/sys/atomic_impl_ptr_cswap.h" + + +/********************************************************************** + * + * Swap + * + *********************************************************************/ + +static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) +{ + int32_t ret; + + __asm__ __volatile__("1: lwarx %0, 0, %2 \n\t" + " stwcx. %3, 0, %2 \n\t" + " bne- 1b \n\t" + : "=&r"(ret), "=m"(*addr) + : "r"(addr), "r"(newval) + : "cc", "memory"); + + return ret; +} static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) { @@ -284,48 +182,53 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new return ret; } -# endif /* OPAL_GCC_INLINE_ASSEMBLY */ +#include "opal/sys/atomic_impl_ptr_swap.h" -#if OPAL_GCC_INLINE_ASSEMBLY -/* these two functions aren't inlined in the non-gcc case because then - there would be two function calls (since neither cmpset_64 nor - atomic_?mb can be inlined). 
Instead, we "inline" them by hand in - the assembly, meaning there is one function call overhead instead - of two */ -static inline bool opal_atomic_compare_exchange_strong_acq_64(opal_atomic_int64_t *addr, - int64_t *oldval, int64_t newval) -{ - bool rc; +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ - rc = opal_atomic_compare_exchange_strong_64(addr, oldval, newval); - opal_atomic_rmb(); +#include "opal/sys/atomic_impl_spinlock.h" - return rc; -} -static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_t *addr, - int64_t *oldval, int64_t newval) -{ - opal_atomic_wmb(); - return opal_atomic_compare_exchange_strong_64(addr, oldval, newval); -} +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ -# define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ - static inline int32_t opal_atomic_fetch_##type##_32(opal_atomic_int32_t *v, int val) \ - { \ - int32_t t, old; \ - \ - __asm__ __volatile__("1: lwarx %1, 0, %4 \n\t" \ - " " #instr " %0, %3, %1 \n\t" \ - " stwcx. %0, 0, %4 \n\t" \ - " bne- 1b \n\t" \ - : "=&r"(t), "=&r"(old), "=m"(*v) \ - : "r"(val), "r" OPAL_ASM_ADDR(v), "m"(*v) \ - : "cc"); \ - \ - return old; \ - } +#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ + static inline int32_t opal_atomic_fetch_##type##_32(opal_atomic_int32_t *v, int val) \ + { \ + int32_t newval, old; \ + \ + __asm__ __volatile__("1: lwarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stwcx. 
%0, 0, %4 \n\t" \ + " bne- 1b \n\t" \ + : "=&r"(newval), "=&r"(old), "=m"(*v) \ + : "r"(val), "r" OPAL_ASM_ADDR(v), "m"(*v) \ + : "cc"); \ + \ + return old; \ + } \ + static inline int32_t opal_atomic_##type##_fetch_32(opal_atomic_int32_t *v, int val) \ + { \ + int32_t newval, old; \ + \ + __asm__ __volatile__("1: lwarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stwcx. %0, 0, %4 \n\t" \ + " bne- 1b \n\t" \ + : "=&r"(newval), "=&r"(old), "=m"(*v) \ + : "r"(val), "r" OPAL_ASM_ADDR(v), "m"(*v) \ + : "cc"); \ + \ + return newval; \ + } OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(add, add) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(and, and) @@ -333,6 +236,43 @@ OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(or, or) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(xor, xor) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(sub, subf) -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ +#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ + static inline int64_t opal_atomic_fetch_##type##_64(opal_atomic_int64_t *v, int64_t val) \ + { \ + int64_t newval, old; \ + \ + __asm__ __volatile__("1: ldarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stdcx. %0, 0, %4 \n\t" \ + " bne- 1b \n\t" \ + : "=&r"(newval), "=&r"(old), "=m"(*v) \ + : "r"(OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m"(*v) \ + : "cc"); \ + \ + return old; \ + } \ + static inline int64_t opal_atomic_##type##_fetch_64(opal_atomic_int64_t *v, int64_t val) \ + { \ + int64_t newval, old; \ + \ + __asm__ __volatile__("1: ldarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stdcx. 
%0, 0, %4 \n\t" \ + " bne- 1b \n\t" \ + : "=&r"(newval), "=&r"(old), "=m"(*v) \ + : "r"(OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m"(*v) \ + : "cc"); \ + \ + return newval; \ + } + +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(and, and) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) + +#include "opal/sys/atomic_impl_minmax_math.h" +#include "opal/sys/atomic_impl_size_t_math.h" #endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/powerpc/atomic_helper.h b/opal/include/opal/sys/powerpc/atomic_helper.h new file mode 100644 index 00000000000..5cd43d20652 --- /dev/null +++ b/opal/include/opal/sys/powerpc/atomic_helper.h @@ -0,0 +1,44 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + > * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2021 IBM Corporation. All rights reserved. + * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2021 Google, LLC. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_SYS_ARCH_ATOMIC_HELPER_H +#define OPAL_SYS_ARCH_ATOMIC_HELPER_H 1 + +#if defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) || defined(__ibmxl__) +/* work-around bizzare xlc bug in which it sign-extends + a pointer to a 32-bit signed integer */ +# define OPAL_ASM_ADDR(a) ((uintptr_t) a) +#else +# define OPAL_ASM_ADDR(a) (a) +#endif + +#if defined(__PGI) +/* work-around for bug in PGI 16.5-16.7 where the compiler fails to + * correctly emit load instructions for 64-bit operands. without this + * it will emit lwz instead of ld to load the 64-bit operand. */ +# define OPAL_ASM_VALUE64(x) (void *) (intptr_t)(x) +#else +# define OPAL_ASM_VALUE64(x) x +#endif + +#endif /* OPAL_SYS_ARCH_ATOMIC_HELPER_H */ diff --git a/opal/include/opal/sys/powerpc/atomic_llsc.h b/opal/include/opal/sys/powerpc/atomic_llsc.h new file mode 100644 index 00000000000..9e6c7aa5863 --- /dev/null +++ b/opal/include/opal/sys/powerpc/atomic_llsc.h @@ -0,0 +1,94 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2021 IBM Corporation. All rights reserved. + * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_SYS_ARCH_ATOMIC_LLSC_H +#define OPAL_SYS_ARCH_ATOMIC_LLSC_H 1 + +/* + * this file is included even when C11 or GCC built-in atomics are + * used, which is why we must check for gcc inline assembly support. + */ + +#if OPAL_C_GCC_INLINE_ASSEMBLY + +#include "opal/sys/powerpc/atomic_helper.h" + +#define OPAL_HAVE_ATOMIC_LLSC_32 1 +#define OPAL_HAVE_ATOMIC_LLSC_64 1 + +/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason + * is that even with an always_inline attribute the compiler may still emit instructions to store + * then load the arguments to/from the stack. This sequence may cause the ll reservation to be + * cancelled. */ +#define opal_atomic_ll_32(addr, ret) \ + do { \ + opal_atomic_int32_t *_addr = (addr); \ + __asm__ __volatile__("lwarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ + } while (0) + +#define opal_atomic_sc_32(addr, value, ret) \ + do { \ + opal_atomic_int32_t *_addr = (addr); \ + int32_t _ret, _foo, _newval = (int32_t) value; \ + \ + __asm__ __volatile__(" stwcx. %4, 0, %3 \n\t" \ + " li %0,0 \n\t" \ + " bne- 1f \n\t" \ + " ori %0,%0,1 \n\t" \ + "1:" \ + : "=r"(_ret), "=m"(*_addr), "=r"(_foo) \ + : "r"(_addr), "r"(_newval) \ + : "cc", "memory"); \ + ret = _ret; \ + } while (0) + +#define opal_atomic_ll_64(addr, ret) \ + do { \ + opal_atomic_int64_t *_addr = (addr); \ + __asm__ __volatile__("ldarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ + } while (0) + +#define opal_atomic_sc_64(addr, value, ret) \ + do { \ + opal_atomic_int64_t *_addr = (addr); \ + int64_t _newval = (int64_t) value; \ + int32_t _ret; \ + \ + __asm__ __volatile__(" stdcx. 
%2, 0, %1 \n\t" \ + " li %0,0 \n\t" \ + " bne- 1f \n\t" \ + " ori %0,%0,1 \n\t" \ + "1:" \ + : "=r"(_ret) \ + : "r"(_addr), "r"(OPAL_ASM_VALUE64(_newval)) \ + : "cc", "memory"); \ + ret = _ret; \ + } while (0) + +#include "opal/sys/atomic_impl_ptr_llsc.h" + +#endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ + +#endif /* OPAL_SYS_ARCH_ATOMIC_LLSC_H */ diff --git a/opal/include/opal/sys/powerpc/timer.h b/opal/include/opal/sys/powerpc/timer.h index 3dc165ce05d..216b7da01e7 100644 --- a/opal/include/opal/sys/powerpc/timer.h +++ b/opal/include/opal/sys/powerpc/timer.h @@ -9,6 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,7 +23,7 @@ typedef uint64_t opal_timer_t; -#if OPAL_GCC_INLINE_ASSEMBLY +#if OPAL_C_GCC_INLINE_ASSEMBLY static inline opal_timer_t opal_sys_timer_get_cycles(void) { @@ -42,6 +44,6 @@ static inline opal_timer_t opal_sys_timer_get_cycles(void) # define OPAL_HAVE_SYS_TIMER_GET_CYCLES 0 -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ +#endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ #endif /* ! OPAL_SYS_ARCH_TIMER_H */ diff --git a/opal/include/opal/sys/timer.h b/opal/include/opal/sys/timer.h index da59c6235f4..3f63839a48c 100644 --- a/opal/include/opal/sys/timer.h +++ b/opal/include/opal/sys/timer.h @@ -15,6 +15,8 @@ * reserved. * Copyright (c) 2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,29 +33,12 @@ #ifndef OPAL_SYS_TIMER_H #define OPAL_SYS_TIMER_H 1 -#include "opal_config.h" - -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #ifdef HAVE_SYS_TYPES_H # include #endif -/* do some quick #define cleanup in cases where we are doing - testing... */ -#ifdef OPAL_DISABLE_INLINE_ASM -# undef OPAL_C_GCC_INLINE_ASSEMBLY -# define OPAL_C_GCC_INLINE_ASSEMBLY 0 -#endif - -/* define OPAL_{GCC,DEC,XLC}_INLINE_ASSEMBLY based on the - OPAL_{C,CXX}_{GCC,DEC,XLC}_INLINE_ASSEMBLY defines and whether we - are in C or C++ */ -#if defined(c_plusplus) || defined(__cplusplus) -# define OPAL_GCC_INLINE_ASSEMBLY OPAL_CXX_GCC_INLINE_ASSEMBLY -#else -# define OPAL_GCC_INLINE_ASSEMBLY OPAL_C_GCC_INLINE_ASSEMBLY -#endif /********************************************************************** * @@ -72,11 +57,11 @@ BEGIN_C_DECLS #if defined(DOXYGEN) /* don't include system-level gorp when generating doxygen files */ -#elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 || OPAL_ASSEMBLY_ARCH == OPAL_IA32 +#elif defined(PLATFORM_ARCH_X86_64) || defined(PLATFORM_ARCH_X86) # include "opal/sys/x86_64/timer.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 || OPAL_ASSEMBLY_ARCH == OPAL_ARM +#elif defined(PLATFORM_ARCH_ARM) || defined(PLATFORM_ARCH_AARCH64) # include "opal/sys/arm64/timer.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 || OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32 +#elif defined(PLATFORM_ARCH_POWERPC) # include "opal/sys/powerpc/timer.h" #endif diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index 274f03582cb..ede9f721764 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -16,6 +16,8 @@ * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. 
+ * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,53 +32,39 @@ */ #define SMPLOCK "lock; " -#define MB() __asm__ __volatile__("" : : : "memory") -/********************************************************************** - * - * Define constants for AMD64 / x86_64 / EM64T / ... - * - *********************************************************************/ -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 - -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 - -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 /********************************************************************** * * Memory Barriers * *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY static inline void opal_atomic_mb(void) { - MB(); + __asm__ __volatile__("mfence": : :"memory"); } static inline void opal_atomic_rmb(void) { - MB(); + __asm__ __volatile__("" : : : "memory"); } static inline void opal_atomic_wmb(void) { - MB(); + __asm__ __volatile__("" : : : "memory"); } static inline void opal_atomic_isync(void) { } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ /********************************************************************** * - * Atomic math operations + * Compare and Swap * *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) @@ -91,13 +79,9 @@ static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *a return (bool) ret; } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - #define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32 #define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32 -#if OPAL_GCC_INLINE_ASSEMBLY - static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { @@ -111,12 +95,12 @@ static inline bool 
opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *a return (bool) ret; } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - #define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64 #define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64 -#if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T +#include "opal/sys/atomic_impl_ptr_cswap.h" + +#if OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, opal_int128_t *oldval, @@ -138,13 +122,14 @@ static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t # define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#if OPAL_GCC_INLINE_ASSEMBLY +#endif -# define OPAL_HAVE_ATOMIC_SWAP_32 1 -# define OPAL_HAVE_ATOMIC_SWAP_64 1 +/********************************************************************** + * + * Swap + * + *********************************************************************/ static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) { @@ -154,10 +139,6 @@ static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t new return oldval; } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#if OPAL_GCC_INLINE_ASSEMBLY - static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) { int64_t oldval; @@ -166,19 +147,24 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new return oldval; } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ +#include "opal/sys/atomic_impl_ptr_swap.h" + + +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY +#include "opal/sys/atomic_impl_spinlock.h" -# define OPAL_HAVE_ATOMIC_ADD_32 1 -/** - * atomic_add - add integer to atomic variable - * @i: integer 
value to add - * @v: pointer of type int +/********************************************************************** * - * Atomically adds @i to @v. - */ + * Atomic math operations + * + *********************************************************************/ + static inline int32_t opal_atomic_fetch_add_32(opal_atomic_int32_t *v, int i) { int ret = i; @@ -186,15 +172,11 @@ static inline int32_t opal_atomic_fetch_add_32(opal_atomic_int32_t *v, int i) return ret; } -# define OPAL_HAVE_ATOMIC_ADD_64 1 +static inline int32_t opal_atomic_add_fetch_32(opal_atomic_int32_t *v, int i) +{ + return opal_atomic_fetch_add_32(v, i) + i; +} -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type int - * - * Atomically adds @i to @v. - */ static inline int64_t opal_atomic_fetch_add_64(opal_atomic_int64_t *v, int64_t i) { int64_t ret = i; @@ -202,15 +184,11 @@ static inline int64_t opal_atomic_fetch_add_64(opal_atomic_int64_t *v, int64_t i return ret; } -# define OPAL_HAVE_ATOMIC_SUB_32 1 +static inline int64_t opal_atomic_add_fetch_64(opal_atomic_int64_t *v, int64_t i) +{ + return opal_atomic_fetch_add_64(v, i) + i; +} -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type int - * - * Atomically subtracts @i from @v. - */ static inline int32_t opal_atomic_fetch_sub_32(opal_atomic_int32_t *v, int i) { int ret = -i; @@ -218,15 +196,11 @@ static inline int32_t opal_atomic_fetch_sub_32(opal_atomic_int32_t *v, int i) return ret; } -# define OPAL_HAVE_ATOMIC_SUB_64 1 +static inline int32_t opal_atomic_sub_fetch_32(opal_atomic_int32_t *v, int i) +{ + return opal_atomic_fetch_sub_32(v, i) - i; +} -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type int - * - * Atomically subtracts @i from @v. 
- */ static inline int64_t opal_atomic_fetch_sub_64(opal_atomic_int64_t *v, int64_t i) { int64_t ret = -i; @@ -234,6 +208,43 @@ static inline int64_t opal_atomic_fetch_sub_64(opal_atomic_int64_t *v, int64_t i return ret; } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ +static inline int64_t opal_atomic_sub_fetch_64(opal_atomic_int64_t *v, int64_t i) +{ + return opal_atomic_fetch_sub_64(v, i) - i; +} + +#define OPAL_ATOMIC_DEFINE_OP(type, bits, operation, name) \ + static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, type value) \ + { \ + type oldval; \ + do { \ + oldval = *addr; \ + } while (!opal_atomic_compare_exchange_strong_##bits(addr, &oldval, \ + oldval operation value)); \ + \ + return oldval; \ + } \ + \ + static inline type opal_atomic_##name##_fetch_##bits(opal_atomic_##type *addr, type value) \ + { \ + type oldval, newval; \ + do { \ + oldval = *addr; \ + newval = oldval operation value; \ + } while (!opal_atomic_compare_exchange_strong_##bits(addr, &oldval, newval)); \ + \ + return newval; \ + } + +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, &, and) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, |, or) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, ^, xor) + +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, &, and) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, |, or) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, ^, xor) + +#include "opal/sys/atomic_math_minmax_impl.h" +#include "opal/sys/atomic_math_size_t_impl.h" #endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/x86_64/timer.h b/opal/include/opal/sys/x86_64/timer.h index 9c884a993e1..cbef5e82e95 100644 --- a/opal/include/opal/sys/x86_64/timer.h +++ b/opal/include/opal/sys/x86_64/timer.h @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. ALl rights * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,9 +30,9 @@ typedef uint64_t opal_timer_t; #undef OPAL_TIMER_MONOTONIC #define OPAL_TIMER_MONOTONIC 0 -#if OPAL_GCC_INLINE_ASSEMBLY +#if OPAL_C_GCC_INLINE_ASSEMBLY -# if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 +# if defined(PLATFORM_ARCH_X86_64) /* TODO: add AMD mfence version and dispatch at init */ static inline opal_timer_t opal_sys_timer_get_cycles(void) @@ -85,6 +87,6 @@ static inline opal_timer_t opal_sys_timer_get_cycles(void) # define OPAL_HAVE_SYS_TIMER_GET_CYCLES 0 -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ +#endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ #endif /* ! OPAL_SYS_ARCH_TIMER_H */ diff --git a/opal/include/opal_stdatomic.h b/opal/include/opal_stdatomic.h index 4af17bc2b42..35437551208 100644 --- a/opal/include/opal_stdatomic.h +++ b/opal/include/opal_stdatomic.h @@ -2,6 +2,8 @@ /* * Copyright (c) 2018 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,7 +16,7 @@ # include "opal_stdint.h" -# if (OPAL_ASSEMBLY_BUILTIN != OPAL_BUILTIN_C11) || defined(__INTEL_COMPILER) +#if OPAL_USE_C11_ATOMICS == 0 typedef volatile int opal_atomic_int_t; typedef volatile long opal_atomic_long_t; @@ -29,15 +31,16 @@ typedef volatile ssize_t opal_atomic_ssize_t; typedef volatile intptr_t opal_atomic_intptr_t; typedef volatile uintptr_t opal_atomic_uintptr_t; -# else /* OPAL_HAVE_C__ATOMIC */ +typedef opal_atomic_int32_t opal_atomic_lock_t; -# include <stdatomic.h> +enum { OPAL_ATOMIC_LOCK_UNLOCKED = 0, + OPAL_ATOMIC_LOCK_LOCKED = 1 }; -# ifdef __INTEL_COMPILER -# if __INTEL_COMPILER_BUILD_DATE <= 20200310 -#warning C11 _Atomic type not fully supported. The C11 atomic support should have been disabled.
-# endif -# endif +# define OPAL_ATOMIC_LOCK_INIT OPAL_ATOMIC_LOCK_UNLOCKED + +#else /* OPAL_USE_C11_ATOMICS == 0 */ + +# include <stdatomic.h> typedef atomic_int opal_atomic_int_t; typedef atomic_long opal_atomic_long_t; @@ -52,12 +55,18 @@ typedef _Atomic ssize_t opal_atomic_ssize_t; typedef _Atomic intptr_t opal_atomic_intptr_t; typedef _Atomic uintptr_t opal_atomic_uintptr_t; -# endif /* OPAL_HAVE_C__ATOMIC */ +typedef atomic_flag opal_atomic_lock_t; + +# define OPAL_ATOMIC_LOCK_UNLOCKED false +# define OPAL_ATOMIC_LOCK_LOCKED true + +# define OPAL_ATOMIC_LOCK_INIT ATOMIC_FLAG_INIT + +# endif /* OPAL_USE_C11_ATOMICS == 0 */ # if HAVE_OPAL_INT128_T -/* do not use C11 atomics for __int128 if they are not lock free */ -# if OPAL_HAVE_C11_CSWAP_INT128 && !defined(__INTEL_COMPILER) +# if OPAL_USE_C11_ATOMICS && OPAL_HAVE_C11_CSWAP_INT128 typedef _Atomic opal_int128_t opal_atomic_int128_t; diff --git a/opal/mca/allocator/bucket/Makefile.am b/opal/mca/allocator/bucket/Makefile.am index 4ddae2f2bf8..676ae96eef2 100644 --- a/opal/mca/allocator/bucket/Makefile.am +++ b/opal/mca/allocator/bucket/Makefile.am @@ -18,6 +18,8 @@ # $HEADER$ # +dist_opaldata_DATA = help-mca-allocator-bucket.txt + sources = \ allocator_bucket.c \ allocator_bucket_alloc.c \ diff --git a/opal/mca/allocator/bucket/allocator_bucket_alloc.c b/opal/mca/allocator/bucket/allocator_bucket_alloc.c index ab2da50a33e..170d941536f 100644 --- a/opal/mca/allocator/bucket/allocator_bucket_alloc.c +++ b/opal/mca/allocator/bucket/allocator_bucket_alloc.c @@ -20,6 +20,8 @@ #include "opal_config.h" #include "opal/mca/allocator/bucket/allocator_bucket_alloc.h" #include "opal/constants.h" +#include "opal/util/show_help.h" + /** * The define controls the size in bytes of the 1st bucket and hence every one * afterwards. @@ -31,6 +33,8 @@ */ #define MCA_ALLOCATOR_BUCKET_1_BITSHIFTS 3 +static int max_bucket_idx; + /* * Initializes the mca_allocator_bucket_options_t data structure for the passed * parameters.
@@ -47,6 +51,9 @@ mca_allocator_bucket_init(mca_allocator_base_module_t *mem, int num_buckets, if (num_buckets <= 0) { num_buckets = 30; } + + max_bucket_idx = num_buckets - 1; + /* initialize the array of buckets */ size = sizeof(mca_allocator_bucket_bucket_t) * num_buckets; mem_options->buckets = (mca_allocator_bucket_bucket_t *) malloc(size); @@ -89,6 +96,13 @@ void *mca_allocator_bucket_alloc(mca_allocator_base_module_t *mem, size_t size) bucket_size <<= 1; } + if( bucket_num > max_bucket_idx ) { + size_t sz_bucket = MCA_ALLOCATOR_BUCKET_1_SIZE; + opal_show_help ("help-mca-allocator-bucket.txt", "buffer too large", 1, size, sz_bucket << max_bucket_idx, + "allocator_bucket_num_buckets", bucket_num + 1); + return (NULL); + } + /* now that we know what bucket it will come from, we must get the lock */ OPAL_THREAD_LOCK(&(mem_options->buckets[bucket_num].lock)); /* see if there is already a free chunk */ @@ -191,6 +205,14 @@ void *mca_allocator_bucket_alloc_align(mca_allocator_base_module_t *mem, size_t bucket_size >>= 1; bucket_num++; } + + if( bucket_num > max_bucket_idx ) { + size_t sz_bucket = MCA_ALLOCATOR_BUCKET_1_SIZE; + opal_show_help ("help-mca-allocator-bucket.txt", "aligned buffer too large", 1, allocated_size, sz_bucket << max_bucket_idx, + "allocator_bucket_num_buckets", bucket_num + 1); + return (NULL); + } + bucket_size = 1; bucket_size <<= MCA_ALLOCATOR_BUCKET_1_BITSHIFTS + bucket_num; diff --git a/opal/mca/allocator/bucket/help-mca-allocator-bucket.txt b/opal/mca/allocator/bucket/help-mca-allocator-bucket.txt new file mode 100644 index 00000000000..27edbb92fa4 --- /dev/null +++ b/opal/mca/allocator/bucket/help-mca-allocator-bucket.txt @@ -0,0 +1,19 @@ +# -*- text -*- +# +# Copyright (c) 2021 IBM Corporation. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English help file for Open MPI's allocator bucket support +# +[buffer too large] +ERROR: Requested buffer size %zu exceeds limit of %zu +Consider setting "%s" to %d +# +[aligned buffer too large] +ERROR: Requested aligned buffer size %zu exceeds limit of %zu +Consider setting "%s" to %d +# diff --git a/opal/mca/backtrace/execinfo/backtrace_execinfo.c b/opal/mca/backtrace/execinfo/backtrace_execinfo.c index 1e299676e30..f0cbf572819 100644 --- a/opal/mca/backtrace/execinfo/backtrace_execinfo.c +++ b/opal/mca/backtrace/execinfo/backtrace_execinfo.c @@ -30,6 +30,7 @@ #endif #include "opal/constants.h" +#include "opal/util/output.h" #include "opal/mca/backtrace/backtrace.h" int opal_backtrace_print(FILE *file, char *prefix, int strip) @@ -52,10 +53,10 @@ int opal_backtrace_print(FILE *file, char *prefix, int strip) for (i = strip; i < trace_size; i++) { if (NULL != prefix) { - write(fd, prefix, strlen(prefix)); + opal_best_effort_write(fd, prefix, strlen(prefix)); } len = snprintf(buf, sizeof(buf), "[%2d] ", i - strip); - write(fd, buf, len); + opal_best_effort_write(fd, buf, len); backtrace_symbols_fd(&trace[i], 1, fd); } diff --git a/opal/mca/base/mca_base_var.c b/opal/mca/base/mca_base_var.c index 08fa7ff4ad1..7b9e3bb9b99 100644 --- a/opal/mca/base/mca_base_var.c +++ b/opal/mca/base/mca_base_var.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, diff --git a/opal/mca/btl/base/btl_base_am_rdma.c b/opal/mca/btl/base/btl_base_am_rdma.c index 4feeff87c24..2b1e3400195 100644 --- a/opal/mca/btl/base/btl_base_am_rdma.c +++ b/opal/mca/btl/base/btl_base_am_rdma.c @@ -3,7 +3,9 @@ * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2020-2021 Google, LLC. All rights reserved. - * Copyright (c) 2021 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2021-2022 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -11,18 +13,27 @@ * $HEADER$ */ +#include "opal_config.h" + +#include + #include "opal/mca/btl/base/btl_base_am_rdma.h" #include "opal/mca/btl/base/base.h" #include "opal/mca/btl/base/btl_base_error.h" #include "opal/mca/threads/mutex.h" +#include "opal/util/minmax.h" + /** - * @brief data for active-message atomics + * @brief global data for active message wrapper * - * There is currently only one module but it is defined to allow - * moving the data pointer into the associated BTL module. + * While individual entries in queued_responses and + * queued_initiator_descriptors are module-specific (ie, per BTL + * module), they are progressed in a common progress function. It is + * much more efficient to have one list of work to do, rather than + * having to poll through all active btls to find the work to do. 
*/ -struct mca_btl_base_am_rdma_module_t { +struct am_rdma_component_t { opal_object_t super; /** provides protection for multi-threaded access to module members */ opal_mutex_t mutex; @@ -31,29 +42,36 @@ struct mca_btl_base_am_rdma_module_t { /** queued initiator descriptors */ opal_list_t queued_initiator_descriptors; }; -typedef struct mca_btl_base_am_rdma_module_t mca_btl_base_am_rdma_module_t; +typedef struct am_rdma_component_t am_rdma_component_t; -static void mca_btl_base_am_rdma_module_init(mca_btl_base_am_rdma_module_t *module) +static am_rdma_component_t default_component; + +static void am_rdma_component_init(am_rdma_component_t *component) { - OBJ_CONSTRUCT(&module->mutex, opal_mutex_t); - OBJ_CONSTRUCT(&module->queued_responses, opal_list_t); - OBJ_CONSTRUCT(&module->queued_initiator_descriptors, opal_list_t); + OBJ_CONSTRUCT(&component->mutex, opal_mutex_t); + OBJ_CONSTRUCT(&component->queued_responses, opal_list_t); + OBJ_CONSTRUCT(&component->queued_initiator_descriptors, opal_list_t); } -static void mca_btl_base_am_rdma_module_fini(mca_btl_base_am_rdma_module_t *module) +static void am_rdma_component_fini(am_rdma_component_t *component) { - OBJ_DESTRUCT(&module->mutex); - OBJ_DESTRUCT(&module->queued_responses); - OBJ_DESTRUCT(&module->queued_initiator_descriptors); + OBJ_DESTRUCT(&component->mutex); + OBJ_DESTRUCT(&component->queued_responses); + OBJ_DESTRUCT(&component->queued_initiator_descriptors); } -static OBJ_CLASS_INSTANCE(mca_btl_base_am_rdma_module_t, opal_object_t, - mca_btl_base_am_rdma_module_init, mca_btl_base_am_rdma_module_fini); +static OBJ_CLASS_INSTANCE(am_rdma_component_t, opal_object_t, + am_rdma_component_init, am_rdma_component_fini); + + +OBJ_CLASS_INSTANCE(mca_btl_base_am_rdma_module_t, opal_object_t, + NULL, NULL); + /** * @brief response header for an active-message RDMA/atomic operation */ -struct mca_btl_base_rdma_response_hdr_t { +struct am_rdma_response_hdr_t { /** context for the response */ uint64_t context; /** 
initiator address */ @@ -63,12 +81,13 @@ struct mca_btl_base_rdma_response_hdr_t { /** response data may follow. the size is implied by the size of the incoming * descriptor */ }; -typedef struct mca_btl_base_rdma_response_hdr_t mca_btl_base_rdma_response_hdr_t; +typedef struct am_rdma_response_hdr_t am_rdma_response_hdr_t; + /** * @brief type of active-message RDMA/atomic operation */ -enum mca_btl_base_rdma_type_t { +enum am_rdma_type_t { /** active-message put. May be implemented with send/recv or RDMA get * depending on the functions that the BTL implements. */ MCA_BTL_BASE_AM_PUT, @@ -80,12 +99,13 @@ enum mca_btl_base_rdma_type_t { /** compare-and-swap */ MCA_BTL_BASE_AM_CAS, }; -typedef enum mca_btl_base_rdma_type_t mca_btl_base_rdma_type_t; +typedef enum am_rdma_type_t am_rdma_type_t; + /** * @brief origin-side operation context for an active-message RDMA/atomic operation */ -struct mca_btl_base_rdma_context_t { +struct am_rdma_context_t { opal_object_t super; /** operation type */ uint8_t type; @@ -110,35 +130,37 @@ struct mca_btl_base_rdma_context_t { /** local handle for this request */ struct mca_btl_base_registration_handle_t *local_handle; }; -typedef struct mca_btl_base_rdma_context_t mca_btl_base_rdma_context_t; +typedef struct am_rdma_context_t am_rdma_context_t; -static void mca_btl_base_rdma_context_init(mca_btl_base_rdma_context_t *context) +static void am_rdma_context_init(am_rdma_context_t *context) { context->sent = 0; context->acknowledged = 0; context->descriptor = NULL; } -static OBJ_CLASS_INSTANCE(mca_btl_base_rdma_context_t, opal_object_t, - mca_btl_base_rdma_context_init, NULL); +static OBJ_CLASS_INSTANCE(am_rdma_context_t, opal_object_t, + am_rdma_context_init, NULL); + /** * @brief queued initiator descriptor */ -struct mca_btl_base_am_rdma_queued_descriptor_t { +struct am_rdma_queued_descriptor_t { opal_list_item_t super; - mca_btl_base_module_t *btl; + mca_btl_base_am_rdma_module_t *am_module; struct mca_btl_base_endpoint_t 
*endpoint; mca_btl_base_descriptor_t *descriptor; }; -typedef struct mca_btl_base_am_rdma_queued_descriptor_t mca_btl_base_am_rdma_queued_descriptor_t; +typedef struct am_rdma_queued_descriptor_t am_rdma_queued_descriptor_t; + +static OBJ_CLASS_INSTANCE(am_rdma_queued_descriptor_t, opal_list_item_t, NULL, NULL); -static OBJ_CLASS_INSTANCE(mca_btl_base_am_rdma_queued_descriptor_t, opal_list_item_t, NULL, NULL); /** * @brief header for an active-message atomic/RDMA operation */ -struct mca_btl_base_rdma_hdr_t { +struct am_rdma_hdr_t { /** type of operation requested. */ uint8_t type; uint8_t padding[3]; @@ -176,16 +198,13 @@ struct mca_btl_base_rdma_hdr_t { /* the following fields are not used on the target and are only relevant * to the initiator */ uint64_t context; - - /* registration handles (if required) */ - uint8_t handle_data[]; }; -typedef struct mca_btl_base_rdma_hdr_t mca_btl_base_rdma_hdr_t; +typedef struct am_rdma_hdr_t am_rdma_hdr_t; /** * @brief target-side RDMA/atomic operation */ -struct mca_btl_base_rdma_operation_t { +struct am_rdma_operation_t { /** these may be stored in lists */ opal_list_item_t super; /** btl module associated with this operation */ @@ -197,7 +216,7 @@ struct mca_btl_base_rdma_operation_t { * needs to be retried. 
*/ mca_btl_base_descriptor_t *descriptor; /** incoming operation header */ - mca_btl_base_rdma_hdr_t hdr; + am_rdma_hdr_t hdr; /** local memory handle (if using RDMA) */ uint8_t local_handle_data[MCA_BTL_REG_HANDLE_MAX_SIZE]; /** remote memory handle (if using RMDA) */ @@ -209,50 +228,32 @@ struct mca_btl_base_rdma_operation_t { /** rdma operation was completed (waiting response) */ bool is_completed; }; -typedef struct mca_btl_base_rdma_operation_t mca_btl_base_rdma_operation_t; +typedef struct am_rdma_operation_t am_rdma_operation_t; -static OBJ_CLASS_INSTANCE(mca_btl_base_rdma_operation_t, opal_list_item_t, NULL, NULL); +static OBJ_CLASS_INSTANCE(am_rdma_operation_t, opal_list_item_t, NULL, NULL); -static inline size_t size_t_min(size_t a, size_t b) -{ - return (a < b) ? a : b; -} -static inline size_t size_t_max(size_t a, size_t b) +static inline bool am_rdma_is_atomic(am_rdma_type_t type) { - return (a > b) ? a : b; -} - -static mca_btl_base_am_rdma_module_t default_module; - -static inline bool mca_btl_base_rdma_use_rdma_get(mca_btl_base_module_t *btl) -{ - return !!(btl->btl_flags & MCA_BTL_FLAGS_GET); + return (MCA_BTL_BASE_AM_PUT != type && MCA_BTL_BASE_AM_GET != type); } -static inline bool mca_btl_base_rdma_use_rdma_put(mca_btl_base_module_t *btl) -{ - return !!(btl->btl_flags & MCA_BTL_FLAGS_PUT); -} -static inline bool mca_btl_base_rdma_is_atomic(mca_btl_base_rdma_type_t type) +static inline size_t am_rdma_operation_size(mca_btl_base_am_rdma_module_t *am_module, + am_rdma_type_t type, + size_t remaining) { - return (MCA_BTL_BASE_AM_PUT != type && MCA_BTL_BASE_AM_GET != type); -} + mca_btl_base_module_t *btl = am_module->btl; -static inline size_t mca_btl_base_rdma_operation_size(mca_btl_base_module_t *btl, - mca_btl_base_rdma_type_t type, - size_t remaining) -{ switch (type) { case MCA_BTL_BASE_AM_PUT: - if (mca_btl_base_rdma_use_rdma_get(btl)) { - return size_t_min(remaining, btl->btl_get_limit); + if (am_module->use_rdma_get) { + return 
opal_min(remaining, btl->btl_get_limit); } break; case MCA_BTL_BASE_AM_GET: - if (mca_btl_base_rdma_use_rdma_put(btl)) { - return size_t_min(remaining, btl->btl_put_limit); + if (am_module->use_rdma_put) { + return opal_min(remaining, btl->btl_put_limit); } break; case MCA_BTL_BASE_AM_ATOMIC: @@ -261,10 +262,11 @@ static inline size_t mca_btl_base_rdma_operation_size(mca_btl_base_module_t *btl return remaining; } - return size_t_min(remaining, btl->btl_max_send_size - sizeof(mca_btl_base_rdma_hdr_t)); + return opal_min(remaining, btl->btl_max_send_size - sizeof(am_rdma_hdr_t)); } -static inline int mca_btl_base_rdma_tag(mca_btl_base_rdma_type_t type) + +static inline int am_rdma_tag(am_rdma_type_t type) { (void) type; switch (type) { @@ -278,11 +280,13 @@ static inline int mca_btl_base_rdma_tag(mca_btl_base_rdma_type_t type) return MCA_BTL_BASE_TAG_RDMA_RESP; } -static inline int mca_btl_base_rdma_resp_tag(void) + +static inline int am_rdma_resp_tag(void) { return MCA_BTL_BASE_TAG_RDMA_RESP; } + /** * @brief copy data from a segment to a local address * @@ -291,9 +295,9 @@ static inline int mca_btl_base_rdma_resp_tag(void) * @in segments segments to copy data from * @in segment_count number of segments */ -static void mca_btl_base_copy_from_segments(uint64_t addr, size_t skip_bytes, - const mca_btl_base_segment_t *segments, - size_t segment_count) +static void am_rdma_copy_from_segments(uint64_t addr, size_t skip_bytes, + const mca_btl_base_segment_t *segments, + size_t segment_count) { const void *seg0_data = (const void *) ((uintptr_t) segments[0].seg_addr.pval + skip_bytes); size_t seg0_len = segments[0].seg_len - skip_bytes; @@ -314,6 +318,7 @@ static void mca_btl_base_copy_from_segments(uint64_t addr, size_t skip_bytes, } } + /** * @brief copy data from a local address into a segment * @@ -322,11 +327,11 @@ static void mca_btl_base_copy_from_segments(uint64_t addr, size_t skip_bytes, * @in segments segments to copy data to * @in segment_count number of 
segments */ -static void mca_btl_base_copy_to_segments(uint64_t addr, size_t max_len, size_t skip_bytes, - mca_btl_base_segment_t *segments, size_t segment_count) +static void am_rdma_copy_to_segments(uint64_t addr, size_t max_len, size_t skip_bytes, + mca_btl_base_segment_t *segments, size_t segment_count) { void *seg0_data = (void *) ((uintptr_t) segments[0].seg_addr.pval + skip_bytes); - size_t seg0_len = size_t_min(max_len, segments[0].seg_len - skip_bytes); + size_t seg0_len = opal_min(max_len, segments[0].seg_len - skip_bytes); if (seg0_len > 0) { BTL_VERBOSE( @@ -338,7 +343,7 @@ static void mca_btl_base_copy_to_segments(uint64_t addr, size_t max_len, size_t } for (size_t i = 1; i < segment_count && max_len; ++i) { - size_t seg_len = size_t_min(segments[i].seg_len, max_len); + size_t seg_len = opal_min(segments[i].seg_len, max_len); BTL_VERBOSE(("packing %" PRIsize_t " bytes from 0x%" PRIx64 " to segment %" PRIsize_t, seg_len, addr, i)); @@ -351,27 +356,30 @@ static void mca_btl_base_copy_to_segments(uint64_t addr, size_t max_len, size_t } } -static void mca_btl_base_am_queue_initiator_descriptor(mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - mca_btl_base_descriptor_t *descriptor) + +static void am_rdma_queue_initiator_descriptor(mca_btl_base_am_rdma_module_t *am_module, + struct mca_btl_base_endpoint_t *endpoint, + mca_btl_base_descriptor_t *descriptor) { - mca_btl_base_am_rdma_queued_descriptor_t *queued_descriptor = OBJ_NEW( - mca_btl_base_am_rdma_queued_descriptor_t); + am_rdma_queued_descriptor_t *queued_descriptor = OBJ_NEW(am_rdma_queued_descriptor_t); - queued_descriptor->btl = btl; + queued_descriptor->am_module = am_module; queued_descriptor->endpoint = endpoint; queued_descriptor->descriptor = descriptor; - OPAL_THREAD_SCOPED_LOCK(&default_module.mutex, - opal_list_append(&default_module.queued_initiator_descriptors, + OPAL_THREAD_SCOPED_LOCK(&default_component.mutex, + 
opal_list_append(&default_component.queued_initiator_descriptors, &queued_descriptor->super)); } -static inline int mca_btl_base_am_rdma_advance(mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - mca_btl_base_rdma_context_t *context, - bool send_descriptor) + +static inline int am_rdma_advance(mca_btl_base_am_rdma_module_t *am_module, + struct mca_btl_base_endpoint_t *endpoint, + am_rdma_context_t *context, + bool send_descriptor) { + mca_btl_base_module_t *btl = am_module->btl; + int ret; const size_t remaining = context->total_size - context->sent; if (0 == remaining) { @@ -387,14 +395,16 @@ static inline int mca_btl_base_am_rdma_advance(mca_btl_base_module_t *btl, } mca_btl_base_descriptor_t *descriptor = context->descriptor; - mca_btl_base_rdma_hdr_t *hdr = (mca_btl_base_rdma_hdr_t *) descriptor->des_segments[0] + am_rdma_hdr_t *hdr = (am_rdma_hdr_t *) descriptor->des_segments[0] .seg_addr.pval; - const size_t packet_size = mca_btl_base_rdma_operation_size(btl, hdr->type, remaining); + const size_t packet_size = am_rdma_operation_size(am_module, hdr->type, remaining); - if (!mca_btl_base_rdma_is_atomic(hdr->type)) { + if (!am_rdma_is_atomic(hdr->type)) { hdr->data.rdma.size = packet_size; hdr->data.rdma.initiator_address = (uint64_t) context->local_address + context->sent; } else { + /* atomics today are single datatype entries */ + assert(packet_size < UINT8_MAX); hdr->data.atomic.size = packet_size; } @@ -404,41 +414,50 @@ static inline int mca_btl_base_am_rdma_advance(mca_btl_base_module_t *btl, if (MCA_BTL_BASE_AM_PUT == hdr->type && !hdr->data.rdma.use_rdma) { /* copy the next block into the fragment buffer */ - mca_btl_base_copy_to_segments(hdr->data.rdma.initiator_address, packet_size, sizeof(*hdr), - descriptor->des_segments, descriptor->des_segment_count); + am_rdma_copy_to_segments(hdr->data.rdma.initiator_address, packet_size, sizeof(*hdr), + descriptor->des_segments, descriptor->des_segment_count); } if (send_descriptor) { - 
return btl->btl_send(btl, endpoint, descriptor, mca_btl_base_rdma_tag(hdr->type)); + assert(0 != (descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)); + ret = btl->btl_send(btl, endpoint, descriptor, am_rdma_tag(hdr->type)); + if (ret == 1) { + ret = OPAL_SUCCESS; + } + return ret; } /* queue for later to avoid btl_send in callback */ - mca_btl_base_am_queue_initiator_descriptor(btl, endpoint, descriptor); + am_rdma_queue_initiator_descriptor(am_module, endpoint, descriptor); return OPAL_SUCCESS; } -static void mca_btl_base_am_descriptor_complete(mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - mca_btl_base_descriptor_t *descriptor, int status) + +static void am_rdma_descriptor_complete(mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, + mca_btl_base_descriptor_t *descriptor, int status) { - (void) mca_btl_base_am_rdma_advance(btl, endpoint, - (mca_btl_base_rdma_context_t *) descriptor->des_context, - /*send_descriptor=*/false); + mca_btl_base_am_rdma_module_t *am_module = (mca_btl_base_am_rdma_module_t *)descriptor->des_cbdata; + + (void) am_rdma_advance(am_module, endpoint, + (am_rdma_context_t *) descriptor->des_context, + /*send_descriptor=*/false); } -static inline int -mca_btl_base_rdma_start(mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, - int type, uint64_t operand1, uint64_t operand2, mca_btl_base_atomic_op_t op, - int order, int flags, size_t size, void *local_address, - mca_btl_base_registration_handle_t *local_handle, int64_t remote_address, - mca_btl_base_registration_handle_t *remote_handle, - mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) + +static inline int am_rdma_start(mca_btl_base_am_rdma_module_t *am_module, struct mca_btl_base_endpoint_t *endpoint, + int type, uint64_t operand1, uint64_t operand2, mca_btl_base_atomic_op_t op, + int order, int flags, size_t size, void *local_address, + mca_btl_base_registration_handle_t *local_handle, int64_t 
remote_address, + mca_btl_base_registration_handle_t *remote_handle, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_btl_base_rdma_hdr_t *hdr; + mca_btl_base_module_t *btl = am_module->btl; + am_rdma_hdr_t *hdr; size_t packet_size = sizeof(*hdr); mca_btl_base_descriptor_t *descriptor; - mca_btl_base_rdma_context_t *context = OBJ_NEW(mca_btl_base_rdma_context_t); + am_rdma_context_t *context = OBJ_NEW(am_rdma_context_t); if (OPAL_UNLIKELY(NULL == context)) { return OPAL_ERR_OUT_OF_RESOURCE; @@ -459,14 +478,14 @@ mca_btl_base_rdma_start(mca_btl_base_module_t *btl, struct mca_btl_base_endpoint if (sizeof(*hdr) + size <= btl->btl_eager_limit) { /* just go ahead and send the data */ packet_size += size; - } else if (!mca_btl_base_rdma_use_rdma_get (btl)) { - packet_size += size_t_min (size, btl->btl_max_send_size - sizeof (*hdr)); + } else if (!am_module->use_rdma_get) { + packet_size += opal_min (size, btl->btl_max_send_size - sizeof (*hdr)); } else { use_rdma = true; } } else if (MCA_BTL_BASE_AM_GET == type) { - if (!mca_btl_base_rdma_use_rdma_put(btl)) { - packet_size += size_t_min(size, btl->btl_max_send_size - sizeof(*hdr)); + if (!am_module->use_rdma_put) { + packet_size += opal_min(size, btl->btl_max_send_size - sizeof(*hdr)); } else { use_rdma = true; } @@ -496,14 +515,14 @@ mca_btl_base_rdma_start(mca_btl_base_module_t *btl, struct mca_btl_base_endpoint * be released on response before the descriptor callback has completed. 
*/ OBJ_RETAIN(context); - descriptor->des_cbfunc = mca_btl_base_am_descriptor_complete; - descriptor->des_cbdata = local_handle; + descriptor->des_cbfunc = am_rdma_descriptor_complete; + descriptor->des_cbdata = am_module; descriptor->des_context = context; - hdr = (mca_btl_base_rdma_hdr_t *) descriptor->des_segments[0].seg_addr.pval; + hdr = (am_rdma_hdr_t *) descriptor->des_segments[0].seg_addr.pval; hdr->type = type; - if (!mca_btl_base_rdma_is_atomic(type)) { + if (!am_rdma_is_atomic(type)) { hdr->data.rdma.use_rdma = use_rdma; } else { hdr->data.atomic.op = op; @@ -520,14 +539,16 @@ mca_btl_base_rdma_start(mca_btl_base_module_t *btl, struct mca_btl_base_endpoint memcpy(handle_buffer, remote_handle, btl->btl_registration_handle_size); } - return mca_btl_base_am_rdma_advance(btl, endpoint, context, /*send_descriptor=*/true); + return am_rdma_advance(am_module, endpoint, context, /*send_descriptor=*/true); } -static mca_btl_base_rdma_operation_t *mca_btl_base_rdma_alloc_operation( - mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, - mca_btl_base_descriptor_t *descriptor, const mca_btl_base_rdma_hdr_t *hdr) + +static am_rdma_operation_t *am_rdma_alloc_operation(mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, + mca_btl_base_descriptor_t *descriptor, + const am_rdma_hdr_t *hdr) { - mca_btl_base_rdma_operation_t *operation = OBJ_NEW(mca_btl_base_rdma_operation_t); + am_rdma_operation_t *operation = OBJ_NEW(am_rdma_operation_t); if (NULL == operation) { return NULL; } @@ -539,7 +560,7 @@ static mca_btl_base_rdma_operation_t *mca_btl_base_rdma_alloc_operation( operation->is_queued = false; memcpy(&operation->hdr, hdr, sizeof(*hdr)); - if (!mca_btl_base_rdma_is_atomic(hdr->type) && hdr->data.rdma.use_rdma + if (!am_rdma_is_atomic(hdr->type) && hdr->data.rdma.use_rdma && btl->btl_register_mem) { const uint8_t *handle_data = (const uint8_t *) (hdr + 1); /* the initiator packs these in order of their local and then remote. 
*/ @@ -551,15 +572,16 @@ static mca_btl_base_rdma_operation_t *mca_btl_base_rdma_alloc_operation( return operation; } -static void mca_btl_base_rdma_queue_operation(mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - mca_btl_base_descriptor_t *descriptor, - uint64_t atomic_response, - const mca_btl_base_rdma_hdr_t *hdr, - mca_btl_base_rdma_operation_t *operation) + +static void am_rdma_queue_operation(mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, + mca_btl_base_descriptor_t *descriptor, + uint64_t atomic_response, + const am_rdma_hdr_t *hdr, + am_rdma_operation_t *operation) { if (NULL == operation) { - operation = mca_btl_base_rdma_alloc_operation(btl, endpoint, descriptor, hdr); + operation = am_rdma_alloc_operation(btl, endpoint, descriptor, hdr); if (NULL == operation) { /* couldn't even allocate a small amount of memory. not much else can be done. */ BTL_ERROR(("could not allocate memory to queue active-message RDMA operation")); @@ -569,21 +591,22 @@ static void mca_btl_base_rdma_queue_operation(mca_btl_base_module_t *btl, operation->is_queued = true; operation->atomic_response = atomic_response; - OPAL_THREAD_SCOPED_LOCK(&default_module.mutex, - opal_list_append(&default_module.queued_responses, &operation->super)); + OPAL_THREAD_SCOPED_LOCK(&default_component.mutex, + opal_list_append(&default_component.queued_responses, &operation->super)); } -static int mca_btl_base_am_rdma_respond(mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - mca_btl_base_descriptor_t **descriptor, void *addr, - const mca_btl_base_rdma_hdr_t *hdr) + +static int am_rdma_respond(mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, + mca_btl_base_descriptor_t **descriptor, void *addr, + const am_rdma_hdr_t *hdr) { mca_btl_base_descriptor_t *send_descriptor = *descriptor; *descriptor = NULL; if (NULL == send_descriptor) { - mca_btl_base_rdma_response_hdr_t *resp_hdr; - size_t data_size = 
mca_btl_base_rdma_is_atomic(hdr->type) ? hdr->data.atomic.size + am_rdma_response_hdr_t *resp_hdr; + size_t data_size = am_rdma_is_atomic(hdr->type) ? hdr->data.atomic.size : hdr->data.rdma.size; size_t packet_size = sizeof(*resp_hdr) + (addr ? data_size : 0); send_descriptor = btl->btl_alloc(btl, endpoint, MCA_BTL_NO_ORDER, packet_size, @@ -592,7 +615,7 @@ static int mca_btl_base_am_rdma_respond(mca_btl_base_module_t *btl, return OPAL_ERR_OUT_OF_RESOURCE; } - resp_hdr = (mca_btl_base_rdma_response_hdr_t *) send_descriptor->des_segments[0] + resp_hdr = (am_rdma_response_hdr_t *) send_descriptor->des_segments[0] .seg_addr.pval; resp_hdr->context = hdr->context; if (MCA_BTL_BASE_AM_GET == hdr->type) { @@ -604,9 +627,9 @@ static int mca_btl_base_am_rdma_respond(mca_btl_base_module_t *btl, resp_hdr->response_size = data_size; if (NULL != addr) { - mca_btl_base_copy_to_segments((uint64_t)(uintptr_t) addr, packet_size, - sizeof(*resp_hdr), send_descriptor->des_segments, - send_descriptor->des_segment_count); + am_rdma_copy_to_segments((uint64_t)(uintptr_t) addr, packet_size, + sizeof(*resp_hdr), send_descriptor->des_segments, + send_descriptor->des_segment_count); } } @@ -614,82 +637,95 @@ static int mca_btl_base_am_rdma_respond(mca_btl_base_module_t *btl, send_descriptor->des_cbfunc = NULL; - int ret = btl->btl_send(btl, endpoint, send_descriptor, mca_btl_base_rdma_resp_tag()); + /* There is no callback for the response descriptor, therefore it is + * safe to treat 0 and 1 return codes the same + */ + int ret = btl->btl_send(btl, endpoint, send_descriptor, am_rdma_resp_tag()); + if (ret == 1) { + ret = OPAL_SUCCESS; + } + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { *descriptor = send_descriptor; } return ret; } + static void -mca_btl_base_am_rmda_rdma_complete(mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, void *local_address, - struct mca_btl_base_registration_handle_t *local_handle, - void *context, void *cbdata, int status) 
+am_rdma_rdma_complete(mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, void *local_address, + struct mca_btl_base_registration_handle_t *local_handle, + void *context, void *cbdata, int status) { - mca_btl_base_rdma_operation_t *operation = (mca_btl_base_rdma_operation_t *) context; + am_rdma_operation_t *operation = (am_rdma_operation_t *) context; BTL_VERBOSE(("BTL RDMA operation complete. status=%d", status)); assert(OPAL_SUCCESS == status); operation->is_completed = true; - int ret = mca_btl_base_am_rdma_respond(operation->btl, operation->endpoint, - &operation->descriptor, NULL, &operation->hdr); + int ret = am_rdma_respond(operation->btl, operation->endpoint, + &operation->descriptor, NULL, &operation->hdr); if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { BTL_VERBOSE( ("could not send a response. queueing the response for later. endpoint=%p, ret=%d", (void*) endpoint, ret)); - mca_btl_base_rdma_queue_operation(btl, NULL, NULL, 0, NULL, operation); + am_rdma_queue_operation(btl, NULL, NULL, 0, NULL, operation); } OBJ_RELEASE(operation); } -static int mca_btl_base_am_rdma_target_get(mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - mca_btl_base_descriptor_t **descriptor, - void *target_address, const mca_btl_base_rdma_hdr_t *hdr, - mca_btl_base_rdma_operation_t **operation) + +static int am_rdma_target_put(mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, + mca_btl_base_descriptor_t **descriptor, + const mca_btl_base_segment_t *segments, + size_t segment_count, void *target_address, + const am_rdma_hdr_t *hdr, + am_rdma_operation_t **operation) { if (hdr->data.rdma.use_rdma) { if (NULL == *operation) { - *operation = mca_btl_base_rdma_alloc_operation(btl, endpoint, *descriptor, hdr); + *operation = am_rdma_alloc_operation(btl, endpoint, *descriptor, hdr); if (NULL == *operation) { return OPAL_ERR_OUT_OF_RESOURCE; } } - /* btl supports put but not get. 
emulating get with put */ + /* btl supports get but not put. emulating put with get */ OBJ_RETAIN(*operation); - int ret = btl->btl_put( + int ret = btl->btl_get( btl, endpoint, target_address, hdr->data.rdma.initiator_address, (struct mca_btl_base_registration_handle_t *) (*operation)->local_handle_data, (struct mca_btl_base_registration_handle_t *) (*operation)->remote_handle_data, - hdr->data.rdma.size, /*flags=*/0, MCA_BTL_NO_ORDER, mca_btl_base_am_rmda_rdma_complete, - *operation, NULL); + hdr->data.rdma.size, /*flags=*/0, MCA_BTL_NO_ORDER, am_rdma_rdma_complete, + operation, NULL); if (OPAL_SUCCESS != ret) { OBJ_RELEASE(*operation); } + if (OPAL_ERR_NOT_AVAILABLE != ret) { return ret; } + } else if (NULL != segments) { + am_rdma_copy_from_segments(hdr->target_address, sizeof(*hdr), segments, segment_count); } - return mca_btl_base_am_rdma_respond(btl, endpoint, descriptor, target_address, hdr); + return am_rdma_respond(btl, endpoint, descriptor, NULL, hdr); } -static int mca_btl_base_am_rdma_target_put(mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - mca_btl_base_descriptor_t **descriptor, - const mca_btl_base_segment_t *segments, - size_t segment_count, void *target_address, - const mca_btl_base_rdma_hdr_t *hdr, - mca_btl_base_rdma_operation_t **operation) + +static int am_rdma_target_get(mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, + mca_btl_base_descriptor_t **descriptor, + void *target_address, const am_rdma_hdr_t *hdr, + am_rdma_operation_t **operation) { if (hdr->data.rdma.use_rdma) { if (NULL == *operation) { - *operation = mca_btl_base_rdma_alloc_operation(btl, endpoint, *descriptor, hdr); + *operation = am_rdma_alloc_operation(btl, endpoint, *descriptor, hdr); if (NULL == *operation) { return OPAL_ERR_OUT_OF_RESOURCE; } @@ -697,27 +733,25 @@ static int mca_btl_base_am_rdma_target_put(mca_btl_base_module_t *btl, /* btl supports put but not get. 
emulating get with put */ OBJ_RETAIN(*operation); - int ret = btl->btl_get( + int ret = btl->btl_put( btl, endpoint, target_address, hdr->data.rdma.initiator_address, (struct mca_btl_base_registration_handle_t *) (*operation)->local_handle_data, (struct mca_btl_base_registration_handle_t *) (*operation)->remote_handle_data, - hdr->data.rdma.size, /*flags=*/0, MCA_BTL_NO_ORDER, mca_btl_base_am_rmda_rdma_complete, - operation, NULL); + hdr->data.rdma.size, /*flags=*/0, MCA_BTL_NO_ORDER, am_rdma_rdma_complete, + *operation, NULL); if (OPAL_SUCCESS != ret) { OBJ_RELEASE(*operation); } - if (OPAL_ERR_NOT_AVAILABLE != ret) { return ret; } - } else if (NULL != segments) { - mca_btl_base_copy_from_segments(hdr->target_address, sizeof(*hdr), segments, segment_count); } - return mca_btl_base_am_rdma_respond(btl, endpoint, descriptor, NULL, hdr); + return am_rdma_respond(btl, endpoint, descriptor, target_address, hdr); } -static void mca_btl_base_rdma_retry_operation(mca_btl_base_rdma_operation_t *operation) + +static void am_rdma_retry_operation(am_rdma_operation_t *operation) { void *target_address = (void *) (intptr_t) operation->hdr.target_address; int ret = OPAL_SUCCESS; @@ -725,75 +759,88 @@ static void mca_btl_base_rdma_retry_operation(mca_btl_base_rdma_operation_t *ope if (!operation->descriptor && !operation->is_completed) { switch (operation->hdr.type) { case MCA_BTL_BASE_AM_GET: - ret = mca_btl_base_am_rdma_target_get(operation->btl, operation->endpoint, - &operation->descriptor, target_address, - &operation->hdr, &operation); + ret = am_rdma_target_get(operation->btl, operation->endpoint, + &operation->descriptor, target_address, + &operation->hdr, &operation); break; case MCA_BTL_BASE_AM_PUT: - ret = mca_btl_base_am_rdma_target_put(operation->btl, operation->endpoint, - &operation->descriptor, - /*segments=*/NULL, - /*segment_count=*/0, target_address, - &operation->hdr, &operation); + ret = am_rdma_target_put(operation->btl, operation->endpoint, + 
&operation->descriptor, + /*segments=*/NULL, + /*segment_count=*/0, target_address, + &operation->hdr, &operation); break; case MCA_BTL_BASE_AM_ATOMIC: /* atomic operation was completed */ - ret = mca_btl_base_am_rdma_respond(operation->btl, operation->endpoint, - &operation->descriptor, &operation->atomic_response, - &operation->hdr); + ret = am_rdma_respond(operation->btl, operation->endpoint, + &operation->descriptor, &operation->atomic_response, + &operation->hdr); break; } } else { - ret = mca_btl_base_am_rdma_respond(operation->btl, operation->endpoint, - &operation->descriptor, - /*addr=*/NULL, /*hdr=*/NULL); + ret = am_rdma_respond(operation->btl, operation->endpoint, + &operation->descriptor, + /*addr=*/NULL, /*hdr=*/NULL); } if (OPAL_SUCCESS == ret) { if (operation->is_queued) { - opal_list_remove_item(&default_module.queued_responses, &operation->super); + opal_list_remove_item(&default_component.queued_responses, &operation->super); } OBJ_RELEASE(operation); } } -static int mca_btl_base_am_rdma_progress(void) + +static int am_rdma_progress(void) { - if (0 == opal_list_get_size(&default_module.queued_responses) - && 0 == opal_list_get_size(&default_module.queued_initiator_descriptors)) { + if (0 == opal_list_get_size(&default_component.queued_responses) + && 0 == opal_list_get_size(&default_component.queued_initiator_descriptors)) { return 0; } - OPAL_THREAD_SCOPED_LOCK(&default_module.mutex, ({ - mca_btl_base_rdma_operation_t *operation, *next; - OPAL_LIST_FOREACH_SAFE (operation, next, &default_module.queued_responses, - mca_btl_base_rdma_operation_t) { - mca_btl_base_rdma_retry_operation(operation); - } - })); - - OPAL_THREAD_SCOPED_LOCK(&default_module.mutex, ({ - mca_btl_base_am_rdma_queued_descriptor_t *descriptor, *next; - OPAL_LIST_FOREACH_SAFE (descriptor, next, &default_module.queued_initiator_descriptors, - mca_btl_base_am_rdma_queued_descriptor_t) { - mca_btl_base_rdma_context_t *context = (mca_btl_base_rdma_context_t *) - 
descriptor->descriptor->des_context; - int ret = descriptor->btl->btl_send(descriptor->btl, descriptor->endpoint, - descriptor->descriptor, - mca_btl_base_rdma_tag(context->type)); - if (OPAL_SUCCESS == ret) { - opal_list_remove_item(&default_module.queued_initiator_descriptors, - &descriptor->super); - } - } - })); + // It's a little cleaner, stylistically, to make the multi-line + // ACTION argument to OPAL_THREAD_SCOPED_LOCK be a macro itself + // (vs. using continuation characters in the use of + // OPAL_THREAD_SCOPED_LOCK). +#define ACTION1 \ + am_rdma_operation_t *operation, *next; \ + OPAL_LIST_FOREACH_SAFE(operation, next, \ + &default_component.queued_responses, \ + am_rdma_operation_t) { \ + am_rdma_retry_operation(operation); \ + } + + OPAL_THREAD_SCOPED_LOCK(&default_component.mutex, ACTION1); + +#define ACTION2 \ + am_rdma_queued_descriptor_t *descriptor, *next; \ + OPAL_LIST_FOREACH_SAFE(descriptor, next, \ + &default_component.queued_initiator_descriptors, \ + am_rdma_queued_descriptor_t) { \ + am_rdma_context_t *context = \ + (am_rdma_context_t *) descriptor->descriptor->des_context; \ + mca_btl_base_module_t *btl = descriptor->am_module->btl; \ + assert(0 != (descriptor->descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)); \ + int ret = btl->btl_send(btl, \ + descriptor->endpoint, \ + descriptor->descriptor, \ + am_rdma_tag(context->type)); \ + if (OPAL_SUCCESS == ret || 1 == ret) { \ + opal_list_remove_item(&default_component.queued_initiator_descriptors, \ + &descriptor->super); \ + } \ + } + + OPAL_THREAD_SCOPED_LOCK(&default_component.mutex, ACTION2); return 0; } -static int mca_btl_base_am_atomic_64(int64_t *operand, opal_atomic_int64_t *addr, - mca_btl_base_atomic_op_t op) + +static int am_rdma_atomic_64(int64_t *operand, opal_atomic_int64_t *addr, + mca_btl_base_atomic_op_t op) { int64_t result = 0; @@ -827,8 +874,9 @@ static int mca_btl_base_am_atomic_64(int64_t *operand, opal_atomic_int64_t *addr return OPAL_SUCCESS; } -static int 
mca_btl_base_am_atomic_32(int32_t *operand, opal_atomic_int32_t *addr, - mca_btl_base_atomic_op_t op) + +static int am_rdma_atomic_32(int32_t *operand, opal_atomic_int32_t *addr, + mca_btl_base_atomic_op_t op) { int32_t result = 0; @@ -862,16 +910,17 @@ static int mca_btl_base_am_atomic_32(int32_t *operand, opal_atomic_int32_t *addr return OPAL_SUCCESS; } -static void mca_btl_base_am_rdma_response(mca_btl_base_module_t *btl, - const mca_btl_base_receive_descriptor_t *desc) + +static void am_rdma_response(mca_btl_base_module_t *btl, + const mca_btl_base_receive_descriptor_t *desc) { - mca_btl_base_rdma_response_hdr_t *resp_hdr = (mca_btl_base_rdma_response_hdr_t *) desc + am_rdma_response_hdr_t *resp_hdr = (am_rdma_response_hdr_t *) desc ->des_segments[0] .seg_addr.pval; assert(desc->des_segments[0].seg_len >= sizeof(*resp_hdr)); - mca_btl_base_rdma_context_t *context = (mca_btl_base_rdma_context_t *) (uintptr_t) + am_rdma_context_t *context = (am_rdma_context_t *) (uintptr_t) resp_hdr->context; BTL_VERBOSE(("received response for RDMA operation. context=%p, size=%" PRIu64, (void*) context, @@ -885,8 +934,8 @@ static void mca_btl_base_am_rdma_response(mca_btl_base_module_t *btl, /* if there is a result copy it out of the incoming buffer. if RDMA is being used * (get/put or put/get) then the header should be the only thing in the incoming * message. */ - mca_btl_base_copy_from_segments(local_address, sizeof(*resp_hdr), desc->des_segments, - desc->des_segment_count); + am_rdma_copy_from_segments(local_address, sizeof(*resp_hdr), desc->des_segments, + desc->des_segment_count); } } @@ -898,8 +947,9 @@ static void mca_btl_base_am_rdma_response(mca_btl_base_module_t *btl, } } -static void mca_btl_base_am_process_rdma(mca_btl_base_module_t *btl, - const mca_btl_base_receive_descriptor_t *desc) + +static void am_rdma_process_rdma(mca_btl_base_module_t *btl, + const mca_btl_base_receive_descriptor_t *desc) { /* not all btls work with these active message atomics. 
at this time * all of the affected btls already have atomic support so there is @@ -909,11 +959,11 @@ static void mca_btl_base_am_process_rdma(mca_btl_base_module_t *btl, abort(); } - const mca_btl_base_rdma_hdr_t *hdr = (mca_btl_base_rdma_hdr_t *) desc->des_segments[0] + const am_rdma_hdr_t *hdr = (am_rdma_hdr_t *) desc->des_segments[0] .seg_addr.pval; void *target_address = (void *) (intptr_t) hdr->target_address; mca_btl_base_descriptor_t *descriptor = NULL; - mca_btl_base_rdma_operation_t *operation = NULL; + am_rdma_operation_t *operation = NULL; int ret; BTL_VERBOSE(("got active-message \"RDMA\" request. hdr->context=0x%" PRIx64 @@ -922,11 +972,11 @@ static void mca_btl_base_am_process_rdma(mca_btl_base_module_t *btl, hdr->context, target_address, desc->des_segments[0].seg_len)); if (MCA_BTL_BASE_AM_PUT == hdr->type) { - ret = mca_btl_base_am_rdma_target_put(btl, desc->endpoint, &descriptor, desc->des_segments, + ret = am_rdma_target_put(btl, desc->endpoint, &descriptor, desc->des_segments, desc->des_segment_count, target_address, hdr, &operation); } else if (MCA_BTL_BASE_AM_GET == hdr->type) { - ret = mca_btl_base_am_rdma_target_get(btl, desc->endpoint, &descriptor, target_address, hdr, + ret = am_rdma_target_get(btl, desc->endpoint, &descriptor, target_address, hdr, &operation); } else { BTL_ERROR(("Unexpected tag when processing active-message RDMA request")); @@ -934,12 +984,13 @@ static void mca_btl_base_am_process_rdma(mca_btl_base_module_t *btl, } if (OPAL_SUCCESS != ret) { - mca_btl_base_rdma_queue_operation(btl, desc->endpoint, descriptor, 0, hdr, operation); + am_rdma_queue_operation(btl, desc->endpoint, descriptor, 0, hdr, operation); } } -static void mca_btl_base_am_process_atomic(mca_btl_base_module_t *btl, - const mca_btl_base_receive_descriptor_t *desc) + +static void am_rdma_process_atomic(mca_btl_base_module_t *btl, + const mca_btl_base_receive_descriptor_t *desc) { /* not all btls work with these active message atomics. 
at this time * all of the affected btls already have atomic support so there is @@ -949,7 +1000,7 @@ static void mca_btl_base_am_process_atomic(mca_btl_base_module_t *btl, abort(); } - const mca_btl_base_rdma_hdr_t *hdr = (mca_btl_base_rdma_hdr_t *) desc->des_segments[0] + const am_rdma_hdr_t *hdr = (am_rdma_hdr_t *) desc->des_segments[0] .seg_addr.pval; uint64_t atomic_response = hdr->data.atomic.operand[0]; @@ -966,15 +1017,16 @@ static void mca_btl_base_am_process_atomic(mca_btl_base_module_t *btl, switch (hdr->type) { case MCA_BTL_BASE_AM_ATOMIC: if (4 == hdr->data.atomic.size) { - uint32_t tmp = (uint32_t) atomic_response; - mca_btl_base_am_atomic_32(&tmp, (opal_atomic_int32_t *) (uintptr_t) hdr->target_address, - hdr->data.atomic.op); + int32_t tmp = (int32_t) atomic_response; + am_rdma_atomic_32(&tmp, (opal_atomic_int32_t *) hdr->target_address, + hdr->data.atomic.op); + atomic_response = tmp; + } else if (8 == hdr->data.atomic.size) { + int64_t tmp = (int64_t) atomic_response; + am_rdma_atomic_64(&tmp, + (opal_atomic_int64_t *) hdr->target_address, + hdr->data.atomic.op); atomic_response = tmp; - } - if (8 == hdr->data.atomic.size) { - mca_btl_base_am_atomic_64(&atomic_response, - (opal_atomic_int64_t *) (uintptr_t) hdr->target_address, - hdr->data.atomic.op); } break; case MCA_BTL_BASE_AM_CAS: @@ -983,10 +1035,11 @@ static void mca_btl_base_am_process_atomic(mca_btl_base_module_t *btl, opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t *) hdr->target_address, &tmp, (int32_t) hdr->data.atomic.operand[1]); atomic_response = tmp; - } - if (8 == hdr->data.atomic.size) { + } else if (8 == hdr->data.atomic.size) { + int64_t tmp = (int64_t) atomic_response; opal_atomic_compare_exchange_strong_64((opal_atomic_int64_t *) hdr->target_address, - &atomic_response, hdr->data.atomic.operand[1]); + &tmp, hdr->data.atomic.operand[1]); + atomic_response = tmp; } break; default: @@ -995,24 +1048,120 @@ static void 
mca_btl_base_am_process_atomic(mca_btl_base_module_t *btl, } mca_btl_base_descriptor_t *descriptor = NULL; - int ret = mca_btl_base_am_rdma_respond(btl, desc->endpoint, &descriptor, &atomic_response, hdr); + int ret = am_rdma_respond(btl, desc->endpoint, &descriptor, &atomic_response, hdr); if (OPAL_SUCCESS != ret) { - mca_btl_base_rdma_queue_operation(btl, desc->endpoint, descriptor, atomic_response, hdr, + am_rdma_queue_operation(btl, desc->endpoint, descriptor, atomic_response, hdr, NULL); } } -static void mca_btl_sm_sc_emu_init(void) + +static int am_rdma_put(mca_btl_base_am_rdma_module_t *am_module, + struct mca_btl_base_endpoint_t *endpoint, void *local_address, + uint64_t remote_address, + struct mca_btl_base_registration_handle_t *local_handle, + struct mca_btl_base_registration_handle_t *remote_handle, + size_t size, int flags, int order, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, + void *cbdata) { - mca_btl_base_active_message_trigger[MCA_BTL_BASE_TAG_RDMA].cbfunc - = mca_btl_base_am_process_rdma; - mca_btl_base_active_message_trigger[MCA_BTL_BASE_TAG_ATOMIC].cbfunc - = mca_btl_base_am_process_atomic; - mca_btl_base_active_message_trigger[MCA_BTL_BASE_TAG_RDMA_RESP].cbfunc - = mca_btl_base_am_rdma_response; + return am_rdma_start(am_module, endpoint, MCA_BTL_BASE_AM_PUT, 0, 0, 0, order, flags, size, + local_address, local_handle, remote_address, remote_handle, + cbfunc, cbcontext, cbdata); } -static int mca_btl_base_am_fop(struct mca_btl_base_module_t *btl, + +static int am_rdma_get(mca_btl_base_am_rdma_module_t *am_module, + struct mca_btl_base_endpoint_t *endpoint, void *local_address, + uint64_t remote_address, + struct mca_btl_base_registration_handle_t *local_handle, + struct mca_btl_base_registration_handle_t *remote_handle, + size_t size, int flags, int order, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, + void *cbdata) +{ + return am_rdma_start(am_module, endpoint, MCA_BTL_BASE_AM_GET, 0, 0, 0, order, flags, size, 
+ local_address, local_handle, remote_address, remote_handle, + cbfunc, cbcontext, cbdata); +} + + +static int am_rdma_fop(mca_btl_base_am_rdma_module_t *am_module, + struct mca_btl_base_endpoint_t *endpoint, void *local_address, + uint64_t remote_address, + mca_btl_base_registration_handle_t *local_handle, + mca_btl_base_registration_handle_t *remote_handle, + mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, + void *cbdata) +{ + size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8; + + return am_rdma_start(am_module, endpoint, MCA_BTL_BASE_AM_ATOMIC, operand, 0, op, order, + flags, size, local_address, local_handle, remote_address, + remote_handle, cbfunc, cbcontext, cbdata); +} + + +static int am_rdma_cswap(mca_btl_base_am_rdma_module_t *am_module, + struct mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, + mca_btl_base_registration_handle_t *local_handle, + mca_btl_base_registration_handle_t *remote_handle, + uint64_t compare, uint64_t value, int flags, int order, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, + void *cbdata) +{ + size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 
4 : 8; + + return am_rdma_start(am_module, endpoint, MCA_BTL_BASE_AM_CAS, compare, value, 0, order, + flags, size, local_address, local_handle, remote_address, + remote_handle, cbfunc, cbcontext, cbdata); +} + + +static mca_btl_base_am_rdma_module_t *am_rdma_get_module(struct mca_btl_base_module_t *btl) +{ + assert(NULL != btl->btl_am_data); + return (mca_btl_base_am_rdma_module_t *)btl->btl_am_data; +} + + +static int am_rdma_put_wrapper(struct mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, void *local_address, + uint64_t remote_address, + struct mca_btl_base_registration_handle_t *local_handle, + struct mca_btl_base_registration_handle_t *remote_handle, + size_t size, int flags, int order, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, + void *cbdata) +{ + mca_btl_base_am_rdma_module_t *am_module = am_rdma_get_module(btl); + + return am_rdma_start(am_module, endpoint, MCA_BTL_BASE_AM_PUT, 0, 0, 0, order, flags, size, + local_address, local_handle, remote_address, remote_handle, + cbfunc, cbcontext, cbdata); +} + + +static int am_rdma_get_wrapper(struct mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, void *local_address, + uint64_t remote_address, + struct mca_btl_base_registration_handle_t *local_handle, + struct mca_btl_base_registration_handle_t *remote_handle, + size_t size, int flags, int order, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, + void *cbdata) +{ + mca_btl_base_am_rdma_module_t *am_module = am_rdma_get_module(btl); + + return am_rdma_start(am_module, endpoint, MCA_BTL_BASE_AM_GET, 0, 0, 0, order, flags, size, + local_address, local_handle, remote_address, remote_handle, + cbfunc, cbcontext, cbdata); +} + + +static int am_rdma_fop_wrapper(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, @@ -1021,96 +1170,204 @@ static int mca_btl_base_am_fop(struct 
mca_btl_base_module_t *btl, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { + mca_btl_base_am_rdma_module_t *am_module = am_rdma_get_module(btl); size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8; - return mca_btl_base_rdma_start(btl, endpoint, MCA_BTL_BASE_AM_ATOMIC, operand, 0, op, order, - flags, size, local_address, local_handle, remote_address, - remote_handle, cbfunc, cbcontext, cbdata); + + return am_rdma_start(am_module, endpoint, MCA_BTL_BASE_AM_ATOMIC, operand, 0, op, order, + flags, size, local_address, local_handle, remote_address, + remote_handle, cbfunc, cbcontext, cbdata); } -static int mca_btl_base_am_cswap( - struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) + +static int am_rdma_cswap_wrapper(struct mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, + mca_btl_base_registration_handle_t *local_handle, + mca_btl_base_registration_handle_t *remote_handle, + uint64_t compare, uint64_t value, int flags, int order, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, + void *cbdata) { + mca_btl_base_am_rdma_module_t *am_module = am_rdma_get_module(btl); size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 
4 : 8; - return mca_btl_base_rdma_start(btl, endpoint, MCA_BTL_BASE_AM_CAS, compare, value, 0, order, - flags, size, local_address, local_handle, remote_address, - remote_handle, cbfunc, cbcontext, cbdata); + + return am_rdma_start(am_module, endpoint, MCA_BTL_BASE_AM_CAS, compare, value, 0, order, + flags, size, local_address, local_handle, remote_address, + remote_handle, cbfunc, cbcontext, cbdata); } -static int mca_btl_base_am_rdma_get(struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, - struct mca_btl_base_registration_handle_t *local_handle, - struct mca_btl_base_registration_handle_t *remote_handle, - size_t size, int flags, int order, - mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata) + +static void am_rdma_register_callbacks(void) { - return mca_btl_base_rdma_start(btl, endpoint, MCA_BTL_BASE_AM_GET, 0, 0, 0, order, flags, size, - local_address, local_handle, remote_address, remote_handle, - cbfunc, cbcontext, cbdata); + mca_btl_base_active_message_trigger[MCA_BTL_BASE_TAG_RDMA].cbfunc + = am_rdma_process_rdma; + mca_btl_base_active_message_trigger[MCA_BTL_BASE_TAG_ATOMIC].cbfunc + = am_rdma_process_atomic; + mca_btl_base_active_message_trigger[MCA_BTL_BASE_TAG_RDMA_RESP].cbfunc + = am_rdma_response; } -static int mca_btl_base_am_rdma_put(struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, - struct mca_btl_base_registration_handle_t *local_handle, - struct mca_btl_base_registration_handle_t *remote_handle, - size_t size, int flags, int order, - mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata) + +static int am_rdma_internal_init(mca_btl_base_module_t *btl, + uint32_t flags_requested, + bool no_memory_registration, + mca_btl_base_am_rdma_module_t **new_module) { - return mca_btl_base_rdma_start(btl, endpoint, MCA_BTL_BASE_AM_PUT, 0, 0, 0, order, flags, size, 
- local_address, local_handle, remote_address, remote_handle, - cbfunc, cbcontext, cbdata); + static bool initialized = false; + static opal_mutex_t initialized_mutex = OPAL_MUTEX_STATIC_INIT; + mca_btl_base_am_rdma_module_t *module; + size_t max_operation_size; + size_t operation_alignment; + + opal_mutex_lock(&initialized_mutex); + if (!initialized) { + initialized = true; + OBJ_CONSTRUCT(&default_component, am_rdma_component_t); + opal_progress_register(am_rdma_progress); + am_rdma_register_callbacks(); + } + opal_mutex_unlock(&initialized_mutex); + + module = OBJ_NEW(mca_btl_base_am_rdma_module_t); + if (NULL == module) { + return OPAL_ERR_TEMP_OUT_OF_RESOURCE; + } + + module->btl = btl; + module->use_rdma_put = !!(btl->btl_flags & MCA_BTL_FLAGS_PUT); + module->use_rdma_get = !!(btl->btl_flags & MCA_BTL_FLAGS_GET); + + /* if the requester asked for remote completion and the btl does + * not provide remove completion, we can not use put. + */ + if (!(btl->btl_flags & MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION)) { + module->use_rdma_put = false; + } + + /* if the requester does not want to do memory registration and + * the BTL requires memory registration, disable the use of RDMA. + */ + if (no_memory_registration && NULL != btl->btl_register_mem) { + module->use_rdma_put = false; + module->use_rdma_get = false; + } + + if (module->use_rdma_get) { + /* implement operations over get. */ + max_operation_size = btl->btl_get_limit; + operation_alignment = btl->btl_get_alignment; + BTL_VERBOSE(("am_rdma_init: btl %p using get. operation size %zu, alignment %zu", + (void *)btl, max_operation_size, operation_alignment)); + } else if (module->use_rdma_put) { + /* implement operations over put. */ + max_operation_size = btl->btl_put_limit; + operation_alignment = btl->btl_put_alignment; + BTL_VERBOSE(("am_rdma_init: btl %p using put. operation size %zu, alignment %zu", + (void *)btl, max_operation_size, operation_alignment)); + } else { + /* implement operations over send. 
*/ + max_operation_size = btl->btl_max_send_size; + operation_alignment = 1; + BTL_VERBOSE(("am_rdma_init: btl %p using send. operation size %zu, alignment %zu", + (void *)btl, max_operation_size, operation_alignment)); + } + + module->am_btl_put_limit = max_operation_size - sizeof(am_rdma_hdr_t); + module->am_btl_put_alignment = operation_alignment; + module->am_btl_get_limit = max_operation_size - sizeof(am_rdma_response_hdr_t); + module->am_btl_get_alignment = operation_alignment; + + module->am_btl_put = am_rdma_put; + module->am_btl_get = am_rdma_get; + module->am_btl_atomic_fop = am_rdma_fop; + module->am_btl_atomic_cswap = am_rdma_cswap; + + *new_module = module; + + return OPAL_SUCCESS; } + +static int am_rdma_internal_fini(mca_btl_base_am_rdma_module_t *am_rdma_module) +{ + OBJ_RELEASE(am_rdma_module); + + return OPAL_SUCCESS; +} + + int mca_btl_base_am_rdma_init(mca_btl_base_module_t *btl) { - static bool progress_registered = false; + mca_btl_base_am_rdma_module_t *am_module; + int ret; + + BTL_VERBOSE(("am_rdma_init: called for btl %s (%p)", + btl->btl_component->btl_version.mca_component_name, (void *)btl)); if ((btl->btl_flags & (MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS)) == (MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS)) { - /* nothing to do */ + BTL_VERBOSE(("am_rdma_init: btl %p already supports rdma", (void *)btl)); return OPAL_SUCCESS; } - size_t max_operation_size = btl->btl_max_send_size; - size_t operation_alignment = 1; - if (mca_btl_base_rdma_use_rdma_get(btl)) { - /* implement put over get. */ - max_operation_size = btl->btl_get_limit; - operation_alignment = btl->btl_get_alignment; - } else if (mca_btl_base_rdma_use_rdma_put(btl)) { - /* implement get over put. */ - max_operation_size = btl->btl_put_limit; - operation_alignment = btl->btl_put_alignment; + /* + * note that it is not safe to access any am rdma functionality + * (even default_component global data) until internal_init returns + * successfully. 
+ */ + ret = am_rdma_internal_init(btl, 0, false, &am_module); + if (OPAL_SUCCESS != ret) { + BTL_VERBOSE(("am_rdma_init: btl %p internal_init failure %d", + (void *)btl, ret)); + return ret; } + /* + * we can't lock any field on the BTL structure (because it's not + * ours to poke at), so take the global am rdma lock. I suppose we + * could do a cswap of the btl_am_data pointer to the same result, + * but that seems too cute for something that should be a relatively + * rare event. + */ + opal_mutex_lock(&default_component.mutex); + if (NULL != btl->btl_am_data) { + BTL_VERBOSE(("am_rdma_init: btl %p already initialized", (void *)btl)); + am_rdma_internal_fini(am_module); + opal_mutex_unlock(&default_component.mutex); + return OPAL_SUCCESS; + } + opal_mutex_unlock(&default_component.mutex); + + btl->btl_am_data = am_module; + + /* TODO: Ideally, we would swap the BTL's flush for our own + * implementation which completed all outstanding transactions on + * that BTL and then called the underlying flush(). Given the + * work and the lack of use case today, we instead just remove + * flush support from the underlying BTL. */ + btl->btl_flush = NULL; + if (!(btl->btl_flags & MCA_BTL_FLAGS_PUT)) { btl->btl_flags |= MCA_BTL_FLAGS_PUT_AM; - btl->btl_put_limit = max_operation_size - sizeof(mca_btl_base_rdma_hdr_t); - btl->btl_put_alignment = operation_alignment; - btl->btl_put = mca_btl_base_am_rdma_put; - BTL_VERBOSE(("Enabling AM-based RDMA put for BTL %p. max put = %zu", (void*) btl, btl->btl_put_limit)); + btl->btl_put_limit = am_module->am_btl_put_limit; + btl->btl_put_alignment = am_module->am_btl_put_alignment; + btl->btl_put = am_rdma_put_wrapper; + BTL_VERBOSE(("am_rdma_init: Enabling AM-based RDMA put for BTL %p. 
max put = %zu", (void*)btl, btl->btl_put_limit)); } if (!(btl->btl_flags & MCA_BTL_FLAGS_GET)) { btl->btl_flags |= MCA_BTL_FLAGS_GET_AM; - btl->btl_get_limit = max_operation_size - sizeof(mca_btl_base_rdma_response_hdr_t); - btl->btl_get_alignment = operation_alignment; - btl->btl_get = mca_btl_base_am_rdma_get; - BTL_VERBOSE(("Enabling AM-based RDMA get for BTL %p. max get = %zu", (void*) btl, btl->btl_get_limit)); + btl->btl_get_limit = am_module->am_btl_get_limit; + btl->btl_get_alignment = am_module->am_btl_get_alignment; + btl->btl_get = am_rdma_get_wrapper; + BTL_VERBOSE(("Enabling AM-based RDMA get for BTL %p. max get = %zu", (void*)btl, btl->btl_get_limit)); } if (!(btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_FOPS)) { - BTL_VERBOSE(("Enabling AM-based FOPs get for BTL %p", (void*) btl)); btl->btl_flags |= MCA_BTL_FLAGS_ATOMIC_AM_FOP; - btl->btl_atomic_fop = mca_btl_base_am_fop; - btl->btl_atomic_cswap = mca_btl_base_am_cswap; + btl->btl_atomic_fop = am_rdma_fop_wrapper; + btl->btl_atomic_cswap = am_rdma_cswap_wrapper; /* emulated RDMA atomics can support the full range of atomics. for * now only a handful are supported. 
*/ @@ -1119,14 +1376,35 @@ int mca_btl_base_am_rdma_init(mca_btl_base_module_t *btl) | MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR | MCA_BTL_ATOMIC_SUPPORTS_SWAP | MCA_BTL_ATOMIC_SUPPORTS_MIN | MCA_BTL_ATOMIC_SUPPORTS_MAX; + BTL_VERBOSE(("Enabling AM-based FOPs get for BTL %p", (void*)btl)); } - if (!progress_registered) { - progress_registered = true; - opal_progress_register(mca_btl_base_am_rdma_progress); - mca_btl_sm_sc_emu_init(); - OBJ_CONSTRUCT(&default_module, mca_btl_base_am_rdma_module_t); + return OPAL_SUCCESS; +} + + +int opal_btl_base_am_rdma_create(mca_btl_base_module_t *btl, + uint32_t flags_requested, + bool no_memory_registration, + mca_btl_base_am_rdma_module_t **am_module) +{ + int ret; + + BTL_VERBOSE(("am_rdma_create: called for btl %s (%p)", + btl->btl_component->btl_version.mca_component_name, (void *)btl)); + + ret = am_rdma_internal_init(btl, flags_requested, no_memory_registration, am_module); + if (OPAL_SUCCESS != ret) { + BTL_VERBOSE(("am_rdma_create: btl %p internal_init failure %d", + (void *)btl, ret)); + return ret; } return OPAL_SUCCESS; } + + +int opal_btl_base_am_rdma_destroy(mca_btl_base_am_rdma_module_t *am_module) +{ + return am_rdma_internal_fini(am_module); +} diff --git a/opal/mca/btl/base/btl_base_am_rdma.h b/opal/mca/btl/base/btl_base_am_rdma.h index 9842f5a8a49..d90983429fa 100644 --- a/opal/mca/btl/base/btl_base_am_rdma.h +++ b/opal/mca/btl/base/btl_base_am_rdma.h @@ -3,6 +3,8 @@ * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2020 Google, LLC. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,10 +15,46 @@ /** * This file provides support for active-message (send/recv) based RDMA. 
* It can be used with any btl that provides a minimum of send support but - * can also be used with partial-RDMA BTLs (put only, get only, etc). It - * will provide support for any RDMA or atomic operation not currently - * supported by the supplied BTL. For more info see the description of - * mca_btl_base_am_rdma_init. + * can also be used with partial-RDMA BTLs (put only, get only, etc) + * to provide a complete RDMA interface. + * + * There are two modes of using this interface, depending on your + * requirements: + * + * First, this interface can be used to provide a complete + * put/get/atomic interface for BTLs which do not natively provide + * such an interface. In this mode, active message rdma functions are + * only used if the underlying implementation does not already provide + * the required functionality. For example, if a BTL natively + * supports put but not get, the interface would provide an emulated + * get. The registration, completion and atomicity semantics of the + * BTL remain the native interface's capabilities. That is, if the + * native interface does not provide remote completion or atomics that + * are atomic with processor atomics, neither will the interface after + * initializing the am rdma interface for that BTL. This mode will + * likely give better performance than the second mode for transfers + * that fit within the BTL's native semantics. In this mode, the BTL + * interface is updated so that the btl_{put, get, atomic_fop, + * atomic_cswap} function pointers are usable. However, the btl + * capability flags will not be updated to indicate native support of + * the emulated functionality (for example, if btl_get() is emulated, + * MCA_BTL_FLAGS_GET will not be set). Instead, the emulated flags + * will be set (MCA_BTL_FLAGS_PUT_AM, MCA_BTL_FLAGS_GET_AM, + * MCA_BTL_FLAGS_ATOMIC_AM_FOP, etc.). + * + * Second, this interface can be used to provide different + * semantics than a BTL natively provides.
This mode is not + * transparent to the caller (unlike the first mode). Instead, the + * caller must manage calling the active message put/get/atomic + * interface directly (rather than through the BTL function pointers). + * For interfaces which require strict remote completion or require + * implicit memory registration, this can greatly simplify the code, + * in return for marginally more management complexity and lower + * performance. + * + * While the calling convention and initialization are different, the + * communication routines used by the active message rdma + * implementation are identical in both modes of operation. */ #include "opal_config.h" @@ -28,14 +66,86 @@ /** * @brief initialize active-message RDMA/atomic support * - * @inout btl btl module to augment + * @param[in,out] btl btl module to augment + * + * @retval OPAL_SUCCESS btl successfully updated, btl already + * updated, or btl has all available + * functionality natively. + * @retval OPAL_ERR_TEMP_OUT_OF_RESOURCE Allocating BTL-level data + * structure failed. * * This function adds functionality to the btl for any missing RDMA/atomic * operation. Atomic operations are entirely emulated using send/recv and * work best with a btl that also has async-progress enabled. Put/get * support will use either send/recv or get (for put)/put (for get) (if * available). + * + * Note that calling this function will change the BTL interface. + * Care must be taken to not call this function outside of early + * initialization routines.
*/ int mca_btl_base_am_rdma_init(mca_btl_base_module_t *btl); +struct mca_btl_base_am_rdma_module_t; + +typedef int (*mca_btl_base_am_rdma_module_put_fn_t)( + struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, + struct mca_btl_base_registration_handle_t *local_handle, + struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); + +typedef int (*mca_btl_base_am_rdma_module_get_fn_t)( + struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, + struct mca_btl_base_registration_handle_t *local_handle, + struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); + +typedef int (*mca_btl_base_am_rdma_module_atomic_fop64_fn_t)( + struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, + struct mca_btl_base_registration_handle_t *local_handle, + struct mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op, + uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, + void *cbcontext, void *cbdata); + +typedef int (*mca_btl_base_am_rdma_module_atomic_cswap64_fn_t)( + struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, + struct mca_btl_base_registration_handle_t *local_handle, + struct mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, + int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); + +struct mca_btl_base_am_rdma_module_t { + opal_object_t super; + mca_btl_base_module_t *btl; + bool use_rdma_put; + bool 
use_rdma_get; + + size_t am_btl_put_limit; + size_t am_btl_put_alignment; + size_t am_btl_get_limit; + size_t am_btl_get_alignment; + + mca_btl_base_am_rdma_module_put_fn_t am_btl_put; + mca_btl_base_am_rdma_module_get_fn_t am_btl_get; + mca_btl_base_am_rdma_module_atomic_fop64_fn_t am_btl_atomic_fop; + mca_btl_base_am_rdma_module_atomic_cswap64_fn_t am_btl_atomic_cswap; +}; +typedef struct mca_btl_base_am_rdma_module_t mca_btl_base_am_rdma_module_t; + +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_base_am_rdma_module_t); + + +/** + * @brief create active-message RDMA/atomics functions + */ +int opal_btl_base_am_rdma_create(mca_btl_base_module_t *btl, + uint32_t flags_requested, + bool no_memory_registration, + mca_btl_base_am_rdma_module_t **am_module); + +int opal_btl_base_am_rdma_destroy(mca_btl_base_am_rdma_module_t *am_module); + #endif /* OPAL_MCA_BTL_BASE_AM_RDMA_H */ diff --git a/opal/mca/btl/btl.h b/opal/mca/btl/btl.h index 0fe8f806f5f..28c71b07530 100644 --- a/opal/mca/btl/btl.h +++ b/opal/mca/btl/btl.h @@ -19,6 +19,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Intel, Inc. All rights reserved. * Copyright (c) 2020-2021 Google, LLC. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -261,6 +263,16 @@ typedef uint8_t mca_btl_base_tag_t; /* The BTL has active-message based atomics */ #define MCA_BTL_FLAGS_ATOMIC_AM_FOP 0x400000 +/** This BTL's RDMA/atomics operation supports remote completion. + * When the BTL reported the completion of a RDMA/atomic operation + * on the initiator side, the operation also finished on the target side. + * + * Note, this flag is for put and atomic write operations. Operations + * like get, atomic fetch and atomic swap support remote + * completion by nature.
+ */ +#define MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION 0x800000 + /* Default exclusivity levels */ #define MCA_BTL_EXCLUSIVITY_HIGH (64 * 1024) /* internal loopback */ #define MCA_BTL_EXCLUSIVITY_DEFAULT 1024 /* GM/IB/etc. */ @@ -556,28 +568,25 @@ typedef struct mca_btl_base_header_t mca_btl_base_header_t; * MCA->BTL Initializes the BTL component and creates specific BTL * module(s). * - * @param num_btls (OUT) Returns the number of btl modules created, or 0 - * if the transport is not available. - * - * @param enable_progress_threads (IN) Whether this component is - * allowed to run a hidden/progress thread or not. - * - * @param enable_mpi_threads (IN) Whether support for multiple MPI - * threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which - * indicates whether multiple threads may invoke this component - * simultaneously or not. - * - * @return Array of pointers to BTL modules, or NULL if the transport - * is not available. - * * During component initialization, the BTL component should discover * the physical devices that are available for the given transport, * and create a BTL module to represent each device. Any addressing * information required by peers to reach the device should be published * during this function via the modex_send() interface. * + * @param[OUT] num_btls Returns the number of btl modules created, or 0 + * if the transport is not available. + * @param[IN] enable_progress_threads Whether this component is + * allowed to run a hidden/progress thread or not. + * @param[IN] enable_mpi_threads Whether support for multiple MPI + * threads is enabled or not (i.e., + * MPI_THREAD_MULTIPLE), which indicates whether + * multiple threads may invoke this component + * simultaneously or not. + * + * @return Array of pointers to BTL modules, or NULL if the transport + * is not available. 
*/ - typedef struct mca_btl_base_module_t **(*mca_btl_base_component_init_fn_t)( int *num_btls, bool enable_progress_threads, bool enable_mpi_threads); @@ -589,7 +598,6 @@ typedef struct mca_btl_base_module_t **(*mca_btl_base_component_init_fn_t)( * how many items where completed in the call * to progress. */ - typedef int (*mca_btl_base_component_progress_fn_t)(void); /** @@ -647,28 +655,20 @@ typedef struct mca_btl_base_component_3_0_0_t mca_btl_base_component_2_0_0_t; * MCA->BTL Clean up any resources held by BTL module * before the module is unloaded. * - * @param btl (IN) BTL module. - * @return OPAL_SUCCESS or error status on failure. - * * Prior to unloading a BTL module, the MCA framework will call * the BTL finalize method of the module. Any resources held by * the BTL should be released and if required the memory corresponding * to the BTL module freed. * + * @param[IN] btl BTL module. + * + * @return OPAL_SUCCESS or error status on failure. */ typedef int (*mca_btl_base_module_finalize_fn_t)(struct mca_btl_base_module_t *btl); /** * BML->BTL notification of change in the process list. * - * @param btl (IN) BTL module - * @param nprocs (IN) Number of processes - * @param procs (IN) Array of processes - * @param endpoint (OUT) Array of mca_btl_base_endpoint_t structures by BTL. - * @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this - * BTL. - * @return OPAL_SUCCESS or error status on failure. - * * The mca_btl_base_module_add_procs_fn_t() is called by the BML to * determine the set of BTLs that should be used to reach each process. * Any addressing information exported by the peer via the modex_send() @@ -685,6 +685,14 @@ typedef int (*mca_btl_base_module_finalize_fn_t)(struct mca_btl_base_module_t *b * functions (e.g btl_send). This may be used by the BTL to cache any * addressing or connection information (e.g. TCP socket, IB queue * pair). 
+ * + * @param[IN] btl BTL module + * @param[IN] nprocs Number of processes + * @param[IN] procs Array of processes + * @param[OUT] endpoint Array of mca_btl_base_endpoint_t structures by BTL. + * @param[OUT] reachable Bitmask indicating set of peer processes that + * are reachable by this BTL. + * @return OPAL_SUCCESS or error status on failure. */ typedef int (*mca_btl_base_module_add_procs_fn_t)(struct mca_btl_base_module_t *btl, size_t nprocs, struct opal_proc_t **procs, @@ -694,15 +702,16 @@ typedef int (*mca_btl_base_module_add_procs_fn_t)(struct mca_btl_base_module_t * /** * Notification of change to the process list. * - * @param btl (IN) BTL module - * @param nprocs (IN) Number of processes - * @param proc (IN) Set of processes - * @param peer (IN) Set of peer addressing information. - * @return Status indicating if cleanup was successful - * * When the process list changes, the BML notifies the BTL of the * change, to provide the opportunity to cleanup or release any * resources associated with the peer. + * + * @param[IN] btl BTL module + * @param[IN] nprocs Number of processes + * @param[IN] proc Set of processes + * @param[IN] peer Set of peer addressing information. 
+ * + * @return Status indicating if cleanup was successful */ typedef int (*mca_btl_base_module_del_procs_fn_t)(struct mca_btl_base_module_t *btl, size_t nprocs, struct opal_proc_t **procs, @@ -718,8 +727,8 @@ typedef int (*mca_btl_base_module_del_procs_fn_t)(struct mca_btl_base_module_t * * @param[IN] cbfunc The callback function * @param[IN] cbdata Opaque callback data * - * @return OPAL_SUCCESS The callback was registered successfully - * @return OPAL_ERROR The callback was NOT registered successfully + * @retval OPAL_SUCCESS The callback was registered successfully + * @retval OPAL_ERROR The callback was NOT registered successfully * */ typedef int (*mca_btl_base_module_register_fn_t)(struct mca_btl_base_module_t *btl, @@ -736,7 +745,6 @@ typedef int (*mca_btl_base_module_register_fn_t)(struct mca_btl_base_module_t *b * @param[IN] errproc process that had an error * @param[IN] btlinfo descriptive string from the BTL */ - typedef void (*mca_btl_base_module_error_cb_fn_t)(struct mca_btl_base_module_t *btl, int32_t flags, struct opal_proc_t *errproc, char *btlinfo); @@ -747,8 +755,8 @@ typedef void (*mca_btl_base_module_error_cb_fn_t)(struct mca_btl_base_module_t * * @param[IN] btl BTL module * @param[IN] cbfunc The callback function * - * @return OPAL_SUCCESS The callback was registered successfully - * @return OPAL_ERROR The callback was NOT registered successfully + * @retval OPAL_SUCCESS The callback was registered successfully + * @retval OPAL_ERROR The callback was NOT registered successfully * */ typedef int (*mca_btl_base_module_register_error_fn_t)(struct mca_btl_base_module_t *btl, @@ -764,10 +772,9 @@ typedef int (*mca_btl_base_module_register_error_fn_t)(struct mca_btl_base_modul * local completion callback function called and the order tag of * that descriptor is only valid upon the local completion callback function. * - * - * @param btl (IN) BTL module - * @param size (IN) Request segment size. 
- * @param order (IN) The ordering tag (may be MCA_BTL_NO_ORDER) + * @param[IN] btl BTL module + * @param[IN] size Request segment size. + * @param[IN] order The ordering tag (may be MCA_BTL_NO_ORDER) */ typedef mca_btl_base_descriptor_t *(*mca_btl_base_module_alloc_fn_t)( @@ -779,8 +786,8 @@ typedef mca_btl_base_descriptor_t *(*mca_btl_base_module_alloc_fn_t)( * A descriptor can only be deallocated after its local completion * callback function has called for all send/put/get operations. * - * @param btl (IN) BTL module - * @param segment (IN) Descriptor allocated from the BTL + * @param[IN] btl BTL module + * @param[IN] segment Descriptor allocated from the BTL */ typedef int (*mca_btl_base_module_free_fn_t)(struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *descriptor); @@ -798,15 +805,14 @@ typedef int (*mca_btl_base_module_free_fn_t)(struct mca_btl_base_module_t *btl, * called and the order tag of that descriptor is only valid upon the local * completion callback function. 
* - * @param btl (IN) BTL module - * @param endpoint (IN) BTL peer addressing - * @param registration (IN) Memory registration - * @param convertor (IN) Data type convertor - * @param order (IN) The ordering tag (may be MCA_BTL_NO_ORDER) - * @param reserve (IN) Additional bytes requested by upper layer to precede user data - * @param size (IN/OUT) Number of bytes to prepare (IN), + * @param[IN] btl BTL module + * @param[IN] endpoint BTL peer addressing + * @param[IN] registration Memory registration + * @param[IN] convertor Data type convertor + * @param[IN] order The ordering tag (may be MCA_BTL_NO_ORDER) + * @param[IN] reserve Additional bytes requested by upper layer to precede user data + * @param[IN,OUT] size Number of bytes to prepare (IN), * number of bytes actually prepared (OUT) - * */ typedef struct mca_btl_base_descriptor_t *(*mca_btl_base_module_prepare_fn_t)( struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, @@ -816,15 +822,6 @@ typedef struct mca_btl_base_descriptor_t *(*mca_btl_base_module_prepare_fn_t)( /** * @brief Register a memory region for put/get/atomic operations. * - * @param btl (IN) BTL module - * @param endpoint(IN) BTL addressing information (or NULL for all endpoints) - * @param base (IN) Pointer to start of region - * @param size (IN) Size of region - * @param flags (IN) Flags including access permissions - * - * @returns a memory registration handle valid for both local and remote operations - * @returns NULL if the region could not be registered - * * This function registers the specified region with the hardware for use with * the btl_put, btl_get, btl_atomic_cas, btl_atomic_op, and btl_atomic_fop * functions. Care should be taken to not hold an excessive number of registrations @@ -834,6 +831,15 @@ typedef struct mca_btl_base_descriptor_t *(*mca_btl_base_module_prepare_fn_t)( * mca_btl_base_registration_handle_t*) is passed to the caller. 
The * BTL module cannot free or reuse the handle until it is returned via * the mca_btl_base_module_deregister_mem_fn_t function. + * + * @param[IN] btl BTL module + * @param[IN] endpoint BTL addressing information (or NULL for all endpoints) + * @param[IN] base Pointer to start of region + * @param[IN] size Size of region + * @param[IN] flags Flags including access permissions + * + * @returns a memory registration handle valid for both local and + * remote operations or NULL if the region could not be registered. */ typedef struct mca_btl_base_registration_handle_t *(*mca_btl_base_module_register_mem_fn_t)( struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *base, @@ -842,9 +848,6 @@ typedef struct mca_btl_base_registration_handle_t *(*mca_btl_base_module_registe /** * @brief Deregister a memory region * - * @param btl (IN) BTL module region was registered with - * @param handle (IN) BTL registration handle to deregister - * * This function deregisters the memory region associated with the specified handle. Care * should be taken to not perform any RDMA or atomic operation on this memory region * after it is deregistered. It is erroneous to specify a memory handle associated with @@ -854,23 +857,33 @@ typedef struct mca_btl_base_registration_handle_t *(*mca_btl_base_module_registe * mca_btl_base_module_register_mem_fn_t function. Ownership of the * memory pointed to by handle passes to the BTL module; this function * is now is allowed to free the memory, return it to a freelist, etc. + * + * @param[IN] btl BTL module region was registered with + * @param[IN] handle BTL registration handle to deregister */ typedef int (*mca_btl_base_module_deregister_mem_fn_t)( struct mca_btl_base_module_t *btl, struct mca_btl_base_registration_handle_t *handle); /** * Initiate an asynchronous send. 
- * Completion Semantics: the descriptor has been queued for a send operation - * the BTL now controls the descriptor until local - * completion callback is made on the descriptor * * All BTLs allow multiple concurrent asynchronous send operations on a descriptor * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param descriptor (IN) Description of the data to be transfered - * @param tag (IN) The tag value used to notify the peer. - * + * Completion Semantics: If OPAL_SUCCESS is returned, the descriptor + * has been queued for a send operation the BTL now controls the + * descriptor until local completion callback is made on the + * descriptor. If 1 is returned, the descriptor has been sent and the + * operation is complete. The local completion callback will not be + * made unless the MCA_BTL_DES_SEND_ALWAYS_CALLBACK flag was set on + * the descriptor. + * + * @param[IN] btl BTL module + * @param[IN] endpoint BTL addressing information + * @param[IN] descriptor Description of the data to be transfered + * @param[IN] tag The tag value used to notify the peer. + * + * @retval 1 The descriptor was successfully sent (see + * completion semantics above). * @retval OPAL_SUCCESS The descriptor was successfully queued for a send * @retval OPAL_ERROR The descriptor was NOT successfully queued for a send * @retval OPAL_ERR_UNREACH The endpoint is not reachable @@ -882,23 +895,24 @@ typedef int (*mca_btl_base_module_send_fn_t)(struct mca_btl_base_module_t *btl, /** * Initiate an immediate blocking send. - * Completion Semantics: the BTL will make a best effort - * to send the header and "size" bytes from the datatype using the convertor. - * The header is guaranteed to be delivered entirely in the first segment. - * Should the BTL be unable to deliver the data due to resource constraints - * the BTL will return a descriptor (via the OUT param) - * of size "payload_size + header_size". 
- * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param convertor (IN) Data type convertor - * @param header (IN) Pointer to header. - * @param header_size (IN) Size of header. - * @param payload_size (IN) Size of payload (from convertor). - * @param order (IN) The ordering tag (may be MCA_BTL_NO_ORDER) - * @param flags (IN) Flags. - * @param tag (IN) The tag value used to notify the peer. - * @param descriptor (OUT) The descriptor to be returned unable to be sent immediately + * + * Completion Semantics: the BTL will make a best effort to send the + * header and "size" bytes from the datatype using the convertor. The + * header is guaranteed to be delivered entirely in the first segment. + * Should the BTL be unable to deliver the data due to resource + * constraints the BTL will return a descriptor (via the OUT param) of + * size "payload_size + header_size". + * + * @param[IN] btl BTL module + * @param[IN] endpoint BTL addressing information + * @param[IN] convertor Data type convertor + * @param[IN] header Pointer to header. + * @param[IN] header_size Size of header. + * @param[IN] payload_size Size of payload (from convertor). + * @param[IN] order The ordering tag (may be MCA_BTL_NO_ORDER) + * @param[IN] flags Flags. + * @param[IN] tag The tag value used to notify the peer. + * @param[OUT] descriptor The descriptor to be returned unable to be sent immediately * (may be NULL). 
* * @retval OPAL_SUCCESS The send was successfully queued @@ -907,7 +921,6 @@ typedef int (*mca_btl_base_module_send_fn_t)(struct mca_btl_base_module_t *btl, * @retval OPAL_ERR_RESOURCE_BUSY The BTL is busy a descriptor will be returned * (via the OUT param) if descriptors are available */ - typedef int (*mca_btl_base_module_sendi_fn_t)(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct opal_convertor_t *convertor, void *header, @@ -917,6 +930,7 @@ typedef int (*mca_btl_base_module_sendi_fn_t)(struct mca_btl_base_module_t *btl, /** * Initiate an asynchronous put. + * * Completion Semantics: if this function returns a 1 then the operation * is complete. a return of OPAL_SUCCESS indicates * the put operation has been queued with the @@ -924,20 +938,20 @@ typedef int (*mca_btl_base_module_sendi_fn_t)(struct mca_btl_base_module_t *btl, * until all outstanding operations on that handle * have been completed. * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param local_address (IN) Local address to put from (registered) - * @param remote_address (IN) Remote address to put to (registered remotely) - * @param local_handle (IN) Registration handle for region containing + * @param[IN] btl BTL module + * @param[IN] endpoint BTL addressing information + * @param[IN] local_address Local address to put from (registered) + * @param[IN] remote_address Remote address to put to (registered remotely) + * @param[IN] local_handle Registration handle for region containing * (local_address, local_address + size) - * @param remote_handle (IN) Remote registration handle for region containing + * @param[IN] remote_handle Remote registration handle for region containing * (remote_address, remote_address + size) - * @param size (IN) Number of bytes to put - * @param flags (IN) Flags for this put operation - * @param order (IN) Ordering - * @param cbfunc (IN) Function to call on completion (if queued) - * @param 
cbcontext (IN) Context for the callback - * @param cbdata (IN) Data for callback + * @param[IN] size Number of bytes to put + * @param[IN] flags Flags for this put operation + * @param[IN] order Ordering + * @param[IN] cbfunc Function to call on completion (if queued) + * @param[IN] cbcontext Context for the callback + * @param[IN] cbdata Data for callback * * @retval OPAL_SUCCESS The descriptor was successfully queued for a put * @retval OPAL_ERROR The descriptor was NOT successfully queued for a put @@ -962,20 +976,20 @@ typedef int (*mca_btl_base_module_put_fn_t)( * until all outstanding operations on that handle * have been completed. * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param local_address (IN) Local address to put from (registered) - * @param remote_address (IN) Remote address to put to (registered remotely) - * @param local_handle (IN) Registration handle for region containing + * @param[IN] btl BTL module + * @param[IN] endpoint BTL addressing information + * @param[IN] local_address Local address to put from (registered) + * @param[IN] remote_address Remote address to put to (registered remotely) + * @param[IN] local_handle Registration handle for region containing * (local_address, local_address + size) - * @param remote_handle (IN) Remote registration handle for region containing + * @param[IN] remote_handle Remote registration handle for region containing * (remote_address, remote_address + size) - * @param size (IN) Number of bytes to put - * @param flags (IN) Flags for this put operation - * @param order (IN) Ordering - * @param cbfunc (IN) Function to call on completion (if queued) - * @param cbcontext (IN) Context for the callback - * @param cbdata (IN) Data for callback + * @param[IN] size Number of bytes to put + * @param[IN] flags Flags for this put operation + * @param[IN] order Ordering + * @param[IN] cbfunc Function to call on completion (if queued) + * @param[IN] cbcontext Context for the 
callback + * @param[IN] cbdata Data for callback * * @retval OPAL_SUCCESS The descriptor was successfully queued for a put * @retval OPAL_ERROR The descriptor was NOT successfully queued for a put @@ -998,18 +1012,24 @@ typedef int (*mca_btl_base_module_get_fn_t)( * the atomic operation has been queued with the * network. * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param remote_address (IN) Remote address to put to (registered remotely) - * @param remote_handle (IN) Remote registration handle for region containing + * After the operation is complete the remote address specified by {remote_address} and + * {remote_handle} will be updated with (*remote_address) = (*remote_address) op operand. + * The btl will guarantee consistency of atomic operations performed via the btl. Note, + * however, that not all btls will provide consistency between btl atomic operations and + * cpu or other btl atomics. + * + * @param[IN] btl BTL module + * @param[IN] endpoint BTL addressing information + * @param[IN] remote_address Remote address to put to (registered remotely) + * @param[IN] remote_handle Remote registration handle for region containing * (remote_address, remote_address + 8) - * @param op (IN) Operation to perform - * @param operand (IN) Operand for the operation - * @param flags (IN) Flags for this atomic operation - * @param order (IN) Ordering - * @param cbfunc (IN) Function to call on completion (if queued) - * @param cbcontext (IN) Context for the callback - * @param cbdata (IN) Data for callback + * @param[IN] op Operation to perform + * @param[IN] operand Operand for the operation + * @param[IN] flags Flags for this atomic operation + * @param[IN] order Ordering + * @param[IN] cbfunc Function to call on completion (if queued) + * @param[IN] cbcontext Context for the callback + * @param[IN] cbdata Data for callback * * @retval OPAL_SUCCESS The operation was successfully queued * @retval 1 The operation is complete @@ 
-1019,12 +1039,6 @@ typedef int (*mca_btl_base_module_get_fn_t)( * @retval OPAL_ERR_NOT_AVAILABLE Atomic operation can not be performed due to * alignment restrictions or the operation {op} is not supported * by the hardware. - * - * After the operation is complete the remote address specified by {remote_address} and - * {remote_handle} will be updated with (*remote_address) = (*remote_address) op operand. - * The btl will guarantee consistency of atomic operations performed via the btl. Note, - * however, that not all btls will provide consistency between btl atomic operations and - * cpu or other btl atomics. */ typedef int (*mca_btl_base_module_atomic_op64_fn_t)( struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, @@ -1039,21 +1053,28 @@ typedef int (*mca_btl_base_module_atomic_op64_fn_t)( * the atomic operation has been queued with the * network. * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param local_address (OUT) Local address to store the result in - * @param remote_address (IN) Remote address perfom operation on to (registered remotely) - * @param local_handle (IN) Local registration handle for region containing + * After the operation is complete the remote address specified by {remote_address} and + * {remote_handle} will be updated with (*remote_address) = (*remote_address) op operand. + * {local_address} will be updated with the previous value stored in {remote_address}. + * The btl will guarantee consistency of atomic operations performed via the btl. Note, + * however, that not all btls will provide consistency between btl atomic operations and + * cpu or other btl atomics. 
+ * + * @param[IN] btl BTL module + * @param[IN] endpoint BTL addressing information + * @param[OUT] local_address Local address to store the result in + * @param[IN] remote_address Remote address to perform operation on (registered remotely) + * @param[IN] local_handle Local registration handle for region containing * (local_address, local_address + 8) - * @param remote_handle (IN) Remote registration handle for region containing + * @param[IN] remote_handle Remote registration handle for region containing * (remote_address, remote_address + 8) - * @param op (IN) Operation to perform - * @param operand (IN) Operand for the operation - * @param flags (IN) Flags for this atomic operation - * @param order (IN) Ordering - * @param cbfunc (IN) Function to call on completion (if queued) - * @param cbcontext (IN) Context for the callback - * @param cbdata (IN) Data for callback + * @param[IN] op Operation to perform + * @param[IN] operand Operand for the operation + * @param[IN] flags Flags for this atomic operation + * @param[IN] order Ordering + * @param[IN] cbfunc Function to call on completion (if queued) + * @param[IN] cbcontext Context for the callback + * @param[IN] cbdata Data for callback * * @retval OPAL_SUCCESS The operation was successfully queued * @retval 1 The operation is complete @@ -1063,13 +1084,6 @@ typedef int (*mca_btl_base_module_atomic_op64_fn_t)( * @retval OPAL_ERR_NOT_AVAILABLE Atomic operation can not be performed due to * alignment restrictions or the operation {op} is not supported * by the hardware. - * - * After the operation is complete the remote address specified by {remote_address} and - * {remote_handle} will be updated with (*remote_address) = (*remote_address) op operand. - * {local_address} will be updated with the previous value stored in {remote_address}. - * The btl will guarantee consistency of atomic operations performed via the btl.
Note, - * however, that not all btls will provide consistency between btl atomic operations and - * cpu or other btl atomics. */ typedef int (*mca_btl_base_module_atomic_fop64_fn_t)( struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, @@ -1086,21 +1100,28 @@ typedef int (*mca_btl_base_module_atomic_fop64_fn_t)( * the atomic operation has been queued with the * network. * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param local_address (OUT) Local address to store the result in - * @param remote_address (IN) Remote address perfom operation on to (registered remotely) - * @param local_handle (IN) Local registration handle for region containing + * After the operation is complete the remote address specified by {remote_address} and + * {remote_handle} will be updated with {value} if *remote_address == compare. + * {local_address} will be updated with the previous value stored in {remote_address}. + * The btl will guarantee consistency of atomic operations performed via the btl. Note, + * however, that not all btls will provide consistency between btl atomic operations and + * cpu atomics. 
+ * + * @param[IN] btl BTL module + * @param[IN] endpoint BTL addressing information + * @param[OUT] local_address Local address to store the result in + * @param[IN] remote_address Remote address to perform the operation on (registered remotely) + * @param[IN] local_handle Local registration handle for region containing * (local_address, local_address + 8) - * @param remote_handle (IN) Remote registration handle for region containing + * @param[IN] remote_handle Remote registration handle for region containing * (remote_address, remote_address + 8) - * @param compare (IN) Operand for the operation - * @param value (IN) Value to store on success - * @param flags (IN) Flags for this atomic operation - * @param order (IN) Ordering - * @param cbfunc (IN) Function to call on completion (if queued) - * @param cbcontext (IN) Context for the callback - * @param cbdata (IN) Data for callback + * @param[IN] compare Value to compare against the value stored at the remote address + * @param[IN] value Value to store on success + * @param[IN] flags Flags for this atomic operation + * @param[IN] order Ordering + * @param[IN] cbfunc Function to call on completion (if queued) + * @param[IN] cbcontext Context for the callback + * @param[IN] cbdata Data for callback * * @retval OPAL_SUCCESS The operation was successfully queued * @retval 1 The operation is complete @@ -1110,13 +1131,6 @@ typedef int (*mca_btl_base_module_atomic_fop64_fn_t)( * @retval OPAL_ERR_NOT_AVAILABLE Atomic operation can not be performed due to * alignment restrictions or the operation {op} is not supported * by the hardware. - * - * After the operation is complete the remote address specified by {remote_address} and - * {remote_handle} will be updated with {value} if *remote_address == compare. - * {local_address} will be updated with the previous value stored in {remote_address}. - * The btl will guarantee consistency of atomic operations performed via the btl. 
Note, - * however, that not all btls will provide consistency between btl atomic operations and - * cpu atomics. */ typedef int (*mca_btl_base_module_atomic_cswap64_fn_t)( struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, @@ -1128,9 +1142,9 @@ typedef int (*mca_btl_base_module_atomic_cswap64_fn_t)( /** * Diagnostic dump of btl state. * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL endpoint - * @param verbose (IN) Verbosity level + * @param[IN] btl BTL module + * @param[IN] endpoint BTL endpoint + * @param[IN] verbose Verbosity level */ typedef void (*mca_btl_base_module_dump_fn_t)(struct mca_btl_base_module_t *btl, @@ -1140,8 +1154,8 @@ typedef void (*mca_btl_base_module_dump_fn_t)(struct mca_btl_base_module_t *btl, /** * Flush all outstanding RDMA operations on an endpoint or all endpoints. * - * @param btl (IN) BTL module - * @param endpoint (IN) Endpoint to flush (NULL == all) + * @param[IN] btl BTL module + * @param[IN] endpoint Endpoint to flush (NULL == all) * * This function returns when all outstanding RDMA (put, get, atomic) operations * that were started prior to the flush call have completed. This call does @@ -1225,7 +1239,14 @@ struct mca_btl_base_module_t { mca_btl_base_module_flush_fn_t btl_flush; /**< flush all previous operations on an endpoint */ - unsigned char padding[256]; /**< padding to future-proof the btl module */ + + union { + struct { + void *btl_am_data; + }; + unsigned char padding[256]; /**< padding to future-proof the + btl module */ + }; }; typedef struct mca_btl_base_module_t mca_btl_base_module_t; diff --git a/opal/mca/btl/ofi/btl_ofi_component.c b/opal/mca/btl/ofi/btl_ofi_component.c index 8d9f3f4025e..e2f5512ae93 100644 --- a/opal/mca/btl/ofi/btl_ofi_component.c +++ b/opal/mca/btl/ofi/btl_ofi_component.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * - * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. 
+ * Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2020 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -123,6 +123,7 @@ static int validate_info(struct fi_info *info, uint64_t required_caps, char **in /* Register the MCA parameters */ static int mca_btl_ofi_component_register(void) { + int ret; char *msg; mca_btl_ofi_module_t *module = &mca_btl_ofi_module_template; @@ -191,7 +192,10 @@ static int mca_btl_ofi_component_register(void) /* for now we want this component to lose to the MTL. */ module->super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 50; - opal_common_ofi_register_mca_variables(&mca_btl_ofi_component.super.btl_version); + ret = opal_common_ofi_mca_register(&mca_btl_ofi_component.super.btl_version); + if (OPAL_SUCCESS != ret) { + return ret; + } return mca_btl_base_param_register(&mca_btl_ofi_component.super.btl_version, &module->super); } @@ -199,7 +203,7 @@ static int mca_btl_ofi_component_register(void) static int mca_btl_ofi_component_open(void) { mca_btl_ofi_component.module_count = 0; - return OPAL_SUCCESS; + return opal_common_ofi_open(); } /* @@ -207,10 +211,11 @@ static int mca_btl_ofi_component_open(void) */ static int mca_btl_ofi_component_close(void) { - opal_common_ofi_mca_deregister(); + int ret; + ret = opal_common_ofi_close(); /* If we don't sleep, sockets provider freaks out. Ummm this is a scary comment */ sleep(1); - return OPAL_SUCCESS; + return ret; } void mca_btl_ofi_exit(void) @@ -258,8 +263,6 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules, struct fi_domain_attr domain_attr = {0}; uint64_t required_caps; - opal_common_ofi_mca_register(); - switch (mca_btl_ofi_component.mode) { case MCA_BTL_OFI_MODE_TWO_SIDED: @@ -443,6 +446,19 @@ static int mca_btl_ofi_init_device(struct fi_info *info) * to prevent races. 
*/ mca_btl_ofi_rcache_init(module); + /* for similar reasons to the rcache call, this must be called + * during single threaded part of the code and before Libfabric + * configures its memory monitors. Easiest to do that before + * domain open. Silently ignore not-supported errors, as they + * are not critical to program correctness, but only indicate + * that Libfabric will have to pick a different, possibly less + * optimal, monitor. */ + rc = opal_common_ofi_export_memory_monitor(); + if (0 != rc && -FI_ENOSYS != rc) { + BTL_VERBOSE(("Failed to inject Libfabric memory monitor: %s", + fi_strerror(-rc))); + } + linux_device_name = info->domain_attr->name; BTL_VERBOSE( ("initializing dev:%s provider:%s", linux_device_name, info->fabric_attr->prov_name)); diff --git a/opal/mca/btl/ofi/btl_ofi_module.c b/opal/mca/btl/ofi/btl_ofi_module.c index cffa0c27317..e632b5f8bd2 100644 --- a/opal/mca/btl/ofi/btl_ofi_module.c +++ b/opal/mca/btl/ofi/btl_ofi_module.c @@ -390,8 +390,10 @@ mca_btl_ofi_module_t *mca_btl_ofi_module_alloc(int mode) module->super.btl_register_mem = mca_btl_ofi_register_mem; module->super.btl_deregister_mem = mca_btl_ofi_deregister_mem; + /* btl/ofi supports remote completion because it requires the FI_DELIVERY_COMPLETE capability + */ module->super.btl_flags |= MCA_BTL_FLAGS_ATOMIC_FOPS | MCA_BTL_FLAGS_ATOMIC_OPS - | MCA_BTL_FLAGS_RDMA; + | MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION; module->super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_SWAP | MCA_BTL_ATOMIC_SUPPORTS_CSWAP diff --git a/opal/mca/btl/portals4/btl_portals4.c b/opal/mca/btl/portals4/btl_portals4.c index 899d7947bdb..775b8858bed 100644 --- a/opal/mca/btl/portals4/btl_portals4.c +++ b/opal/mca/btl/portals4/btl_portals4.c @@ -175,14 +175,14 @@ static int btl_portals4_init_interface(void) } OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlMEAppend (overflow list) OK for NI %d", interface)); - } - ret = 
mca_btl_portals4_recv_enable(portals4_btl); - if (PTL_OK != ret) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: Initialization of recv buffer failed: %d", __FILE__, __LINE__, - ret); - goto error; + ret = mca_btl_portals4_recv_enable(portals4_btl); + if (PTL_OK != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: Initialization of recv buffer failed: %d", __FILE__, __LINE__, + ret); + goto error; + } } return OPAL_SUCCESS; @@ -522,8 +522,7 @@ mca_btl_base_descriptor_t *mca_btl_portals4_prepare_src(struct mca_btl_base_modu uint8_t order, size_t reserve, size_t *size, uint32_t flags) { - struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t *) btl_base; - mca_btl_portals4_frag_t *frag; + mca_btl_portals4_frag_t *frag = NULL; size_t max_data = *size; struct iovec iov; uint32_t iov_count = 1; @@ -531,7 +530,8 @@ mca_btl_base_descriptor_t *mca_btl_portals4_prepare_src(struct mca_btl_base_modu OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "mca_btl_portals4_prepare_src NI=%d reserve=%ld size=%ld max_data=%ld\n", - portals4_btl->interface_num, reserve, *size, max_data)); + ((struct mca_btl_portals4_module_t *) btl_base)->interface_num, + reserve, *size, max_data)); if (0 != reserve || 0 != opal_convertor_need_buffers(convertor)) { OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, @@ -629,11 +629,11 @@ int mca_btl_portals4_deregister_mem(mca_btl_base_module_t *btl_base, mca_btl_base_registration_handle_t *handle) { int ret; - struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t *) btl_base; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "mca_btl_portals4_deregister_mem NI=%d handle=%p key=%ld me_h=%d\n", - portals4_btl->interface_num, (void *) handle, handle->key, handle->me_h)); + ((struct mca_btl_portals4_module_t *) btl_base)->interface_num, + (void *) handle, handle->key, handle->me_h)); if 
(!PtlHandleIsEqual(handle->me_h, PTL_INVALID_HANDLE)) { ret = PtlMEUnlink(handle->me_h); diff --git a/opal/mca/btl/portals4/btl_portals4_component.c b/opal/mca/btl/portals4/btl_portals4_component.c index 364bdfb536e..c5f89eb4bec 100644 --- a/opal/mca/btl/portals4/btl_portals4_component.c +++ b/opal/mca/btl/portals4/btl_portals4_component.c @@ -636,8 +636,8 @@ int mca_btl_portals4_component_progress(void) recv_descriptor.cbdata = reg->cbdata; OPAL_OUTPUT_VERBOSE((50, opal_btl_base_framework.framework_output, - "PTL_EVENT_PUT: tag=%x base_descriptor=%p cbfunc: %lx\n", tag, - (void *) &btl_base_descriptor, (uint64_t) reg->cbfunc)); + "PTL_EVENT_PUT: tag=%x recv_descriptor=%p cbfunc: %lx\n", tag, + (void *) &recv_descriptor, (uint64_t) reg->cbfunc)); reg->cbfunc(&portals4_btl->super, &recv_descriptor); goto done; diff --git a/opal/mca/btl/self/btl_self_component.c b/opal/mca/btl/self/btl_self_component.c index 576716e4d73..875fd066bb7 100644 --- a/opal/mca/btl/self/btl_self_component.c +++ b/opal/mca/btl/self/btl_self_component.c @@ -107,6 +107,8 @@ static int mca_btl_self_component_register(void) mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_self.btl_min_rdma_pipeline_size = 0; mca_btl_self.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND; + /* for self, remote completion is local completion */ + mca_btl_self.btl_flags |= MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION; mca_btl_self.btl_bandwidth = 100; mca_btl_self.btl_latency = 0; mca_btl_base_param_register(&mca_btl_self_component.super.btl_version, &mca_btl_self); diff --git a/opal/mca/btl/sm/Makefile.am b/opal/mca/btl/sm/Makefile.am index 4439a91c598..9b19dad4f88 100644 --- a/opal/mca/btl/sm/Makefile.am +++ b/opal/mca/btl/sm/Makefile.am @@ -13,7 +13,7 @@ # Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. -# Copyright (c) 2020 Google, LLC. All rights reserved. 
+# Copyright (c) 2020-2021 Google, LLC. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -29,7 +29,6 @@ libmca_btl_sm_la_sources = \ btl_sm_module.c \ btl_sm.h \ btl_sm_component.c \ - btl_sm_endpoint.h \ btl_sm_fifo.h \ btl_sm_frag.c \ btl_sm_frag.h \ @@ -38,10 +37,6 @@ libmca_btl_sm_la_sources = \ btl_sm_fbox.h \ btl_sm_get.c \ btl_sm_put.c \ - btl_sm_xpmem.c \ - btl_sm_xpmem.h \ - btl_sm_knem.c \ - btl_sm_knem.h \ btl_sm_types.h \ btl_sm_virtual.h diff --git a/opal/mca/btl/sm/btl_sm.h b/opal/mca/btl/sm/btl_sm.h index 2ffa00b76d6..43e74f17d10 100644 --- a/opal/mca/btl/sm/btl_sm.h +++ b/opal/mca/btl/sm/btl_sm.h @@ -61,9 +61,6 @@ #include "opal/mca/pmix/pmix-internal.h" -#include "btl_sm_knem.h" -#include "btl_sm_xpmem.h" - BEGIN_C_DECLS #define min(a, b) ((a) < (b) ? (a) : (b)) @@ -121,32 +118,12 @@ int mca_btl_sm_sendi(struct mca_btl_base_module_t *btl, struct mca_btl_base_endp * @param endpoint (IN) BTL addressing information * @param descriptor (IN) Description of the data to be transferred */ -#if OPAL_BTL_SM_HAVE_XPMEM -int mca_btl_sm_put_xpmem(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata); -#endif - -#if OPAL_BTL_SM_HAVE_CMA -int mca_btl_sm_put_cma(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata); -#endif - -#if OPAL_BTL_SM_HAVE_KNEM -int mca_btl_sm_put_knem(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - 
mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata); -#endif +int mca_btl_sm_put(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, + mca_btl_base_registration_handle_t *local_handle, + mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, + int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, + void *cbdata); /** * Initiate an synchronous get. @@ -155,34 +132,12 @@ int mca_btl_sm_put_knem(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *end * @param endpoint (IN) BTL addressing information * @param descriptor (IN) Description of the data to be transferred */ -#if OPAL_BTL_SM_HAVE_XPMEM -int mca_btl_sm_get_xpmem(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata); -#endif - -#if OPAL_BTL_SM_HAVE_CMA -int mca_btl_sm_get_cma(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata); -#endif - -#if OPAL_BTL_SM_HAVE_KNEM -int mca_btl_sm_get_knem(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata); 
-#endif - -ino_t mca_btl_sm_get_user_ns_id(void); +int mca_btl_sm_get(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, + mca_btl_base_registration_handle_t *local_handle, + mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, + int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, + void *cbdata); /** * Allocate a segment. @@ -202,6 +157,10 @@ mca_btl_base_descriptor_t *mca_btl_sm_alloc(struct mca_btl_base_module_t *btl, */ int mca_btl_sm_free(struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des); +static inline bool mca_btl_is_self_endpoint(mca_btl_base_endpoint_t *endpoint) { + return endpoint->peer_smp_rank == MCA_BTL_SM_LOCAL_RANK; +} + END_C_DECLS #endif diff --git a/opal/mca/btl/sm/btl_sm_component.c b/opal/mca/btl/sm/btl_sm_component.c index a7040296b62..ac09e25192c 100644 --- a/opal/mca/btl/sm/btl_sm_component.c +++ b/opal/mca/btl/sm/btl_sm_component.c @@ -21,7 +21,7 @@ * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. - * Copyright (c) 2019-2020 Google, Inc. All rights reserved. + * Copyright (c) 2019-2021 Google, Inc. All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * @@ -41,7 +41,8 @@ #include "opal/mca/btl/sm/btl_sm_fbox.h" #include "opal/mca/btl/sm/btl_sm_fifo.h" #include "opal/mca/btl/sm/btl_sm_frag.h" -#include "opal/mca/btl/sm/btl_sm_xpmem.h" +#include "opal/mca/smsc/base/base.h" +#include "opal/mca/smsc/smsc.h" #ifdef HAVE_SYS_STAT_H # include @@ -66,20 +67,6 @@ static int mca_btl_sm_component_register(void); static mca_btl_base_module_t ** mca_btl_sm_component_init(int *num_btls, bool enable_progress_threads, bool enable_mpi_threads); -/* This enumeration is in order of preference */ -static mca_base_var_enum_value_t single_copy_mechanisms[] = { -#if OPAL_BTL_SM_HAVE_XPMEM - {.value = MCA_BTL_SM_XPMEM, .string = "xpmem"}, -#endif -#if OPAL_BTL_SM_HAVE_CMA - {.value = MCA_BTL_SM_CMA, .string = "cma"}, -#endif -#if OPAL_BTL_SM_HAVE_KNEM - {.value = MCA_BTL_SM_KNEM, .string = "knem"}, -#endif - {.value = MCA_BTL_SM_NONE, .string = "none"}, - {.value = 0, .string = NULL}}; - /* * Shared Memory (SM) component instance. */ @@ -106,8 +93,6 @@ mca_btl_sm_component_t mca_btl_sm_component = { static int mca_btl_sm_component_register(void) { - mca_base_var_enum_t *new_enum; - (void) mca_base_var_group_component_register(&mca_btl_sm_component.super.btl_version, "Enhanced shared memory byte transport later"); @@ -146,25 +131,15 @@ static int mca_btl_sm_component_register(void) MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_sm_component.memcpy_limit); -#if OPAL_BTL_SM_HAVE_XPMEM - mca_btl_sm_component.log_attach_align = 21; - (void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, "log_align", - "Log base 2 of the alignment to use for xpmem " - "segments (default: 21, minimum: 12, maximum: 25)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_LOCAL, - &mca_btl_sm_component.log_attach_align); -#endif -#if OPAL_BTL_SM_HAVE_XPMEM && 64 == MCA_BTL_SM_BITNESS +#if 64 == MCA_BTL_SM_BITNESS 
mca_btl_sm_component.segment_size = 1 << 24; #else mca_btl_sm_component.segment_size = 1 << 22; #endif (void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, "segment_size", "Maximum size of all shared " -#if OPAL_BTL_SM_HAVE_XPMEM && 64 == MCA_BTL_SM_BITNESS +#if 64 == MCA_BTL_SM_BITNESS "memory buffers (default: 16M)", #else "memory buffers (default: 4M)", @@ -212,21 +187,6 @@ static int mca_btl_sm_component_register(void) MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_sm_component.fbox_size); - (void) mca_base_var_enum_create("btl_sm_single_copy_mechanisms", single_copy_mechanisms, - &new_enum); - - /* Default to the best available mechanism (see the enumerator for ordering) */ - mca_btl_sm_component.single_copy_mechanism = single_copy_mechanisms[0].value; - (void) - mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, - "single_copy_mechanism", - "Single copy mechanism to use (defaults to best available)", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, - MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_GROUP, - &mca_btl_sm_component.single_copy_mechanism); - OBJ_RELEASE(new_enum); - if (0 == access("/dev/shm", W_OK)) { mca_btl_sm_component.backing_directory = "/dev/shm"; } else { @@ -240,45 +200,19 @@ static int mca_btl_sm_component_register(void) MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_sm_component.backing_directory); -#if OPAL_BTL_SM_HAVE_KNEM - /* Currently disabling DMA mode by default; it's not clear that this is useful in all - * applications and architectures. 
*/ - mca_btl_sm_component.knem_dma_min = 0; - (void) mca_base_component_var_register( - &mca_btl_sm_component.super.btl_version, "knem_dma_min", - "Minimum message size (in bytes) to use the knem DMA mode; " - "ignored if knem does not support DMA mode (0 = do not use the " - "knem DMA mode, default: 0)", - MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_btl_sm_component.knem_dma_min); -#endif - mca_btl_sm.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH; - if (MCA_BTL_SM_XPMEM == mca_btl_sm_component.single_copy_mechanism) { - mca_btl_sm.super.btl_eager_limit = 32 * 1024; - mca_btl_sm.super.btl_rndv_eager_limit = mca_btl_sm.super.btl_eager_limit; - mca_btl_sm.super.btl_max_send_size = mca_btl_sm.super.btl_eager_limit; - mca_btl_sm.super.btl_min_rdma_pipeline_size = INT_MAX; - } else { - mca_btl_sm.super.btl_eager_limit = 4 * 1024; - mca_btl_sm.super.btl_rndv_eager_limit = 32 * 1024; - mca_btl_sm.super.btl_max_send_size = 32 * 1024; - mca_btl_sm.super.btl_min_rdma_pipeline_size = INT_MAX; - } + mca_btl_sm.super.btl_eager_limit = 4 * 1024; + mca_btl_sm.super.btl_rndv_eager_limit = 32 * 1024; + mca_btl_sm.super.btl_max_send_size = 32 * 1024; + mca_btl_sm.super.btl_min_rdma_pipeline_size = INT_MAX; mca_btl_sm.super.btl_rdma_pipeline_send_length = mca_btl_sm.super.btl_eager_limit; mca_btl_sm.super.btl_rdma_pipeline_frag_size = mca_btl_sm.super.btl_eager_limit; mca_btl_sm.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND; - if (MCA_BTL_SM_NONE != mca_btl_sm_component.single_copy_mechanism) { - /* True single copy mechanisms should provide better bandwidth */ - mca_btl_sm.super.btl_bandwidth = 40000; /* Mbs */ - } else { - mca_btl_sm.super.btl_bandwidth = 10000; /* Mbs */ - } - + mca_btl_sm.super.btl_bandwidth = 20000; /* Mbs */ mca_btl_sm.super.btl_latency = 1; /* Microsecs */ /* Call the BTL based to register its MCA params */ @@ -302,9 +236,6 @@ static int mca_btl_sm_component_open(void) 
OBJ_CONSTRUCT(&mca_btl_sm_component.lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_sm_component.pending_endpoints, opal_list_t); OBJ_CONSTRUCT(&mca_btl_sm_component.pending_fragments, opal_list_t); -#if OPAL_BTL_SM_HAVE_KNEM - mca_btl_sm.knem_fd = -1; -#endif return OPAL_SUCCESS; } @@ -323,17 +254,8 @@ static int mca_btl_sm_component_close(void) OBJ_DESTRUCT(&mca_btl_sm_component.pending_endpoints); OBJ_DESTRUCT(&mca_btl_sm_component.pending_fragments); - if (MCA_BTL_SM_XPMEM == mca_btl_sm_component.single_copy_mechanism - && NULL != mca_btl_sm_component.my_segment) { - munmap(mca_btl_sm_component.my_segment, mca_btl_sm_component.segment_size); - } - mca_btl_sm_component.my_segment = NULL; -#if OPAL_BTL_SM_HAVE_KNEM - mca_btl_sm_knem_fini(); -#endif - if (mca_btl_sm_component.mpool) { mca_btl_sm_component.mpool->mpool_finalize(mca_btl_sm_component.mpool); mca_btl_sm_component.mpool = NULL; @@ -342,157 +264,35 @@ static int mca_btl_sm_component_close(void) return OPAL_SUCCESS; } -/* - * mca_btl_sm_parse_proc_ns_user() tries to get the user namespace ID - * of the current process. - * Returns the ID of the user namespace. In the case of an error '0' is returned. 
- */ -ino_t mca_btl_sm_get_user_ns_id(void) -{ - struct stat buf; - - if (0 > stat("/proc/self/ns/user", &buf)) { - /* - * Something went wrong, probably an old kernel that does not support namespaces - * simply assume all processes are in the same user namespace and return 0 - */ - return 0; - } - - return buf.st_ino; -} static int mca_btl_base_sm_modex_send(void) { - union sm_modex_t modex; - int modex_size, rc; + mca_btl_sm_modex_t modex; + int modex_size; -#if OPAL_BTL_SM_HAVE_XPMEM - if (MCA_BTL_SM_XPMEM == mca_btl_sm_component.single_copy_mechanism) { - modex.xpmem.seg_id = mca_btl_sm_component.my_seg_id; - modex.xpmem.segment_base = mca_btl_sm_component.my_segment; - modex.xpmem.address_max = mca_btl_sm_component.my_address_max; + modex_size = sizeof(modex) - sizeof(modex.seg_ds); - modex_size = sizeof(modex.xpmem); - } else { -#endif - modex.other.seg_ds_size = opal_shmem_sizeof_shmem_ds(&mca_btl_sm_component.seg_ds); - memmove(&modex.other.seg_ds, &mca_btl_sm_component.seg_ds, modex.other.seg_ds_size); - modex.other.user_ns_id = mca_btl_sm_get_user_ns_id(); - /* - * If modex.other.user_ns_id is '0' something did not work out - * during user namespace detection. Assuming there are no - * namespaces available it will return '0' for all processes and - * the check later will see '0' everywhere and not disable CMA. 
- */ - modex_size = sizeof(modex.other); - -#if OPAL_BTL_SM_HAVE_XPMEM - } -#endif + modex.seg_ds_size = opal_shmem_sizeof_shmem_ds(&mca_btl_sm_component.seg_ds); + memmove(&modex.seg_ds, &mca_btl_sm_component.seg_ds, modex.seg_ds_size); + modex_size += modex.seg_ds_size; + int rc; OPAL_MODEX_SEND(rc, PMIX_LOCAL, &mca_btl_sm_component.super.btl_version, &modex, modex_size); return rc; } -#if OPAL_BTL_SM_HAVE_XPMEM || OPAL_BTL_SM_HAVE_CMA || OPAL_BTL_SM_HAVE_KNEM -static void mca_btl_sm_select_next_single_copy_mechanism(void) +static mca_btl_base_registration_handle_t * +mca_btl_sm_register_mem(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *base, size_t size, uint32_t flags) { - for (int i = 0; single_copy_mechanisms[i].value != MCA_BTL_SM_NONE; ++i) { - if (single_copy_mechanisms[i].value == mca_btl_sm_component.single_copy_mechanism) { - mca_btl_sm_component.single_copy_mechanism = single_copy_mechanisms[i + 1].value; - return; - } - } + return (mca_btl_base_registration_handle_t *) MCA_SMSC_CALL(register_region, base, size); } -#endif -static void mca_btl_sm_check_single_copy(void) +static int mca_btl_sm_deregister_mem_knem(struct mca_btl_base_module_t *btl, + struct mca_btl_base_registration_handle_t *handle) { -#if OPAL_BTL_SM_HAVE_XPMEM || OPAL_BTL_SM_HAVE_CMA || OPAL_BTL_SM_HAVE_KNEM - int initial_mechanism = mca_btl_sm_component.single_copy_mechanism; -#endif - -#if OPAL_BTL_SM_HAVE_XPMEM - if (MCA_BTL_SM_XPMEM == mca_btl_sm_component.single_copy_mechanism) { - /* try to create an xpmem segment for the entire address space */ - int rc = mca_btl_sm_xpmem_init(); - if (OPAL_SUCCESS != rc) { - if (MCA_BTL_SM_XPMEM == initial_mechanism) { - opal_show_help("help-btl-sm.txt", "xpmem-make-failed", true, - opal_process_info.nodename, errno, strerror(errno)); - } - - mca_btl_sm_select_next_single_copy_mechanism(); - } - } -#endif - -#if OPAL_BTL_SM_HAVE_CMA - if (MCA_BTL_SM_CMA == mca_btl_sm_component.single_copy_mechanism) { - 
/* Check if we have the proper permissions for CMA */ - char buffer = '0'; - bool cma_happy = false; - int fd; - - /* check system setting for current ptrace scope */ - fd = open("/proc/sys/kernel/yama/ptrace_scope", O_RDONLY); - if (0 <= fd) { - read(fd, &buffer, 1); - close(fd); - } - - /* ptrace scope 0 will allow an attach from any of the process owner's - * processes. ptrace scope 1 limits attachers to the process tree - * starting at the parent of this process. */ - if ('0' != buffer) { -# if defined PR_SET_PTRACER - /* try setting the ptrace scope to allow attach */ - int ret = prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0); - if (0 == ret) { - cma_happy = true; - } -# endif - } else { - cma_happy = true; - } - - if (!cma_happy) { - mca_btl_sm_select_next_single_copy_mechanism(); - - if (MCA_BTL_SM_CMA == initial_mechanism) { - opal_show_help("help-btl-sm.txt", "cma-permission-denied", true, - opal_process_info.nodename); - } - } else { - /* ptrace_scope will allow CMA */ - mca_btl_sm.super.btl_get = mca_btl_sm_get_cma; - mca_btl_sm.super.btl_put = mca_btl_sm_put_cma; - } - } -#endif - -#if OPAL_BTL_SM_HAVE_KNEM - if (MCA_BTL_SM_KNEM == mca_btl_sm_component.single_copy_mechanism) { - /* mca_btl_sm_knem_init will set the appropriate get/put functions */ - int rc = mca_btl_sm_knem_init(); - if (OPAL_SUCCESS != rc) { - if (MCA_BTL_SM_KNEM == initial_mechanism) { - opal_show_help("help-btl-sm.txt", "knem requested but not available", true, - opal_process_info.nodename); - } - - /* disable single copy */ - mca_btl_sm_select_next_single_copy_mechanism(); - } - } -#endif - - if (MCA_BTL_SM_NONE == mca_btl_sm_component.single_copy_mechanism) { - mca_btl_sm.super.btl_flags &= ~MCA_BTL_FLAGS_RDMA; - mca_btl_sm.super.btl_get = NULL; - mca_btl_sm.super.btl_put = NULL; - } + MCA_SMSC_CALL(deregister_region, (void *) handle); + return OPAL_SUCCESS; } /* @@ -513,15 +313,6 @@ mca_btl_sm_component_init(int *num_btls, bool enable_progress_threads, bool enab return NULL; } 
-#if OPAL_BTL_SM_HAVE_XPMEM - /* limit segment alignment to be between 4k and 16M */ - if (component->log_attach_align < 12) { - component->log_attach_align = 12; - } else if (component->log_attach_align > 25) { - component->log_attach_align = 25; - } -#endif - btls = (mca_btl_base_module_t **) calloc(1, sizeof(mca_btl_base_module_t *)); if (NULL == btls) { return NULL; @@ -542,42 +333,62 @@ mca_btl_sm_component_init(int *num_btls, bool enable_progress_threads, bool enab /* no fast boxes allocated initially */ component->num_fbox_in_endpoints = 0; - mca_btl_sm_check_single_copy(); + rc = mca_smsc_base_select(); + if (OPAL_SUCCESS == rc) { + mca_btl_sm.super.btl_flags |= MCA_BTL_FLAGS_RDMA; + mca_btl_sm.super.btl_get = mca_btl_sm_get; + mca_btl_sm.super.btl_put = mca_btl_sm_put; - if (MCA_BTL_SM_XPMEM != mca_btl_sm_component.single_copy_mechanism) { - char *sm_file; + mca_btl_sm.super.btl_bandwidth = 40000; /* Mbs */ - rc = opal_asprintf(&sm_file, "%s" OPAL_PATH_SEP "sm_segment.%s.%u.%x.%d", - mca_btl_sm_component.backing_directory, opal_process_info.nodename, - geteuid(), OPAL_PROC_MY_NAME.jobid, MCA_BTL_SM_LOCAL_RANK); - if (0 > rc) { - free(btls); - return NULL; + if (mca_smsc_base_has_feature(MCA_SMSC_FEATURE_CAN_MAP)) { + mca_btl_sm.super.btl_eager_limit = 32 * 1024; + mca_btl_sm.super.btl_rndv_eager_limit = mca_btl_sm.super.btl_eager_limit; + mca_btl_sm.super.btl_max_send_size = mca_btl_sm.super.btl_eager_limit; + mca_btl_sm.super.btl_min_rdma_pipeline_size = INT_MAX; } - opal_pmix_register_cleanup(sm_file, false, false, false); - - rc = opal_shmem_segment_create(&component->seg_ds, sm_file, component->segment_size); - free(sm_file); - if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("Could not create shared memory segment")); - free(btls); - return NULL; + if (mca_smsc_base_has_feature(MCA_SMSC_FEATURE_REQUIRE_REGISTATION)) { + ssize_t handle_size = mca_smsc_base_registration_data_size(); + if (handle_size > 0) { + mca_btl_sm.super.btl_registration_handle_size = 
(size_t) handle_size; + mca_btl_sm.super.btl_register_mem = mca_btl_sm_register_mem; + mca_btl_sm.super.btl_deregister_mem = mca_btl_sm_deregister_mem_knem; + } else { + BTL_ERROR(("single-copy component requires registration but could not provide the " + "registration handle size")); + rc = (int) handle_size; + } } + } + if (OPAL_SUCCESS != rc) { + mca_btl_sm.super.btl_flags &= ~MCA_BTL_FLAGS_RDMA; + mca_btl_sm.super.btl_get = NULL; + mca_btl_sm.super.btl_put = NULL; + } - component->my_segment = opal_shmem_segment_attach(&component->seg_ds); - if (NULL == component->my_segment) { - BTL_VERBOSE(("Could not attach to just created shared memory segment")); - goto failed; - } - } else { - /* when using xpmem it is safe to use an anonymous segment */ - component->my_segment = mmap(NULL, component->segment_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_SHARED, -1, 0); - if ((void *) -1 == component->my_segment) { - BTL_VERBOSE(("Could not create anonymous memory segment")); - free(btls); - return NULL; - } + char *sm_file; + + rc = opal_asprintf(&sm_file, "%s" OPAL_PATH_SEP "sm_segment.%s.%u.%x.%d", + mca_btl_sm_component.backing_directory, opal_process_info.nodename, + geteuid(), OPAL_PROC_MY_NAME.jobid, MCA_BTL_SM_LOCAL_RANK); + if (0 > rc) { + free(btls); + return NULL; + } + opal_pmix_register_cleanup(sm_file, false, false, false); + + rc = opal_shmem_segment_create(&component->seg_ds, sm_file, component->segment_size); + free(sm_file); + if (OPAL_SUCCESS != rc) { + BTL_VERBOSE(("Could not create shared memory segment")); + free(btls); + return NULL; + } + + component->my_segment = opal_shmem_segment_attach(&component->seg_ds); + if (NULL == component->my_segment) { + BTL_VERBOSE(("Could not attach to just created shared memory segment")); + goto failed; } /* initialize my fifo */ @@ -599,12 +410,7 @@ mca_btl_sm_component_init(int *num_btls, bool enable_progress_threads, bool enab return btls; failed: -#if OPAL_BTL_SM_HAVE_XPMEM - if (MCA_BTL_SM_XPMEM == 
mca_btl_sm_component.single_copy_mechanism) { - munmap(component->my_segment, component->segment_size); - } else -#endif - opal_shmem_unlink(&component->seg_ds); + opal_shmem_unlink(&component->seg_ds); if (btls) { free(btls); @@ -630,23 +436,17 @@ void mca_btl_sm_poll_handle_frag(mca_btl_sm_hdr_t *hdr, struct mca_btl_base_endp .cbdata = reg->cbdata}; if (hdr->flags & MCA_BTL_SM_FLAG_SINGLE_COPY) { -#if OPAL_BTL_SM_HAVE_XPMEM - mca_rcache_base_registration_t *xpmem_reg; - - xpmem_reg = sm_get_registation(endpoint, hdr->sc_iov.iov_base, hdr->sc_iov.iov_len, 0, - &segments[1].seg_addr.pval); - assert(NULL != xpmem_reg); + void *ctx = MCA_SMSC_CALL(map_peer_region, endpoint->smsc_endpoint, /*flags=*/0, + hdr->sc_iov.iov_base, hdr->sc_iov.iov_len, + &segments[1].seg_addr.pval); + assert(NULL != ctx); segments[1].seg_len = hdr->sc_iov.iov_len; frag.des_segment_count = 2; /* recv upcall */ reg->cbfunc(&mca_btl_sm.super, &frag); - sm_return_registration(xpmem_reg, endpoint); -#else - BTL_ERROR(("illegal flag set in incoming fragment")); - _exit(EXIT_FAILURE); -#endif + MCA_SMSC_CALL(unmap_peer_region, ctx); } else { reg->cbfunc(&mca_btl_sm.super, &frag); } diff --git a/opal/mca/btl/sm/btl_sm_get.c b/opal/mca/btl/sm/btl_sm_get.c index dc52e7ad939..10ed71ca165 100644 --- a/opal/mca/btl/sm/btl_sm_get.c +++ b/opal/mca/btl/sm/btl_sm_get.c @@ -4,7 +4,7 @@ * reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2019 Google, Inc. All rights reserved. + * Copyright (c) 2019-2022 Google, Inc. All rights reserved. * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
@@ -18,18 +18,8 @@ #include "opal_config.h" #include "opal/mca/btl/sm/btl_sm.h" -#include "opal/mca/btl/sm/btl_sm_endpoint.h" #include "opal/mca/btl/sm/btl_sm_frag.h" -#include "opal/mca/btl/sm/btl_sm_xpmem.h" - -#if OPAL_BTL_SM_HAVE_CMA -# include - -# if OPAL_CMA_NEED_SYSCALL_DEFS -# include "opal/sys/cma.h" -# endif /* OPAL_CMA_NEED_SYSCALL_DEFS */ - -#endif +#include "opal/mca/smsc/smsc.h" /** * Initiate an synchronous get. @@ -38,131 +28,22 @@ * @param endpoint (IN) BTL addressing information * @param descriptor (IN) Description of the data to be transferred */ -#if OPAL_BTL_SM_HAVE_XPMEM -int mca_btl_sm_get_xpmem(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata) -{ - mca_rcache_base_registration_t *reg; - void *rem_ptr; - - /* silence warning about unused arguments */ - (void) local_handle; - (void) remote_handle; - - reg = sm_get_registation(endpoint, (void *) (intptr_t) remote_address, size, 0, &rem_ptr); - if (OPAL_UNLIKELY(NULL == rem_ptr)) { - return OPAL_ERROR; - } - - sm_memmove(local_address, rem_ptr, size); - - sm_return_registration(reg, endpoint); - - /* always call the callback function */ - cbfunc(btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); - - return OPAL_SUCCESS; -} -#endif -#if OPAL_BTL_SM_HAVE_CMA -int mca_btl_sm_get_cma(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata) +int mca_btl_sm_get(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, + void 
*local_address, uint64_t remote_address, + mca_btl_base_registration_handle_t *local_handle, + mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, + int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, + void *cbdata) { - struct iovec src_iov = {.iov_base = (void *) (intptr_t) remote_address, .iov_len = size}; - struct iovec dst_iov = {.iov_base = local_address, .iov_len = size}; - ssize_t ret; - - /* - * According to the man page : - * "On success, process_vm_readv() returns the number of bytes read and - * process_vm_writev() returns the number of bytes written. This return - * value may be less than the total number of requested bytes, if a - * partial read/write occurred. (Partial transfers apply at the - * granularity of iovec elements. These system calls won't perform a - * partial transfer that splits a single iovec element.)". - * So since we use a single iovec element, the returned size should either - * be 0 or size, and the do loop should not be needed here. - * We tried on various Linux kernels with size > 2 GB, and surprisingly, - * the returned value is always 0x7ffff000 (fwiw, it happens to be the size - * of the larger number of pages that fits a signed 32 bits integer). - * We do not know whether this is a bug from the kernel, the libc or even - * the man page, but for the time being, we do as is process_vm_readv() could - * return any value. 
- */ - do { - ret = process_vm_readv(endpoint->segment_data.other.seg_ds->seg_cpid, &dst_iov, 1, &src_iov, - 1, 0); - if (0 > ret) { - if (ESRCH == errno) { - BTL_PEER_ERROR(NULL, ("CMA read %ld, expected %lu, errno = %d\n", (long) ret, - (unsigned long) size, errno)); - return OPAL_ERROR; - } - BTL_ERROR(("CMA read %ld, expected %lu, errno = %d\n", (long) ret, (unsigned long) size, - errno)); - return OPAL_ERROR; + if (!mca_btl_is_self_endpoint(endpoint)) { + int ret = MCA_SMSC_CALL(copy_from, endpoint->smsc_endpoint, local_address, + (void *) (intptr_t) remote_address, size, remote_handle); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + return ret; } - src_iov.iov_base = (void *) ((char *) src_iov.iov_base + ret); - src_iov.iov_len -= ret; - dst_iov.iov_base = (void *) ((char *) dst_iov.iov_base + ret); - dst_iov.iov_len -= ret; - } while (0 < src_iov.iov_len); - - /* always call the callback function */ - cbfunc(btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); - - return OPAL_SUCCESS; -} -#endif - -#if OPAL_BTL_SM_HAVE_KNEM -int mca_btl_sm_get_knem(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata) -{ - struct knem_cmd_param_iovec recv_iovec; - struct knem_cmd_inline_copy icopy; - - /* Fill in the ioctl data fields. There's no async completion, so - we don't need to worry about getting a slot, etc. 
*/ - recv_iovec.base = (uintptr_t) local_address; - recv_iovec.len = size; - icopy.local_iovec_array = (uintptr_t) &recv_iovec; - icopy.local_iovec_nr = 1; - icopy.remote_cookie = remote_handle->cookie; - icopy.remote_offset = remote_address - remote_handle->base_addr; - icopy.write = 0; - icopy.flags = 0; - - /* Use the DMA flag if knem supports it *and* the segment length - * is greater than the cutoff. Not that if DMA is not supported - * or the user specified 0 for knem_dma_min the knem_dma_min was - * set to UINT_MAX in mca_btl_sm_knem_init. */ - if (mca_btl_sm_component.knem_dma_min <= size) { - icopy.flags = KNEM_FLAG_DMA; - } - /* synchronous flags only, no need to specify icopy.async_status_index */ - - /* When the ioctl returns, the transfer is done and we can invoke - the btl callback and return the frag */ - if (OPAL_UNLIKELY(0 != ioctl(mca_btl_sm.knem_fd, KNEM_CMD_INLINE_COPY, &icopy))) { - return OPAL_ERROR; - } - - if (KNEM_STATUS_FAILED == icopy.current_status) { - return OPAL_ERROR; + } else { + memcpy(local_address, (void *)(uintptr_t) remote_address, size); } /* always call the callback function */ @@ -170,4 +51,3 @@ int mca_btl_sm_get_knem(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *end return OPAL_SUCCESS; } -#endif diff --git a/opal/mca/btl/sm/btl_sm_knem.c b/opal/mca/btl/sm/btl_sm_knem.c deleted file mode 100644 index a005beeef05..00000000000 --- a/opal/mca/btl/sm/btl_sm_knem.c +++ /dev/null @@ -1,203 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal/mca/btl/sm/btl_sm.h" - -#if OPAL_BTL_SM_HAVE_KNEM - -# include - -# include -# include -# include -# include - -# include "opal/util/show_help.h" - -OBJ_CLASS_INSTANCE(mca_btl_sm_registration_handle_t, mca_rcache_base_registration_t, NULL, NULL); - -static int mca_btl_sm_knem_reg(void *reg_data, void *base, size_t size, - mca_rcache_base_registration_t *reg) -{ - mca_btl_sm_registration_handle_t *knem_reg = (mca_btl_sm_registration_handle_t *) reg; - struct knem_cmd_create_region knem_cr; - struct knem_cmd_param_iovec knem_iov; - - knem_iov.base = (uintptr_t) base; - knem_iov.len = size; - - knem_cr.iovec_array = (uintptr_t) &knem_iov; - knem_cr.iovec_nr = 1; - knem_cr.protection = 0; - - if (reg->access_flags & (MCA_RCACHE_ACCESS_LOCAL_WRITE | MCA_RCACHE_ACCESS_REMOTE_WRITE)) { - knem_cr.protection |= PROT_WRITE; - } - - if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_READ) { - knem_cr.protection |= PROT_READ; - } - - /* Sm will explicitly destroy this cookie */ - knem_cr.flags = 0; - if (OPAL_UNLIKELY(ioctl(mca_btl_sm.knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) { - return OPAL_ERROR; - } - - knem_reg->btl_handle.cookie = knem_cr.cookie; - knem_reg->btl_handle.base_addr = (intptr_t) base; - - return OPAL_SUCCESS; -} - -static int mca_btl_sm_knem_dereg(void *reg_data, mca_rcache_base_registration_t *reg) -{ - mca_btl_sm_registration_handle_t *knem_reg = (mca_btl_sm_registration_handle_t *) reg; - - /* NTH: explicity ignore the return code. Don't care about this cookie anymore anyway. 
*/ - (void) ioctl(mca_btl_sm.knem_fd, KNEM_CMD_DESTROY_REGION, &knem_reg->btl_handle.cookie); - - return OPAL_SUCCESS; -} - -static mca_btl_base_registration_handle_t * -mca_btl_sm_register_mem_knem(struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, void *base, size_t size, - uint32_t flags) -{ - mca_btl_sm_t *sm_module = (mca_btl_sm_t *) btl; - mca_btl_sm_registration_handle_t *reg = NULL; - int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; - int rc; - - rc = sm_module->knem_rcache->rcache_register(sm_module->knem_rcache, base, size, 0, - access_flags, - (mca_rcache_base_registration_t **) ®); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - return NULL; - } - - return ®->btl_handle; -} - -static int mca_btl_sm_deregister_mem_knem(struct mca_btl_base_module_t *btl, - struct mca_btl_base_registration_handle_t *handle) -{ - mca_btl_sm_t *sm_module = (mca_btl_sm_t *) btl; - mca_btl_sm_registration_handle_t *reg = (mca_btl_sm_registration_handle_t - *) ((intptr_t) handle - - offsetof(mca_btl_sm_registration_handle_t, - btl_handle)); - - sm_module->knem_rcache->rcache_deregister(sm_module->knem_rcache, ®->base); - - return OPAL_SUCCESS; -} - -int mca_btl_sm_knem_init(void) -{ - mca_rcache_base_resources_t rcache_resources = {.cache_name = "sm", - .reg_data = NULL, - .sizeof_reg = sizeof( - mca_btl_sm_registration_handle_t), - .register_mem = mca_btl_sm_knem_reg, - .deregister_mem = mca_btl_sm_knem_dereg}; - struct knem_cmd_info knem_info; - int rc; - - /* Open the knem device. Try to print a helpful message if we - fail to open it. 
*/ - mca_btl_sm.knem_fd = open("/dev/knem", O_RDWR); - if (mca_btl_sm.knem_fd < 0) { - if (EACCES == errno) { - struct stat sbuf; - if (0 != stat("/dev/knem", &sbuf)) { - sbuf.st_mode = 0; - } - opal_show_help("help-btl-sm.txt", "knem permission denied", true, - opal_process_info.nodename, sbuf.st_mode); - } else { - opal_show_help("help-btl-sm.txt", "knem fail open", true, opal_process_info.nodename, - errno, strerror(errno)); - } - - return OPAL_ERR_NOT_AVAILABLE; - } - - do { - /* Check that the ABI if kernel module running is the same - * as what we were compiled against. */ - memset(&knem_info, 0, sizeof(knem_info)); - rc = ioctl(mca_btl_sm.knem_fd, KNEM_CMD_GET_INFO, &knem_info); - if (rc < 0) { - opal_show_help("help-btl-sm.txt", "knem get ABI fail", true, opal_process_info.nodename, - errno, strerror(errno)); - break; - } - - if (KNEM_ABI_VERSION != knem_info.abi) { - opal_show_help("help-btl-sm.txt", "knem ABI mismatch", true, opal_process_info.nodename, - KNEM_ABI_VERSION, knem_info.abi); - break; - } - - if (!(mca_btl_sm_component.knem_dma_min && (knem_info.features & KNEM_FEATURE_DMA))) { - /* disable DMA */ - mca_btl_sm_component.knem_dma_min = UINT_MAX; - } - - /* TODO: add async support */ - - /* knem set up successfully */ - mca_btl_sm.super.btl_get = mca_btl_sm_get_knem; - mca_btl_sm.super.btl_put = mca_btl_sm_put_knem; - - /* knem requires registration */ - mca_btl_sm.super.btl_register_mem = mca_btl_sm_register_mem_knem; - mca_btl_sm.super.btl_deregister_mem = mca_btl_sm_deregister_mem_knem; - mca_btl_sm.super.btl_registration_handle_size = sizeof(mca_btl_base_registration_handle_t); - - mca_btl_sm.knem_rcache = mca_rcache_base_module_create("grdma", NULL, &rcache_resources); - if (NULL == mca_btl_sm.knem_rcache) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - return OPAL_SUCCESS; - } while (0); - - mca_btl_sm_knem_fini(); - - return OPAL_ERR_NOT_AVAILABLE; - ; -} - -int mca_btl_sm_knem_fini(void) -{ - if (-1 != mca_btl_sm.knem_fd) { - 
close(mca_btl_sm.knem_fd); - mca_btl_sm.knem_fd = -1; - } - - if (mca_btl_sm.knem_rcache) { - (void) mca_rcache_base_module_destroy(mca_btl_sm.knem_rcache); - mca_btl_sm.knem_rcache = NULL; - } - - return OPAL_SUCCESS; -} - -int mca_btl_sm_knem_progress(void) -{ - /* NTH: does nothing until async support is added */ - return OPAL_SUCCESS; -} - -#endif diff --git a/opal/mca/btl/sm/btl_sm_knem.h b/opal/mca/btl/sm/btl_sm_knem.h deleted file mode 100644 index bea93f5fc64..00000000000 --- a/opal/mca/btl/sm/btl_sm_knem.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef BTL_SM_KNEM_H -#define BTL_SM_KNEM_H - -#if OPAL_BTL_SM_HAVE_KNEM - -# include - -int mca_btl_sm_knem_init(void); -int mca_btl_sm_knem_fini(void); -int mca_btl_sm_knem_progress(void); - -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - -#endif /* BTL_SM_KNEM_H */ diff --git a/opal/mca/btl/sm/btl_sm_module.c b/opal/mca/btl/sm/btl_sm_module.c index 8720f17f317..883d2048886 100644 --- a/opal/mca/btl/sm/btl_sm_module.c +++ b/opal/mca/btl/sm/btl_sm_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, @@ -19,7 +19,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2018-2019 Triad National Security, LLC. All rights * reserved. - * Copyright (c) 2020-2021 Google, LLC. All rights reserved. + * Copyright (c) 2020-2022 Google, LLC. All rights reserved. * Copyright (c) 2021 Nanook Consulting. 
All rights reserved. * $COPYRIGHT$ * @@ -32,11 +32,10 @@ #include "opal/util/show_help.h" #include "opal/mca/btl/sm/btl_sm.h" -#include "opal/mca/btl/sm/btl_sm_endpoint.h" #include "opal/mca/btl/sm/btl_sm_fbox.h" #include "opal/mca/btl/sm/btl_sm_fifo.h" #include "opal/mca/btl/sm/btl_sm_frag.h" -#include "opal/mca/btl/sm/btl_sm_xpmem.h" +#include "opal/mca/smsc/smsc.h" #include @@ -64,27 +63,6 @@ mca_btl_sm_t mca_btl_sm = { .btl_prepare_src = sm_prepare_src, .btl_send = mca_btl_sm_send, .btl_sendi = mca_btl_sm_sendi, .btl_dump = mca_btl_base_dump, .btl_register_error = sm_register_error_cb}}; -/* - * Exit function copied from btl_usnic_util.c - * - * The following comment tells Coverity that this function does not return. - * See https://scan.coverity.com/tune. - */ - -/* coverity[+kill] */ -static void sm_btl_exit(mca_btl_sm_t *btl) -{ - if (NULL != btl && NULL != btl->error_cb) { - btl->error_cb(&btl->super, MCA_BTL_ERROR_FLAGS_FATAL, (opal_proc_t *) opal_proc_local_get(), - "The sm BTL is aborting the MPI job (via PML error callback)."); - } - - /* If the PML error callback returns (or if there wasn't one), just exit. Shrug. 
*/ - fprintf(stderr, "*** The Open MPI sm BTL is aborting the MPI job (via exit(3)).\n"); - fflush(stderr); - exit(1); -} - static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) { mca_btl_sm_component_t *component = &mca_btl_sm_component; @@ -147,7 +125,7 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) return rc; } - if (MCA_BTL_SM_XPMEM != mca_btl_sm_component.single_copy_mechanism) { + if (!mca_smsc_base_has_feature(MCA_SMSC_FEATURE_CAN_MAP)) { /* initialize free list for buffered send fragments */ rc = opal_free_list_init(&component->sm_frags_max_send, sizeof(mca_btl_sm_frag_t), opal_cache_line_size, OBJ_CLASS(mca_btl_sm_frag_t), @@ -164,20 +142,13 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) /* set flag indicating btl has been inited */ sm_btl->btl_inited = true; -#if OPAL_BTL_SM_HAVE_XPMEM - if (MCA_BTL_SM_XPMEM == mca_btl_sm_component.single_copy_mechanism) { - mca_btl_sm_component.vma_module = mca_rcache_base_vma_module_alloc(); - } -#endif - return OPAL_SUCCESS; } static int init_sm_endpoint(struct mca_btl_base_endpoint_t **ep_out, struct opal_proc_t *proc) { mca_btl_sm_component_t *component = &mca_btl_sm_component; - union sm_modex_t *modex; - ino_t my_user_ns_id; + mca_btl_sm_modex_t *modex; size_t msg_size; int rc; @@ -196,7 +167,7 @@ static int init_sm_endpoint(struct mca_btl_base_endpoint_t **ep_out, struct opal ep->peer_smp_rank = peer_local_rank; - if (peer_local_rank != MCA_BTL_SM_LOCAL_RANK) { + if (!mca_btl_is_self_endpoint(ep)) { OPAL_MODEX_RECV_IMMEDIATE(rc, &component->super.btl_version, &proc->proc_name, (void **) &modex, &msg_size); if (OPAL_SUCCESS != rc) { @@ -204,69 +175,29 @@ static int init_sm_endpoint(struct mca_btl_base_endpoint_t **ep_out, struct opal } /* attach to the remote segment */ -#if OPAL_BTL_SM_HAVE_XPMEM - if (MCA_BTL_SM_XPMEM == mca_btl_sm_component.single_copy_mechanism) { - /* always use xpmem if it is available */ - ep->segment_data.xpmem.apid = 
xpmem_get(modex->xpmem.seg_id, XPMEM_RDWR, - XPMEM_PERMIT_MODE, (void *) 0666); - ep->segment_data.xpmem.address_max = modex->xpmem.address_max; - (void) sm_get_registation(ep, modex->xpmem.segment_base, - mca_btl_sm_component.segment_size, MCA_RCACHE_FLAGS_PERSIST, - (void **) &ep->segment_base); - } else { -#endif + ep->smsc_endpoint = NULL; /* assume no one sided support */ + if( NULL != mca_smsc ) { + ep->smsc_endpoint = MCA_SMSC_CALL(get_endpoint, proc); + } + if (NULL == ep->smsc_endpoint) { + /* disable RDMA */ + mca_btl_sm.super.btl_get = NULL; + mca_btl_sm.super.btl_put = NULL; + mca_btl_sm.super.btl_flags &= ~MCA_BTL_FLAGS_RDMA; + } /* store a copy of the segment information for detach */ - ep->segment_data.other.seg_ds = malloc(modex->other.seg_ds_size); - if (NULL == ep->segment_data.other.seg_ds) { + ep->seg_ds = malloc(modex->seg_ds_size); + if (NULL == ep->seg_ds) { return OPAL_ERR_OUT_OF_RESOURCE; } - memcpy(ep->segment_data.other.seg_ds, &modex->other.seg_ds, modex->other.seg_ds_size); + memcpy(ep->seg_ds, &modex->seg_ds, modex->seg_ds_size); - ep->segment_base = opal_shmem_segment_attach(ep->segment_data.other.seg_ds); + ep->segment_base = opal_shmem_segment_attach(ep->seg_ds); if (NULL == ep->segment_base) { return OPAL_ERROR; } - if (MCA_BTL_SM_CMA == mca_btl_sm_component.single_copy_mechanism) { - my_user_ns_id = mca_btl_sm_get_user_ns_id(); - if (my_user_ns_id != modex->other.user_ns_id) { - mca_base_var_source_t source; - int vari; - rc = mca_base_var_find_by_name("btl_sm_single_copy_mechanism", &vari); - if (OPAL_ERROR == rc) { - return OPAL_ERROR; - } - rc = mca_base_var_get_value(vari, NULL, &source, NULL); - if (OPAL_ERROR == rc) { - return OPAL_ERROR; - } - /* - * CMA is not possible as different user namespaces are in use. - * Currently the kernel does not allow * process_vm_{read,write}v() - * for processes running in different user namespaces even if - * all involved user IDs are mapped to the same user ID. 
- */ - if (MCA_BASE_VAR_SOURCE_DEFAULT != source) { - /* If CMA has been explicitly selected we want to error out */ - opal_show_help("help-btl-sm.txt", "cma-different-user-namespace-error", - true, opal_process_info.nodename); - sm_btl_exit(&mca_btl_sm); - } - /* - * If CMA has been selected because it is the default or - * some fallback, this falls back even further. - */ - opal_show_help("help-btl-sm.txt", "cma-different-user-namespace-warning", true, - opal_process_info.nodename); - mca_btl_sm_component.single_copy_mechanism = MCA_BTL_SM_NONE; - mca_btl_sm.super.btl_get = NULL; - mca_btl_sm.super.btl_put = NULL; - } - } -#if OPAL_BTL_SM_HAVE_XPMEM - } -#endif OBJ_CONSTRUCT(&ep->lock, opal_mutex_t); free(modex); @@ -415,16 +346,8 @@ static int sm_finalize(struct mca_btl_base_module_t *btl) free(component->fbox_in_endpoints); component->fbox_in_endpoints = NULL; - if (MCA_BTL_SM_XPMEM != mca_btl_sm_component.single_copy_mechanism) { - opal_shmem_unlink(&mca_btl_sm_component.seg_ds); - opal_shmem_segment_detach(&mca_btl_sm_component.seg_ds); - } - -#if OPAL_BTL_SM_HAVE_XPMEM - if (NULL != mca_btl_sm_component.vma_module) { - OBJ_RELEASE(mca_btl_sm_component.vma_module); - } -#endif + opal_shmem_unlink(&mca_btl_sm_component.seg_ds); + opal_shmem_segment_detach(&mca_btl_sm_component.seg_ds); return OPAL_SUCCESS; } @@ -459,7 +382,7 @@ mca_btl_base_descriptor_t *mca_btl_sm_alloc(struct mca_btl_base_module_t *btl, MCA_BTL_SM_FRAG_ALLOC_USER(frag, endpoint); } else if (size <= mca_btl_sm.super.btl_eager_limit) { MCA_BTL_SM_FRAG_ALLOC_EAGER(frag, endpoint); - } else if (MCA_BTL_SM_XPMEM != mca_btl_sm_component.single_copy_mechanism + } else if (!mca_smsc_base_has_feature(MCA_SMSC_FEATURE_CAN_MAP) && size <= mca_btl_sm.super.btl_max_send_size) { MCA_BTL_SM_FRAG_ALLOC_MAX(frag, endpoint); } @@ -512,7 +435,7 @@ static struct mca_btl_base_descriptor_t *sm_prepare_src(struct mca_btl_base_modu struct iovec iov; /* non-contiguous data requires the convertor */ - if 
(MCA_BTL_SM_XPMEM != mca_btl_sm_component.single_copy_mechanism + if (!mca_smsc_base_has_feature(MCA_SMSC_FEATURE_CAN_MAP) && total_size > mca_btl_sm.super.btl_eager_limit) { MCA_BTL_SM_FRAG_ALLOC_MAX(frag, endpoint); } else { @@ -534,7 +457,7 @@ static struct mca_btl_base_descriptor_t *sm_prepare_src(struct mca_btl_base_modu frag->segments[0].seg_len = *size + reserve; } else { - if (MCA_BTL_SM_XPMEM != mca_btl_sm_component.single_copy_mechanism) { + if (!mca_smsc_base_has_feature(MCA_SMSC_FEATURE_CAN_MAP)) { if (OPAL_LIKELY(total_size <= mca_btl_sm.super.btl_eager_limit)) { MCA_BTL_SM_FRAG_ALLOC_EAGER(frag, endpoint); } else { @@ -548,10 +471,9 @@ static struct mca_btl_base_descriptor_t *sm_prepare_src(struct mca_btl_base_modu return NULL; } -#if OPAL_BTL_SM_HAVE_XPMEM - /* use xpmem to send this segment if it is above the max inline send size */ - if (OPAL_UNLIKELY(MCA_BTL_SM_XPMEM == mca_btl_sm_component.single_copy_mechanism - && total_size > (size_t) mca_btl_sm_component.max_inline_send)) { + /* use single-copy to send this segment if it is above the max inline send size */ + if (mca_smsc_base_has_feature(MCA_SMSC_FEATURE_CAN_MAP) + && total_size > (size_t) mca_btl_sm_component.max_inline_send) { /* single copy send */ frag->hdr->flags = MCA_BTL_SM_FLAG_SINGLE_COPY; @@ -564,14 +486,11 @@ static struct mca_btl_base_descriptor_t *sm_prepare_src(struct mca_btl_base_modu frag->segments[1].seg_addr.pval = data_ptr; frag->base.des_segment_count = 2; } else { -#endif /* NTH: the covertor adds some latency so we bypass it here */ memcpy((void *) ((uintptr_t) frag->segments[0].seg_addr.pval + reserve), data_ptr, *size); frag->segments[0].seg_len = total_size; -#if OPAL_BTL_SM_HAVE_XPMEM } -#endif } frag->base.order = order; @@ -588,37 +507,34 @@ static void mca_btl_sm_endpoint_constructor(mca_btl_sm_endpoint_t *ep) ep->fbox_out.fbox = NULL; } -#if OPAL_BTL_SM_HAVE_XPMEM -#endif - static void mca_btl_sm_endpoint_destructor(mca_btl_sm_endpoint_t *ep) { 
OBJ_DESTRUCT(&ep->pending_frags); OBJ_DESTRUCT(&ep->pending_frags_lock); -#if OPAL_BTL_SM_HAVE_XPMEM - if (MCA_BTL_SM_XPMEM == mca_btl_sm_component.single_copy_mechanism) { - mca_btl_sm_xpmem_cleanup_endpoint(ep); - } else -#endif - if (ep->segment_data.other.seg_ds) { + if (ep->seg_ds) { opal_shmem_ds_t seg_ds; /* opal_shmem_segment_detach expects a opal_shmem_ds_t and will * stomp past the end of the seg_ds if it is too small (which * ep->seg_ds probably is) */ - memcpy(&seg_ds, ep->segment_data.other.seg_ds, - opal_shmem_sizeof_shmem_ds(ep->segment_data.other.seg_ds)); - free(ep->segment_data.other.seg_ds); - ep->segment_data.other.seg_ds = NULL; + memcpy(&seg_ds, ep->seg_ds, opal_shmem_sizeof_shmem_ds(ep->seg_ds)); + free(ep->seg_ds); + ep->seg_ds = NULL; /* disconnect from the peer's segment */ opal_shmem_segment_detach(&seg_ds); } + if (ep->fbox_out.fbox) { opal_free_list_return(&mca_btl_sm_component.sm_fboxes, ep->fbox_out.fbox); } + if (ep->smsc_endpoint) { + MCA_SMSC_CALL(return_endpoint, ep->smsc_endpoint); + ep->smsc_endpoint = NULL; + } + ep->fbox_in.buffer = ep->fbox_out.buffer = NULL; ep->fbox_out.fbox = NULL; ep->segment_base = NULL; diff --git a/opal/mca/btl/sm/btl_sm_put.c b/opal/mca/btl/sm/btl_sm_put.c index c497999d7a2..6b7588ea622 100644 --- a/opal/mca/btl/sm/btl_sm_put.c +++ b/opal/mca/btl/sm/btl_sm_put.c @@ -4,7 +4,7 @@ * reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2019 Google, Inc. All rights reserved. + * Copyright (c) 2019-2022 Google, Inc. All rights reserved. * Copyright (c) 2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
@@ -18,18 +18,8 @@ #include "opal_config.h" #include "opal/mca/btl/sm/btl_sm.h" -#include "opal/mca/btl/sm/btl_sm_endpoint.h" #include "opal/mca/btl/sm/btl_sm_frag.h" -#include "opal/mca/btl/sm/btl_sm_xpmem.h" - -#if OPAL_BTL_SM_HAVE_CMA -# include - -# if OPAL_CMA_NEED_SYSCALL_DEFS -# include "opal/sys/cma.h" -# endif /* OPAL_CMA_NEED_SYSCALL_DEFS */ - -#endif +#include "opal/mca/smsc/smsc.h" /** * Initiate an synchronous put. @@ -38,111 +28,22 @@ * @param endpoint (IN) BTL addressing information * @param descriptor (IN) Description of the data to be transferred */ -#if OPAL_BTL_SM_HAVE_XPMEM -int mca_btl_sm_put_xpmem(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata) -{ - mca_rcache_base_registration_t *reg; - void *rem_ptr; - - reg = sm_get_registation(endpoint, (void *) (intptr_t) remote_address, size, 0, &rem_ptr); - if (OPAL_UNLIKELY(NULL == reg)) { - return OPAL_ERROR; - } - - sm_memmove(rem_ptr, local_address, size); - - sm_return_registration(reg, endpoint); - /* always call the callback function */ - cbfunc(btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); - - return OPAL_SUCCESS; -} -#endif - -#if OPAL_BTL_SM_HAVE_CMA -int mca_btl_sm_put_cma(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata) +int mca_btl_sm_put(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, + mca_btl_base_registration_handle_t *local_handle, + 
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, + int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, + void *cbdata) { - struct iovec src_iov = {.iov_base = local_address, .iov_len = size}; - struct iovec dst_iov = {.iov_base = (void *) (intptr_t) remote_address, .iov_len = size}; - ssize_t ret; - - /* This should not be needed, see the rationale in mca_btl_sm_get_cma() */ - do { - ret = process_vm_writev(endpoint->segment_data.other.seg_ds->seg_cpid, &src_iov, 1, - &dst_iov, 1, 0); - if (0 > ret) { - if (ESRCH == errno) { - BTL_PEER_ERROR(NULL, ("CMA wrote %ld, expected %lu, errno = %d\n", (long) ret, - (unsigned long) size, errno)); - return OPAL_ERROR; - } - BTL_ERROR(("CMA wrote %ld, expected %lu, errno = %d\n", (long) ret, - (unsigned long) size, errno)); - return OPAL_ERROR; + if (!mca_btl_is_self_endpoint(endpoint)) { + int ret = MCA_SMSC_CALL(copy_to, endpoint->smsc_endpoint, local_address, + (void *) (intptr_t) remote_address, size, remote_handle); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + return ret; } - src_iov.iov_base = (void *) ((char *) src_iov.iov_base + ret); - src_iov.iov_len -= ret; - dst_iov.iov_base = (void *) ((char *) dst_iov.iov_base + ret); - dst_iov.iov_len -= ret; - } while (0 < src_iov.iov_len); - - /* always call the callback function */ - cbfunc(btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); - - return OPAL_SUCCESS; -} -#endif - -#if OPAL_BTL_SM_HAVE_KNEM -int mca_btl_sm_put_knem(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata) -{ - struct knem_cmd_param_iovec send_iovec; - struct knem_cmd_inline_copy icopy; - - /* Fill in the ioctl data fields. 
There's no async completion, so - we don't need to worry about getting a slot, etc. */ - send_iovec.base = (uintptr_t) local_address; - send_iovec.len = size; - icopy.local_iovec_array = (uintptr_t) &send_iovec; - icopy.local_iovec_nr = 1; - icopy.remote_cookie = remote_handle->cookie; - icopy.remote_offset = remote_address - remote_handle->base_addr; - icopy.write = 1; - icopy.flags = 0; - - /* Use the DMA flag if knem supports it *and* the segment length - * is greater than the cutoff. Not that if DMA is not supported - * or the user specified 0 for knem_dma_min the knem_dma_min was - * set to UINT_MAX in mca_btl_sm_knem_init. */ - if (mca_btl_sm_component.knem_dma_min <= size) { - icopy.flags = KNEM_FLAG_DMA; - } - /* synchronous flags only, no need to specify icopy.async_status_index */ - - /* When the ioctl returns, the transfer is done and we can invoke - the btl callback and return the frag */ - if (OPAL_UNLIKELY(0 != ioctl(mca_btl_sm.knem_fd, KNEM_CMD_INLINE_COPY, &icopy))) { - return OPAL_ERROR; - } - - if (KNEM_STATUS_FAILED == icopy.current_status) { - return OPAL_ERROR; + } else { + memcpy((void *)(uintptr_t) remote_address, local_address, size); } /* always call the callback function */ @@ -150,4 +51,3 @@ int mca_btl_sm_put_knem(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *end return OPAL_SUCCESS; } -#endif diff --git a/opal/mca/btl/sm/btl_sm_types.h b/opal/mca/btl/sm/btl_sm_types.h index 7ca79d0095a..c9e5803c5a7 100644 --- a/opal/mca/btl/sm/btl_sm_types.h +++ b/opal/mca/btl/sm/btl_sm_types.h @@ -35,49 +35,19 @@ #include "opal_config.h" #include "opal/class/opal_free_list.h" #include "opal/mca/btl/btl.h" - -#if OPAL_BTL_SM_HAVE_XPMEM - -# if defined(HAVE_XPMEM_H) -# include - -typedef struct xpmem_addr xpmem_addr_t; -# elif defined(HAVE_SN_XPMEM_H) -# include - -typedef int64_t xpmem_segid_t; -typedef int64_t xpmem_apid_t; -# endif -#endif +#include "opal/mca/smsc/smsc.h" /* * Modex data */ -union sm_modex_t { -#if OPAL_BTL_SM_HAVE_XPMEM - 
struct sm_modex_xpmem_t { - xpmem_segid_t seg_id; - void *segment_base; - uintptr_t address_max; - } xpmem; -#endif - struct sm_modex_other_t { - ino_t user_ns_id; - int seg_ds_size; - /* seg_ds needs to be the last element */ - opal_shmem_ds_t seg_ds; - } other; +struct mca_btl_sm_modex_t { + uint64_t segment_base; + int seg_ds_size; + /* seg_ds needs to be the last element */ + opal_shmem_ds_t seg_ds; }; -/** - * Single copy mechanisms - */ -enum { - MCA_BTL_SM_XPMEM = 0, - MCA_BTL_SM_CMA = 1, - MCA_BTL_SM_KNEM = 2, - MCA_BTL_SM_NONE = 3, -}; +typedef struct mca_btl_sm_modex_t mca_btl_sm_modex_t; typedef struct mca_btl_base_endpoint_t { opal_list_item_t super; @@ -109,18 +79,9 @@ typedef struct mca_btl_base_endpoint_t { opal_mutex_t lock; /**< lock to protect endpoint structures from concurrent * access */ - union { -#if OPAL_BTL_SM_HAVE_XPMEM - struct { - xpmem_apid_t apid; /**< xpmem apid for remote peer */ - uintptr_t address_max; /**< largest address that can be attached */ - } xpmem; -#endif - struct { - pid_t pid; /**< pid of remote peer (used for CMA) */ - opal_shmem_ds_t *seg_ds; /**< stored segment information for detach */ - } other; - } segment_data; + mca_smsc_endpoint_t *smsc_endpoint; + void *smsc_map_context; + opal_shmem_ds_t *seg_ds; /**< stored segment information for detach */ opal_mutex_t pending_frags_lock; /**< protect pending_frags */ opal_list_t pending_frags; /**< fragments pending fast box space */ @@ -139,11 +100,6 @@ struct mca_btl_sm_component_t { int sm_free_list_num; /**< initial size of free lists */ int sm_free_list_max; /**< maximum size of free lists */ int sm_free_list_inc; /**< number of elements to alloc when growing free lists */ -#if OPAL_BTL_SM_HAVE_XPMEM - xpmem_segid_t my_seg_id; /**< this rank's xpmem segment id */ - uintptr_t my_address_max; /**< largest address */ - mca_rcache_base_vma_module_t *vma_module; /**< registration cache for xpmem segments */ -#endif opal_shmem_ds_t seg_ds; /**< this rank's shared memory 
segment (when not using xpmem) */ opal_mutex_t lock; /**< lock to protect concurrent updates to this structure's members */ @@ -163,7 +119,6 @@ struct mca_btl_sm_component_t { int single_copy_mechanism; /**< single copy mechanism to use */ int memcpy_limit; /**< Limit where we switch from memmove to memcpy */ - int log_attach_align; /**< Log of the alignment for xpmem segments */ unsigned int max_inline_send; /**< Limit for copy-in-copy-out fragments */ mca_btl_base_endpoint_t @@ -177,10 +132,6 @@ struct mca_btl_sm_component_t { char *backing_directory; /**< directory to place shared memory backing files */ - /* knem stuff */ -#if OPAL_BTL_SM_HAVE_KNEM - unsigned int knem_dma_min; /**< minimum size to enable DMA for knem transfers (0 disables) */ -#endif mca_mpool_base_module_t *mpool; }; typedef struct mca_btl_sm_component_t mca_btl_sm_component_t; @@ -192,12 +143,6 @@ struct mca_btl_sm_t { mca_btl_base_module_t super; /**< base BTL interface */ bool btl_inited; /**< flag indicating if btl has been inited */ mca_btl_base_module_error_cb_fn_t error_cb; -#if OPAL_BTL_SM_HAVE_KNEM - int knem_fd; - - /* registration cache */ - mca_rcache_base_module_t *knem_rcache; -#endif }; typedef struct mca_btl_sm_t mca_btl_sm_t; @@ -278,18 +223,6 @@ typedef struct mca_btl_sm_frag_t mca_btl_sm_frag_t; OBJ_CLASS_DECLARATION(mca_btl_sm_frag_t); -/* At this time only knem requires a registration of "RDMA" buffers */ -struct mca_btl_base_registration_handle_t { - uint64_t cookie; - intptr_t base_addr; -}; - -struct mca_btl_sm_registration_handle_t { - mca_rcache_base_registration_t base; - mca_btl_base_registration_handle_t btl_handle; -}; -typedef struct mca_btl_sm_registration_handle_t mca_btl_sm_registration_handle_t; - /** FIFO types **/ typedef opal_atomic_intptr_t atomic_fifo_value_t; typedef intptr_t fifo_value_t; diff --git a/opal/mca/btl/sm/btl_sm_xpmem.c b/opal/mca/btl/sm/btl_sm_xpmem.c deleted file mode 100644 index eda6fbd85a2..00000000000 --- 
a/opal/mca/btl/sm/btl_sm_xpmem.c +++ /dev/null @@ -1,249 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2020 Google, LLC. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal/mca/btl/sm/btl_sm.h" - -#include "opal/include/opal/align.h" -#include "opal/mca/memchecker/base/base.h" - -#if OPAL_BTL_SM_HAVE_XPMEM - -int mca_btl_sm_xpmem_init(void) -{ - /* Any attachment that goes past the Linux TASK_SIZE will always fail. To prevent this we need - * to determine the value of TASK_SIZE. On x86_64 the value was hard-coded in sm to be - * 0x7ffffffffffful but this approach does not work with AARCH64 (and possibly other - * architectures). Since there is really no way to directly determine the value we can (in all - * cases?) look through the mapping for this process to determine what the largest address is. - * This should be the top of the stack. No heap allocations should be larger than this value. - * Since the largest address may differ between processes the value must be shared as part of - * the modex and stored in the endpoint. */ - FILE *fh = fopen("/proc/self/maps", "r"); - if (NULL == fh) { - BTL_ERROR(("could not open /proc/self/maps for reading. 
disabling XPMEM")); - return OPAL_ERR_NOT_AVAILABLE; - } - - char buffer[1024]; - uintptr_t address_max = 0; - while (fgets(buffer, sizeof(buffer), fh)) { - uintptr_t low, high; - char *tmp; - /* each line of /proc/self/maps starts with low-high in hexidecimal (without a 0x) */ - low = strtoul(buffer, &tmp, 16); - high = strtoul(tmp + 1, NULL, 16); - if (address_max < high) { - address_max = high; - } - } - - fclose(fh); - - if (0 == address_max) { - BTL_ERROR(("could not determine the address max")); - return OPAL_ERR_NOT_AVAILABLE; - } - - /* save the calcuated maximum */ - mca_btl_sm_component.my_address_max = address_max - 1; - - /* it is safe to use XPMEM_MAXADDR_SIZE here (which is always (size_t)-1 even though - * it is not safe for attach */ - mca_btl_sm_component.my_seg_id = xpmem_make(0, XPMEM_MAXADDR_SIZE, XPMEM_PERMIT_MODE, - (void *) 0666); - if (-1 == mca_btl_sm_component.my_seg_id) { - return OPAL_ERR_NOT_AVAILABLE; - } - - mca_btl_sm.super.btl_get = mca_btl_sm_get_xpmem; - mca_btl_sm.super.btl_put = mca_btl_sm_put_xpmem; - - return OPAL_SUCCESS; -} - -struct sm_check_reg_ctx_t { - mca_btl_base_endpoint_t *ep; - mca_rcache_base_registration_t **reg; - uintptr_t base; - uintptr_t bound; -}; -typedef struct sm_check_reg_ctx_t sm_check_reg_ctx_t; - -static int sm_check_reg(mca_rcache_base_registration_t *reg, void *ctx) -{ - sm_check_reg_ctx_t *sm_ctx = (sm_check_reg_ctx_t *) ctx; - - if ((intptr_t) reg->alloc_base != sm_ctx->ep->peer_smp_rank) { - /* ignore this registration */ - return OPAL_SUCCESS; - } - - sm_ctx->reg[0] = reg; - - if (sm_ctx->bound <= (uintptr_t) reg->bound && sm_ctx->base >= (uintptr_t) reg->base) { - if (0 == opal_atomic_fetch_add_32(®->ref_count, 1)) { - /* registration is being deleted by a thread in sm_return_registration. the - * VMA tree implementation will block in mca_rcache_delete until we finish - * iterating over the VMA tree so it is safe to just ignore this registration - * and continue. 
*/ - sm_ctx->reg[0] = NULL; - return OPAL_SUCCESS; - } - return 1; - } - - if (MCA_RCACHE_FLAGS_INVALID & opal_atomic_fetch_or_32(®->flags, MCA_RCACHE_FLAGS_INVALID)) { - /* another thread has already marked this registration as invalid. ignore and continue. */ - sm_ctx->reg[0] = NULL; - return OPAL_SUCCESS; - } - - /* let the caller know we found an overlapping registration that can be coalesced into - * the requested interval. the caller will remove the last reference and delete the - * registration. */ - return 2; -} - -void sm_return_registration(mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep) -{ - mca_rcache_base_vma_module_t *vma_module = mca_btl_sm_component.vma_module; - int32_t ref_count; - - ref_count = opal_atomic_add_fetch_32(®->ref_count, -1); - if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) { -# if OPAL_ENABLE_DEBUG - int ret = mca_rcache_base_vma_delete(vma_module, reg); - assert(OPAL_SUCCESS == ret); -# else - (void) mca_rcache_base_vma_delete(vma_module, reg); -# endif - opal_memchecker_base_mem_noaccess(reg->rcache_context, (uintptr_t)(reg->bound - reg->base)); - (void) xpmem_detach(reg->rcache_context); - OBJ_RELEASE(reg); - } -} - -/* look up the remote pointer in the peer rcache and attach if - * necessary */ -mca_rcache_base_registration_t *sm_get_registation(struct mca_btl_base_endpoint_t *ep, - void *rem_ptr, size_t size, int flags, - void **local_ptr) -{ - mca_rcache_base_vma_module_t *vma_module = mca_btl_sm_component.vma_module; - uint64_t attach_align = 1 << mca_btl_sm_component.log_attach_align; - mca_rcache_base_registration_t *reg = NULL; - sm_check_reg_ctx_t check_ctx = {.ep = ep, .reg = ®}; - xpmem_addr_t xpmem_addr; - uintptr_t base, bound; - int rc; - - base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t); - bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1; - if (OPAL_UNLIKELY(bound > ep->segment_data.xpmem.address_max)) { - 
bound = ep->segment_data.xpmem.address_max; - } - - check_ctx.base = base; - check_ctx.bound = bound; - - /* several segments may match the base pointer */ - rc = mca_rcache_base_vma_iterate(vma_module, (void *) base, bound - base, true, sm_check_reg, - &check_ctx); - if (2 == rc) { - bound = bound < (uintptr_t) reg->bound ? (uintptr_t) reg->bound : bound; - base = base > (uintptr_t) reg->base ? (uintptr_t) reg->base : base; - sm_return_registration(reg, ep); - reg = NULL; - } - - if (NULL == reg) { - reg = OBJ_NEW(mca_rcache_base_registration_t); - if (OPAL_LIKELY(NULL != reg)) { - /* stick around for awhile */ - reg->ref_count = 2; - reg->base = (unsigned char *) base; - reg->bound = (unsigned char *) bound; - reg->flags = flags; - reg->alloc_base = (void *) (intptr_t) ep->peer_smp_rank; - -# if defined(HAVE_SN_XPMEM_H) - xpmem_addr.id = ep->segment_data.xpmem.apid; -# else - xpmem_addr.apid = ep->segment_data.xpmem.apid; -# endif - xpmem_addr.offset = base; - - reg->rcache_context = xpmem_attach(xpmem_addr, bound - base, NULL); - if (OPAL_UNLIKELY((void *) -1 == reg->rcache_context)) { - OBJ_RELEASE(reg); - return NULL; - } - - opal_memchecker_base_mem_defined(reg->rcache_context, bound - base); - - if (!(flags & MCA_RCACHE_FLAGS_PERSIST)) { - mca_rcache_base_vma_insert(vma_module, reg, 0); - } - } - } - - opal_atomic_wmb(); - *local_ptr = (void *) ((uintptr_t) reg->rcache_context - + (ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base)); - - return reg; -} - -struct sm_cleanup_reg_ctx { - mca_btl_sm_endpoint_t *ep; - opal_list_t *registrations; -}; - -static int mca_btl_sm_endpoint_xpmem_rcache_cleanup(mca_rcache_base_registration_t *reg, void *ctx) -{ - struct sm_cleanup_reg_ctx *cleanup_ctx = (struct sm_cleanup_reg_ctx *) ctx; - if ((intptr_t) reg->alloc_base == cleanup_ctx->ep->peer_smp_rank) { - opal_list_append(cleanup_ctx->registrations, ®->super.super); - } - - return OPAL_SUCCESS; -} - -void mca_btl_sm_xpmem_cleanup_endpoint(struct 
mca_btl_base_endpoint_t *ep) -{ - mca_rcache_base_registration_t *reg; - opal_list_t registrations; - struct sm_cleanup_reg_ctx cleanup_ctx = {.ep = ep, .registrations = ®istrations}; - - OBJ_CONSTRUCT(®istrations, opal_list_t); - - /* clean out the registration cache */ - (void) mca_rcache_base_vma_iterate(mca_btl_sm_component.vma_module, NULL, (size_t) -1, true, - mca_btl_sm_endpoint_xpmem_rcache_cleanup, - (void *) &cleanup_ctx); - while (NULL - != (reg = (mca_rcache_base_registration_t *) opal_list_remove_first(®istrations))) { - sm_return_registration(reg, ep); - } - OBJ_DESTRUCT(®istrations); - - if (ep->segment_base) { - xpmem_release(ep->segment_data.xpmem.apid); - ep->segment_data.xpmem.apid = 0; - } -} - -#endif /* OPAL_BTL_SM_HAVE_XPMEM */ diff --git a/opal/mca/btl/sm/btl_sm_xpmem.h b/opal/mca/btl/sm/btl_sm_xpmem.h deleted file mode 100644 index 42dbb1bca94..00000000000 --- a/opal/mca/btl/sm/btl_sm_xpmem.h +++ /dev/null @@ -1,41 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2016 ARM, Inc. All rights reserved. - * Copyright (c) 2020 Google, LLC. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BTL_SM_XPMEM_H -#define MCA_BTL_SM_XPMEM_H - -#include "opal_config.h" -#include "opal/mca/btl/sm/btl_sm_types.h" -#include "opal/mca/rcache/base/rcache_base_vma.h" -#include "opal/mca/rcache/rcache.h" - -#if OPAL_BTL_SM_HAVE_XPMEM - -/* look up the remote pointer in the peer rcache and attach if - * necessary */ - -struct mca_btl_base_endpoint_t; - -int mca_btl_sm_xpmem_init(void); - -mca_rcache_base_registration_t *sm_get_registation(struct mca_btl_base_endpoint_t *endpoint, - void *rem_ptr, size_t size, int flags, - void **local_ptr); - -void sm_return_registration(mca_rcache_base_registration_t *reg, - struct mca_btl_base_endpoint_t *endpoint); -void mca_btl_sm_xpmem_cleanup_endpoint(struct mca_btl_base_endpoint_t *ep); - -#endif /* OPAL_BTL_SM_HAVE_XPMEM */ - -#endif /* MCA_BTL_SM_XPMEM_H */ diff --git a/opal/mca/btl/sm/configure.m4 b/opal/mca/btl/sm/configure.m4 index f1eea2710dc..2edbdad5d8a 100644 --- a/opal/mca/btl/sm/configure.m4 +++ b/opal/mca/btl/sm/configure.m4 @@ -21,25 +21,6 @@ AC_DEFUN([MCA_opal_btl_sm_CONFIG],[ AC_CONFIG_FILES([opal/mca/btl/sm/Makefile]) - OPAL_VAR_SCOPE_PUSH([btl_sm_xpmem_happy btl_sm_cma_happy btl_sm_knem_happy]) - - # Check for single-copy APIs - - OPAL_CHECK_XPMEM([btl_sm], [btl_sm_xpmem_happy=1], [btl_sm_xpmem_happy=0]) - OPAL_CHECK_KNEM([btl_sm], [btl_sm_knem_happy=1],[btl_sm_knem_happy=0]) - OPAL_CHECK_CMA([btl_sm], [AC_CHECK_HEADER([sys/prctl.h]) btl_sm_cma_happy=1], [btl_sm_cma_happy=0]) - - AC_DEFINE_UNQUOTED([OPAL_BTL_SM_HAVE_XPMEM], [$btl_sm_xpmem_happy], - [If XPMEM support can be enabled within sm]) - - AC_DEFINE_UNQUOTED([OPAL_BTL_SM_HAVE_CMA], [$btl_sm_cma_happy], - [If CMA support can be enabled within sm]) - - AC_DEFINE_UNQUOTED([OPAL_BTL_SM_HAVE_KNEM], [$btl_sm_knem_happy], - [If KNEM support can be enabled within sm]) - - OPAL_VAR_SCOPE_POP - # always happy [$1] diff --git a/opal/mca/btl/tcp/btl_tcp_component.c 
b/opal/mca/btl/tcp/btl_tcp_component.c index 2c5318302d4..e68b82690ed 100644 --- a/opal/mca/btl/tcp/btl_tcp_component.c +++ b/opal/mca/btl/tcp/btl_tcp_component.c @@ -644,8 +644,8 @@ static int mca_btl_tcp_create(const int if_kindex, const char *if_name) */ static char **split_and_resolve(char **orig_str, char *name, bool reqd) { - int i, ret, save, if_index; - char **argv, *str, *tmp; + int i, n, ret, if_index, match_count, interface_count; + char **argv, **interfaces, *str, *tmp; char if_name[OPAL_IF_NAMESIZE]; struct sockaddr_storage argv_inaddr, if_inaddr; uint32_t argv_prefix; @@ -659,9 +659,22 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) if (NULL == argv) { return NULL; } - for (save = i = 0; NULL != argv[i]; ++i) { + interface_count = 0; + interfaces = NULL; + for (i = 0; NULL != argv[i]; ++i) { if (isalpha(argv[i][0])) { - argv[save++] = argv[i]; + /* This is an interface name. If not already in the interfaces array, add it */ + for (n = 0; n < interface_count; n++) { + if (0 == strcmp(argv[i], interfaces[n])) { + break; + } + } + if (n == interface_count) { + opal_output_verbose(20, + opal_btl_base_framework.framework_output, + "btl: tcp: Using interface: %s ", argv[i]); + opal_argv_append(&interface_count, &interfaces, argv[i]); + } continue; } @@ -698,16 +711,37 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) opal_net_get_hostname((struct sockaddr *) &argv_inaddr), argv_prefix); /* Go through all interfaces and see if we can find a match */ - for (if_index = opal_ifbegin(); if_index >= 0; if_index = opal_ifnext(if_index)) { - opal_ifindextoaddr(if_index, (struct sockaddr *) &if_inaddr, sizeof(if_inaddr)); - if (opal_net_samenetwork((struct sockaddr *) &argv_inaddr, - (struct sockaddr *) &if_inaddr, argv_prefix)) { - break; + match_count = 0; + for (if_index = opal_ifbegin(); if_index >= 0; + if_index = opal_ifnext(if_index)) { + opal_ifindextoaddr(if_index, + (struct sockaddr*) &if_inaddr, + 
sizeof(if_inaddr)); + if (opal_net_samenetwork((struct sockaddr*) &argv_inaddr, + (struct sockaddr*) &if_inaddr, + argv_prefix)) { + /* We found a match. If it's not already in the interfaces array, + add it. If it's already in the array, treat it as a match */ + match_count = match_count + 1; + opal_ifindextoname(if_index, if_name, sizeof(if_name)); + for (n = 0; n < interface_count; n++) { + if (0 == strcmp(if_name, interfaces[n])) { + break; + } + } + if (n == interface_count) { + opal_output_verbose(20, + opal_btl_base_framework.framework_output, + "btl: tcp: Found match: %s (%s)", + opal_net_get_hostname((struct sockaddr*) &if_inaddr), + if_name); + opal_argv_append(&interface_count, &interfaces, if_name); + } } } /* If we didn't find a match, keep trying */ - if (if_index < 0) { + if (0 == match_count) { if (reqd || mca_btl_tcp_component.report_all_unfound_interfaces) { opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude", true, name, opal_process_info.nodename, tmp, @@ -717,22 +751,17 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) continue; } - /* We found a match; get the name and replace it in the - argv */ - opal_ifindextoname(if_index, if_name, sizeof(if_name)); - opal_output_verbose(20, opal_btl_base_framework.framework_output, - "btl: tcp: Found match: %s (%s)", - opal_net_get_hostname((struct sockaddr *) &if_inaddr), if_name); - argv[save++] = strdup(if_name); free(tmp); } - /* The list may have been compressed if there were invalid - entries, so ensure we end it with a NULL entry */ - argv[save] = NULL; + /* Mark the end of the interface name array with NULL */ + if (NULL != interfaces) { + interfaces[interface_count] = NULL; + } + free(argv); free(*orig_str); - *orig_str = opal_argv_join(argv, ','); - return argv; + *orig_str = opal_argv_join(interfaces, ','); + return interfaces; } /* diff --git a/opal/mca/btl/uct/btl_uct_component.c b/opal/mca/btl/uct/btl_uct_component.c index 0de76088154..c68102a40e4 100644 
--- a/opal/mca/btl/uct/btl_uct_component.c +++ b/opal/mca/btl/uct/btl_uct_component.c @@ -17,7 +17,7 @@ * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. - * Copyright (c) 2019-2020 Google, LLC. All rights reserved. + * Copyright (c) 2019-2021 Google, LLC. All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * @@ -43,17 +43,19 @@ #include "btl_uct_am.h" #include "btl_uct_device_context.h" +static int use_safety_valve = 0; + static int mca_btl_uct_component_register(void) { mca_btl_uct_module_t *module = &mca_btl_uct_module_template; - mca_btl_uct_component.memory_domains = "none"; + mca_btl_uct_component.memory_domains = "mlx5_0,mlx4_0"; (void) mca_base_component_var_register( &mca_btl_uct_component.super.btl_version, "memory_domains", "Comma-delimited list of memory domains of the form " "to use for communication. Memory domains MUST provide transports that " "support put, get, and amos. Special values: all (all available), none." 
- " (default: none)", + " (default: mlx5_0,mlx4_0)", MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_uct_component.memory_domains); @@ -143,6 +145,7 @@ static int mca_btl_uct_component_open(void) & opal_mem_hooks_support_level()))) { ucm_set_external_event(UCM_EVENT_VM_UNMAPPED); opal_mem_hooks_register_release(mca_btl_uct_mem_release_cb, NULL); + use_safety_valve = 1; } return OPAL_SUCCESS; @@ -667,3 +670,10 @@ mca_btl_uct_component_t mca_btl_uct_component = { .btl_init = mca_btl_uct_component_init, .btl_progress = mca_btl_uct_component_progress, }}; + +static void safety_valve(void) __attribute__((destructor)); +void safety_valve(void) { + if (use_safety_valve) { + opal_mem_hooks_unregister_release(mca_btl_uct_mem_release_cb); + } +} diff --git a/opal/mca/btl/uct/configure.m4 b/opal/mca/btl/uct/configure.m4 index 5e7ad3838d9..ac444db251b 100644 --- a/opal/mca/btl/uct/configure.m4 +++ b/opal/mca/btl/uct/configure.m4 @@ -51,22 +51,23 @@ dnl AC_MSG_RESULT([no]) fi - max_allowed_uct_major=1 - max_allowed_uct_minor=9 + min_allowed_uct_major=1 + min_allowed_uct_minor=9 if test "$btl_uct_happy" = "yes" && test "$enable_uct_version_check" != "no"; then AC_MSG_CHECKING([UCT version compatibility]) OPAL_VAR_SCOPE_PUSH([CPPFLAGS_save]) CPPFLAGS_save="$CPPFLAGS" CPPFLAGS="$CPPFLAGS $btl_uct_CPPFLAGS" AC_PREPROC_IFELSE([AC_LANG_PROGRAM([#include - #if (UCT_VERNO_MAJOR > $max_allowed_uct_major) - #error "UCT MAJOR VERNO > $max_allowed_uct_major" + #if (UCT_VERNO_MAJOR < $min_allowed_uct_major) + #error "UCT MAJOR VERNO < $min_allowed_uct_major" #endif - #if (UCT_VERNO_MINOR > $max_allowed_uct_minor) - #error "UCT MINOR VERNO > $max_allowed_uct_minor" + #if (UCT_VERNO_MAJOR == $min_allowed_uct_major) &&\ + (UCT_VERNO_MINOR < $min_allowed_uct_minor) + #error "UCT MINOR VERNO < $min_allowed_uct_minor" #endif], [])], [AC_MSG_RESULT([UCT version compatible])], - [AC_MSG_RESULT([UCT version not compatible - need UCX 
$max_allowed_uct_major.$max_allowed_uct_minor or older]) + [AC_MSG_RESULT([UCT version not compatible - need UCX $min_allowed_uct_major.$min_allowed_uct_minor or newer]) btl_uct_happy="no"]) CPPFLAGS="$CPPFLAGS_save" OPAL_VAR_SCOPE_POP diff --git a/opal/mca/btl/ugni/btl_ugni_component.c b/opal/mca/btl/ugni/btl_ugni_component.c index b9441b51b0d..5af4e396507 100644 --- a/opal/mca/btl/ugni/btl_ugni_component.c +++ b/opal/mca/btl/ugni/btl_ugni_component.c @@ -469,6 +469,7 @@ static int btl_ugni_component_register(void) mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_ATOMIC_OPS | MCA_BTL_FLAGS_ATOMIC_FOPS; + mca_btl_ugni_module.super.btl_flags |= MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION; mca_btl_ugni_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR diff --git a/opal/mca/btl/usnic/btl_usnic_cagent.c b/opal/mca/btl/usnic/btl_usnic_cagent.c index 7d821f2e591..e55aa02d603 100644 --- a/opal/mca/btl/usnic/btl_usnic_cagent.c +++ b/opal/mca/btl/usnic/btl_usnic_cagent.c @@ -47,7 +47,7 @@ static opal_event_t ipc_event; static struct timeval ack_timeout; static opal_list_t udp_port_listeners; static opal_list_t ipc_listeners; -static volatile int ipc_accepts = 0; +static volatile uint32_t ipc_accepts = 0; /* JMS The pings_pending and ping_results should probably both be hash tables for more efficient lookups */ static opal_list_t pings_pending; diff --git a/opal/mca/btl/usnic/btl_usnic_stats.c b/opal/mca/btl/usnic/btl_usnic_stats.c index 5d1e1e8a19e..3c756d7748d 100644 --- a/opal/mca/btl/usnic/btl_usnic_stats.c +++ b/opal/mca/btl/usnic/btl_usnic_stats.c @@ -380,7 +380,7 @@ static void setup_mpit_pvars_enum(void) /* Free the strings (mca_base_var_enum_create() strdup()'ed them into private storage, so we don't need them any more) */ - for (int i = 0; i < mca_btl_usnic_component.num_modules; ++i) { + for (i = 0; i < 
mca_btl_usnic_component.num_modules; ++i) { free((char *) devices[i].string); } free(devices); diff --git a/opal/mca/common/cuda/common_cuda.c b/opal/mca/common/cuda/common_cuda.c index b6826408df0..2fdc4b100e3 100644 --- a/opal/mca/common/cuda/common_cuda.c +++ b/opal/mca/common/cuda/common_cuda.c @@ -456,6 +456,7 @@ int mca_common_cuda_stage_one_init(void) #if OPAL_CUDA_GET_ATTRIBUTES OPAL_CUDA_DLSYM(libcuda_handle, cuPointerGetAttributes); #endif /* OPAL_CUDA_GET_ATTRIBUTES */ + opal_cuda_runtime_initialized = true; return 0; } diff --git a/opal/mca/common/ofi/common_ofi.c b/opal/mca/common/ofi/common_ofi.c index c68f134e5af..e081ea250d2 100644 --- a/opal/mca/common/ofi/common_ofi.c +++ b/opal/mca/common/ofi/common_ofi.c @@ -4,8 +4,10 @@ * reserved. * Copyright (c) 2020-2021 Triad National Security, LLC. All rights * reserved. - * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2020-2021 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -13,28 +15,237 @@ * $HEADER$ */ + +#include "opal_config.h" + #include #include +#include +#include +#ifdef HAVE_RDMA_FI_EXT_H +#include +#endif -#include "opal_config.h" #include "common_ofi.h" #include "opal/constants.h" #include "opal/mca/base/mca_base_framework.h" #include "opal/mca/base/mca_base_var.h" #include "opal/mca/hwloc/base/base.h" +#include "opal/mca/memory/base/base.h" #include "opal/mca/pmix/base/base.h" #include "opal/util/argv.h" #include "opal/util/show_help.h" -OPAL_DECLSPEC opal_common_ofi_module_t opal_common_ofi = {.prov_include = NULL, - .prov_exclude = NULL, - .registered = 0, - .verbose = 0}; - -static const char default_prov_exclude_list[] = "shm,sockets,tcp,udp,rstream"; +opal_common_ofi_module_t opal_common_ofi = {.prov_include = NULL, + .prov_exclude = NULL, + .output = -1}; +static const char default_prov_exclude_list[] = "shm,sockets,tcp,udp,rstream,usnic"; static opal_mutex_t opal_common_ofi_mutex = OPAL_MUTEX_STATIC_INIT; +static int opal_common_ofi_verbose_level = 0; +static int opal_common_ofi_init_ref_cnt = 0; +#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR +static bool opal_common_ofi_installed_memory_monitor = false; +#endif + +#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR + +/* + * Monitor object to export into Libfabric to provide memory release + * notifications using our own memory hooks framework. Monitors may + * use the subscribe/unsubscribe notifications to reduce unnecessary + * notifications, but are not required to do so. Because patcher + * notifies about all releases, it is cheaper for us to not filter and + * this monitor can safely ignore subscribe/unsubscribe notifications. + * + * Libfabric requires the object to be fully defined. Unlike most of + * Open MPI, it does not have NULL function pointer checks in calling + * code. 
+ */ +static int opal_common_ofi_monitor_start(struct fid_mem_monitor *monitor) +{ + return 0; +} + +static void opal_common_ofi_monitor_stop(struct fid_mem_monitor *monitor) +{ + return; +} + +static int opal_common_ofi_monitor_subscribe(struct fid_mem_monitor *monitor, + const void *addr, size_t len) +{ + return 0; +} + +static void opal_common_ofi_monitor_unsubscribe(struct fid_mem_monitor *monitor, + const void *addr, size_t len) +{ + return; +} + +static bool opal_common_ofi_monitor_valid(struct fid_mem_monitor *monitor, + const void *addr, size_t len) +{ + return true; +} + +static struct fid_mem_monitor *opal_common_ofi_monitor = NULL; +static struct fid *opal_common_ofi_cache_fid = NULL; +static struct fi_ops_mem_monitor opal_common_ofi_export_ops = { + .size = sizeof(struct fi_ops_mem_monitor), + .start = opal_common_ofi_monitor_start, + .stop = opal_common_ofi_monitor_stop, + .subscribe = opal_common_ofi_monitor_subscribe, + .unsubscribe = opal_common_ofi_monitor_unsubscribe, + .valid = opal_common_ofi_monitor_valid, +}; + +/** + * Callback function from Open MPI memory monitor + * + * Translation function between the callback function from Open MPI's + * memory notifier to the Libfabric memory monitor. + */ +static void opal_common_ofi_mem_release_cb(void *buf, size_t length, + void *cbdata, bool from_alloc) +{ + opal_common_ofi_monitor->import_ops->notify(opal_common_ofi_monitor, + buf, length); +} + +#endif /* HAVE_STRUCT_FI_OPS_MEM_MONITOR */ + +int opal_common_ofi_export_memory_monitor(void) +{ + int ret = -FI_ENOSYS; + +#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR + OPAL_THREAD_LOCK(&opal_common_ofi_mutex); + + if (NULL != opal_common_ofi_cache_fid) { + return 0; + } + + /* + * While the memory import functionality was introduced in 1.13, + * some deadlock bugs exist in the 1.13 series. Require version + * 1.14 before this code is activated. 
Not activating the code + * should not break any functionality directly, but may lead to + * sub-optimal memory monitors being used in Libfabric, as Open + * MPI will almost certainly install a patcher first. + */ + if (FI_VERSION_LT(fi_version(), FI_VERSION(1, 14))) { + ret = -FI_ENOSYS; + goto err; + } + + ret = mca_base_framework_open(&opal_memory_base_framework, 0); + if (OPAL_SUCCESS != ret) { + ret = -FI_ENOSYS; + goto err; + } + if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) + != (((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT)) + & opal_mem_hooks_support_level())) { + ret = -FI_ENOSYS; + goto err; + } + + /* + * The monitor import object has the well known name "mr_cache" + * and was introduced in Libfabric 1.13 + */ + ret = fi_open(FI_VERSION(1,13), "mr_cache", NULL, 0, 0, + &opal_common_ofi_cache_fid, NULL); + if (0 != ret) { + goto err; + } + + opal_common_ofi_monitor = calloc(1, sizeof(*opal_common_ofi_monitor)); + if (NULL == opal_common_ofi_monitor) { + ret = -FI_ENOMEM; + goto err; + } + + opal_common_ofi_monitor->fid.fclass = FI_CLASS_MEM_MONITOR; + opal_common_ofi_monitor->export_ops = &opal_common_ofi_export_ops; + ret = fi_import_fid(opal_common_ofi_cache_fid, + &opal_common_ofi_monitor->fid, 0); + if (0 != ret) { + goto err; + } + opal_mem_hooks_register_release(opal_common_ofi_mem_release_cb, NULL); + opal_common_ofi_installed_memory_monitor = true; + + ret = 0; + +err: + if (0 != ret) { + if (NULL != opal_common_ofi_cache_fid) { + fi_close(opal_common_ofi_cache_fid); + } + if (NULL != opal_common_ofi_monitor) { + free(opal_common_ofi_monitor); + } + + opal_common_ofi_installed_memory_monitor = false; + } + + OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex); +#endif + + return ret; +} + +static int opal_common_ofi_remove_memory_monitor(void) +{ +#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR + if (opal_common_ofi_installed_memory_monitor) { + opal_mem_hooks_unregister_release(opal_common_ofi_mem_release_cb); + 
fi_close(opal_common_ofi_cache_fid); + fi_close(&opal_common_ofi_monitor->fid); + free(opal_common_ofi_monitor); + opal_common_ofi_installed_memory_monitor = false; + } +#endif + + return OPAL_SUCCESS; +} + +int opal_common_ofi_open(void) +{ + if ((opal_common_ofi_init_ref_cnt++) > 0) { + return OPAL_SUCCESS; + } + + return OPAL_SUCCESS; +} -OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item) +int opal_common_ofi_close(void) +{ + int ret; + + if ((--opal_common_ofi_init_ref_cnt) > 0) { + return OPAL_SUCCESS; + } + + ret = opal_common_ofi_remove_memory_monitor(); + if (OPAL_SUCCESS != ret) { + return ret; + } + + if (-1 != opal_common_ofi.output) { + opal_output_close(opal_common_ofi.output); + opal_common_ofi.output = -1; + if (OPAL_SUCCESS != ret) { + return ret; + } + } + + return OPAL_SUCCESS; +} + +int opal_common_ofi_is_in_list(char **list, char *item) { int i = 0; @@ -53,12 +264,12 @@ OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item) return 0; } -OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_component_t *component) +int opal_common_ofi_mca_register(const mca_base_component_t *component) { - static int include_index; - static int exclude_index; - static int verbose_index; - int param; + static int include_index = -1; + static int exclude_index = -1; + static int verbose_index = -1; + int ret; if (fi_version() < FI_VERSION(1, 0)) { return OPAL_ERROR; @@ -66,8 +277,7 @@ OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_componen OPAL_THREAD_LOCK(&opal_common_ofi_mutex); - param = mca_base_var_find("opal", "opal_common", "ofi", "provider_incude"); - if (0 > param) { + if (0 > include_index) { /* * this monkey business is needed because of the way the MCA VARs stuff tries to handle * pointers to strings when when destructing the MCA var database. 
If you don't do @@ -86,10 +296,13 @@ OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_componen "exclusive with mtl_ofi_provider_exclude.", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_1, MCA_BASE_VAR_SCOPE_READONLY, opal_common_ofi.prov_include); + if (0 > include_index) { + ret = include_index; + goto err; + } } - param = mca_base_var_find("opal", "opal_common", "ofi", "provider_exclude"); - if (0 > param) { + if (0 > exclude_index) { if (NULL == opal_common_ofi.prov_exclude) { opal_common_ofi.prov_exclude = (char **) malloc(sizeof(char *)); assert(NULL != opal_common_ofi.prov_exclude); @@ -102,56 +315,73 @@ OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_componen "exclusive with mtl_ofi_provider_include.", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_1, MCA_BASE_VAR_SCOPE_READONLY, opal_common_ofi.prov_exclude); + if (0 > exclude_index) { + ret = exclude_index; + goto err; + } } - param = mca_base_var_find("opal", "opal_common", "ofi", "verbose"); - if (0 > param) { + if (0 > verbose_index) { verbose_index = mca_base_var_register("opal", "opal_common", "ofi", "verbose", "Verbose level of the OFI components", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, - &opal_common_ofi.verbose); + &opal_common_ofi_verbose_level); + if (0 > verbose_index) { + ret = verbose_index; + goto err; + } } if (component) { - mca_base_var_register_synonym(include_index, component->mca_project_name, - component->mca_type_name, component->mca_component_name, - "provider_include", 0); - mca_base_var_register_synonym(exclude_index, component->mca_project_name, - component->mca_type_name, component->mca_component_name, - "provider_exclude", 0); - mca_base_var_register_synonym(verbose_index, component->mca_project_name, - component->mca_type_name, component->mca_component_name, - "verbose", 0); + ret = mca_base_var_register_synonym(include_index, + component->mca_project_name, 
+ component->mca_type_name, + component->mca_component_name, + "provider_include", 0); + if (0 > ret) { + goto err; + } + ret = mca_base_var_register_synonym(exclude_index, + component->mca_project_name, + component->mca_type_name, + component->mca_component_name, + "provider_exclude", 0); + if (0 > ret) { + goto err; + } + ret = mca_base_var_register_synonym(verbose_index, + component->mca_project_name, + component->mca_type_name, + component->mca_component_name, + "verbose", 0); + if (0 > ret) { + goto err; + } } - OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex); - - return OPAL_SUCCESS; -} - -OPAL_DECLSPEC void opal_common_ofi_mca_register(void) -{ - opal_common_ofi.registered++; - if (opal_common_ofi.registered > 1) { - opal_output_set_verbosity(opal_common_ofi.output, opal_common_ofi.verbose); - return; + /* The frameworks initialize their output streams during + * register(), so we similarly try to initialize the output stream + * as early as possible. Because we may register synonyms for + * each dependent component, we don't necessarily have all the + * data to set verbosity during the first call to + * common_ofi_register(). The MCA infrastructure has rules on + * synonym value evaluation, so our rubric is to re-set verbosity + * after every call to register() (which has registered a new + * synonym). This is not perfect, but it's not horrible, either. 
+ */ + if (opal_common_ofi.output == -1) { + opal_common_ofi.output = opal_output_open(NULL); } + opal_output_set_verbosity(opal_common_ofi.output, opal_common_ofi_verbose_level); - opal_common_ofi.output = opal_output_open(NULL); - opal_output_set_verbosity(opal_common_ofi.output, opal_common_ofi.verbose); -} + ret = OPAL_SUCCESS; -OPAL_DECLSPEC void opal_common_ofi_mca_deregister(void) -{ - /* unregister only on last deregister */ - opal_common_ofi.registered--; - assert(opal_common_ofi.registered >= 0); - if (opal_common_ofi.registered) { - return; - } - opal_output_close(opal_common_ofi.output); +err: + OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex); + + return ret; } /* check that the tx attributes match */ @@ -196,6 +426,7 @@ static int check_provider_attr(struct fi_info *provider_info, struct fi_info *pr { /* make sure both info are the same provider and provide the same attributes */ if (0 == strcmp(provider_info->fabric_attr->prov_name, provider->fabric_attr->prov_name) + && 0 == strcmp(provider_info->fabric_attr->name, provider->fabric_attr->name) && !check_tx_attr(provider_info->tx_attr, provider->tx_attr) && !check_rx_attr(provider_info->rx_attr, provider->rx_attr) && !check_ep_attr(provider_info->ep_attr, provider->ep_attr) @@ -362,61 +593,6 @@ static uint32_t get_package_rank(opal_process_info_t *process_info) return (uint32_t) package_ranks[process_info->my_local_rank]; } -/* Selects a NIC based on hardware locality between process cpuset and device BDF. - * - * Initializes opal_hwloc_topology to access hardware topology if not previously - * initialized - * - * There are 3 main cases that this covers: - * - * 1. If the first provider passed into this function is the only valid - * provider, this provider is returned. - * - * 2. 
If there is more than 1 provider that matches the type of the first - * provider in the list, and the BDF data - * is available then a provider is selected based on locality of device - * cpuset and process cpuset and tries to ensure that processes are distributed - * evenly across NICs. This has two separate cases: - * - * i. There is one or more provider local to the process: - * - * (local rank % number of providers of the same type that share the process cpuset) - * is used to select one of these providers. - * - * ii. There is no provider that is local to the process: - * - * (local rank % number of providers of the same type) - * is used to select one of these providers - * - * 3. If there is more than 1 providers of the same type in the list, and the BDF data - * is not available (the ofi version does not support fi_info.nic or the - * provider does not support BDF) then (local rank % number of providers of the same type) - * is used to select one of these providers - * - * @param provider_list (IN) struct fi_info* An initially selected - * provider NIC. The provider name and - * attributes are used to restrict NIC - * selection. This provider is returned if the - * NIC selection fails. - * - * @param package_rank (IN) uint32_t The rank of the process. Used to - * select one valid NIC if there is a case - * where more than one can be selected. This - * could occur when more than one provider - * shares the same cpuset as the process. - * This could either be a package_rank if one is - * successfully calculated, or the process id. - * - * @param provider (OUT) struct fi_info* object with the selected - * provider if the selection succeeds - * if the selection fails, returns the fi_info - * object that was initially provided. - * - * All errors should be recoverable and will return the initially provided - * provider. 
However, if an error occurs we can no longer guarantee - * that the provider returned is local to the process or that the processes will - * balance across available NICs. - */ struct fi_info *opal_mca_common_ofi_select_provider(struct fi_info *provider_list, opal_process_info_t *process_info) { @@ -426,7 +602,6 @@ struct fi_info *opal_mca_common_ofi_select_provider(struct fi_info *provider_lis struct fi_pci_attr pci; #endif int ret; - uint32_t package_rank; unsigned int num_provider = 0, provider_limit = 0; bool provider_found = false, cpusets_match = false; @@ -456,7 +631,9 @@ struct fi_info *opal_mca_common_ofi_select_provider(struct fi_info *provider_lis if (!check_provider_attr(provider, current_provider)) { cpusets_match = false; #if OPAL_OFI_PCI_DATA_AVAILABLE - if (NULL != current_provider->nic) { + if (NULL != current_provider->nic + && NULL != current_provider->nic->bus_attr + && current_provider->nic->bus_attr->bus_type == FI_BUS_PCI) { pci = current_provider->nic->bus_attr->attr.pci; cpusets_match = compare_cpusets(opal_hwloc_topology, pci); } @@ -482,16 +659,18 @@ struct fi_info *opal_mca_common_ofi_select_provider(struct fi_info *provider_lis } /* Select provider from local rank % number of providers */ + uint32_t package_rank = get_package_rank(process_info); if (num_provider >= 2) { // If there are multiple NICs "close" to the process, try to calculate package_rank - package_rank = get_package_rank(process_info); provider = provider_table[package_rank % num_provider]; } else if (num_provider == 1) { provider = provider_table[num_provider - 1]; } #if OPAL_OFI_PCI_DATA_AVAILABLE - if (NULL != provider->nic) { + if (NULL != provider->nic + && NULL != provider->nic->bus_attr + && provider->nic->bus_attr->bus_type == FI_BUS_PCI) { pci = provider->nic->bus_attr->attr.pci; cpusets_match = compare_cpusets(opal_hwloc_topology, pci); } diff --git a/opal/mca/common/ofi/common_ofi.h b/opal/mca/common/ofi/common_ofi.h index 2ec8ae5db10..ec21fd732b6 100644 --- 
a/opal/mca/common/ofi/common_ofi.h +++ b/opal/mca/common/ofi/common_ofi.h @@ -5,6 +5,8 @@ * reserved. * Copyright (c) 2020 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights + * reserved. * * $COPYRIGHT$ * @@ -16,29 +18,74 @@ #ifndef OPAL_MCA_COMMON_OFI_H #define OPAL_MCA_COMMON_OFI_H -#include "opal_config.h" -#include "opal/mca/base/mca_base_framework.h" -#include "opal/mca/base/mca_base_var.h" #include "opal/util/proc.h" -#include +#include "opal/memoryhooks/memory.h" BEGIN_C_DECLS typedef struct opal_common_ofi_module { char **prov_include; char **prov_exclude; - int verbose; - int registered; int output; } opal_common_ofi_module_t; extern opal_common_ofi_module_t opal_common_ofi; -OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_component_t *component); -OPAL_DECLSPEC void opal_common_ofi_mca_register(void); -OPAL_DECLSPEC void opal_common_ofi_mca_deregister(void); +/** + * Common MCA registration + * + * Common MCA registration handling. After calling this function, + * \c opal_common_ofi.output will be properly initialized. + * + * @param component (IN) OFI component being initialized + * + * @returns OPAL_SUCCESS on success, OPAL error code on failure + */ +OPAL_DECLSPEC int opal_common_ofi_mca_register(const mca_base_component_t *component); -/* +/** + * Initializes common objects for libfabric + * + * Initialize common libfabric interface. This should be called from + * any other OFI component's component_open() call. + * + * @note This function is not thread safe and must be called in a + * serial portion of the code. + */ +OPAL_DECLSPEC int opal_common_ofi_open(void); + +/** + * Cleans up common objects for libfabric + * + * Clean up common libfabric interface. This should be called from + * any other OFI component's component_close() call. Resource cleanup + * is reference counted, so every successful call to + * opal_common_ofi_open() must be matched by a call to this function.
+ * + * @note This function is not thread safe and must be called in a + * serial portion of the code. + */ +OPAL_DECLSPEC int opal_common_ofi_close(void); + +/** + * Export our memory hooks into Libfabric monitor + * + * Use Open MPI's memory hooks to provide monitor notifications to + * Libfabric via the external mr_cache facility. This must be called + * before any domain is initialized (ie, before any Libfabric memory + * monitor is configured). + * + * @returns A libfabric error code is returned on error + */ +OPAL_DECLSPEC int opal_common_ofi_export_memory_monitor(void); + +/** + * Search function for provider names + * + * This function will take a provider name string and a list of lower + * provider name strings as inputs. It will return true if the lower + * provider in the item string matches a lower provider in the list. + * * @param list (IN) List of strings corresponding to lower providers. * @param item (IN) Single string corresponding to a provider. * @@ -47,16 +94,63 @@ OPAL_DECLSPEC void opal_common_ofi_mca_deregister(void); * @return 1 The lower provider of the item string matches * a string in the item list. * - * This function will take a provider name string and a list of lower - * provider name strings as inputs. It will return true if the lower - * provider in the item string matches a lower provider in the list. - * */ OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item); -END_C_DECLS +/** + * Selects NIC (provider) based on hardware locality + * + * In multi-nic situations, use hardware topology to pick the "best" + * of the selected NICs. + * There are 3 main cases that this covers: + * + * 1. If the first provider passed into this function is the only valid + * provider, this provider is returned. + * + * 2. 
If there is more than 1 provider that matches the type of the first + * provider in the list, and the BDF data + * is available then a provider is selected based on locality of device + * cpuset and process cpuset and tries to ensure that processes + * are distributed evenly across NICs. This has two separate + * cases: + * + * i. There are one or more providers local to the process: + * + * (local rank % number of providers of the same type + * that share the process cpuset) is used to select one + * of these providers. + * + * ii. There is no provider that is local to the process: + * + * (local rank % number of providers of the same type) + * is used to select one of these providers + * + * 3. If there is more than 1 provider of the same type in the + * list, and the BDF data is not available (the ofi version does + * not support fi_info.nic or the provider does not support BDF) + * then (local rank % number of providers of the same type) is + * used to select one of these providers + * + * @param provider_list (IN) struct fi_info* An initially selected + * provider NIC. The provider name and + * attributes are used to restrict NIC + * selection. This provider is returned if the + * NIC selection fails. + * + * @return struct fi_info* object with the selected + * provider if the selection succeeds; + * if the selection fails, returns the fi_info + * object that was initially provided. + * + * All errors should be recoverable and will return the initially provided + * provider. However, if an error occurs we can no longer guarantee + * that the provider returned is local to the process or that the processes will + * balance across available NICs.
+ * + */ +OPAL_DECLSPEC struct fi_info *opal_mca_common_ofi_select_provider(struct fi_info *provider_list, + opal_process_info_t *process_info); -struct fi_info *opal_mca_common_ofi_select_provider(struct fi_info *provider_list, - opal_process_info_t *process_info); +END_C_DECLS #endif /* OPAL_MCA_COMMON_OFI_H */ diff --git a/opal/mca/common/ucx/common_ucx.c b/opal/mca/common/ucx/common_ucx.c index 0d44a468972..f5a4dc15800 100644 --- a/opal/mca/common/ucx/common_ucx.c +++ b/opal/mca/common/ucx/common_ucx.c @@ -26,6 +26,8 @@ #include #include +static int use_safety_valve = 0; + /***********************************************************************/ extern mca_base_framework_t opal_memory_base_framework; @@ -45,7 +47,7 @@ static void opal_common_ucx_mem_release_cb(void *buf, size_t length, void *cbdat OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *component) { - static const char *default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,cuda_ipc,rocm_ipc"; + static const char *default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,ud_mlx5,cuda_ipc,rocm_ipc"; static const char *default_devices = "mlx*"; static int hook_index; static int verbose_index; @@ -179,6 +181,7 @@ OPAL_DECLSPEC void opal_common_ucx_mca_register(void) MCA_COMMON_UCX_VERBOSE(1, "%s", "using OPAL memory hooks as external events"); ucm_set_external_event(UCM_EVENT_VM_UNMAPPED); opal_mem_hooks_register_release(opal_common_ucx_mem_release_cb, NULL); + use_safety_valve = 1; } } } @@ -216,12 +219,12 @@ static bool opal_common_ucx_check_device(const char *device_name, char **device_ "/sys/class/infiniband/%s/device/driver", ib_device_name); free(ib_device_name); - driver_path[sizeof(driver_path) - 1] = '\0'; ret = readlink(sysfs_driver_link, driver_path, sizeof(driver_path) - 1); if (ret < 0) { MCA_COMMON_UCX_VERBOSE(2, "readlink(%s) failed: %s", sysfs_driver_link, strerror(errno)); return false; } + driver_path[ret] = '\0'; /* readlink does not append \0 */ driver_name = 
basename(driver_path); for (list_item = device_list; *list_item != NULL; ++list_item) { @@ -259,8 +262,7 @@ OPAL_DECLSPEC opal_common_ucx_support_level_t opal_common_ucx_support_level(ucp_ /* Check for special value "any" */ if (is_any_tl && is_any_device) { - MCA_COMMON_UCX_VERBOSE(1, "ucx is enabled on any transport or device", - *opal_common_ucx.tls); + MCA_COMMON_UCX_VERBOSE(1, "ucx is enabled on any transport or device"); support_level = OPAL_COMMON_UCX_SUPPORT_DEVICE; goto out; } @@ -500,3 +502,10 @@ OPAL_DECLSPEC int opal_common_ucx_del_procs(opal_common_ucx_del_proc_t *procs, s return opal_common_ucx_mca_pmix_fence(worker); } + +static void safety_valve(void) __attribute__((destructor)); +void safety_valve(void) { + if (use_safety_valve) { + opal_mem_hooks_unregister_release(opal_common_ucx_mem_release_cb); + } +} diff --git a/opal/mca/common/ucx/common_ucx.h b/opal/mca/common/ucx/common_ucx.h index a7312b27f9e..1e85e196e9f 100644 --- a/opal/mca/common/ucx/common_ucx.h +++ b/opal/mca/common/ucx/common_ucx.h @@ -61,7 +61,7 @@ BEGIN_C_DECLS #define MCA_COMMON_UCX_PROGRESS_LOOP(_worker) \ for (unsigned iter = 0;; (++iter % opal_common_ucx.progress_iterations) \ ? 
(void) ucp_worker_progress(_worker) \ - : opal_progress()) + : (void) opal_progress()) #define MCA_COMMON_UCX_WAIT_LOOP(_request, _worker, _msg, _completed) \ do { \ diff --git a/opal/mca/common/ucx/common_ucx_wpool.c b/opal/mca/common/ucx/common_ucx_wpool.c index ef261f5e5e3..a8ebb9475f0 100644 --- a/opal/mca/common/ucx/common_ucx_wpool.c +++ b/opal/mca/common/ucx/common_ucx_wpool.c @@ -419,6 +419,7 @@ OPAL_DECLSPEC int opal_common_ucx_wpmem_create(opal_common_ucx_ctx_t *ctx, void **mem_base, size_t mem_size, opal_common_ucx_mem_type_t mem_type, opal_common_ucx_exchange_func_t exchange_func, + opal_common_ucx_exchange_mode_t exchange_mode, void *exchange_metadata, char **my_mem_addr, int *my_mem_addr_size, opal_common_ucx_wpmem_t **mem_ptr) { @@ -447,12 +448,13 @@ int opal_common_ucx_wpmem_create(opal_common_ucx_ctx_t *ctx, void **mem_base, si goto error_rkey_pack; } - ret = exchange_func(rkey_addr, rkey_addr_len, &mem->mem_addrs, &mem->mem_displs, - exchange_metadata); - if (ret != OPAL_SUCCESS) { - goto error_rkey_pack; + if (exchange_mode == OPAL_COMMON_UCX_WPMEM_ADDR_EXCHANGE_FULL) { + ret = exchange_func(rkey_addr, rkey_addr_len, &mem->mem_addrs, &mem->mem_displs, + exchange_metadata); + if (ret != OPAL_SUCCESS) { + goto error_rkey_pack; + } } - OBJ_CONSTRUCT(&mem->tls_key, opal_tsd_tracked_key_t); opal_tsd_tracked_key_set_destructor(&mem->tls_key, _mem_rec_destructor); @@ -519,8 +521,6 @@ static int _comm_ucx_wpmem_map(opal_common_ucx_wpool_t *wpool, void **base, size void opal_common_ucx_wpmem_free(opal_common_ucx_wpmem_t *mem) { - _mem_record_t *mem_rec = NULL, *next; - if (NULL == mem) { return; } @@ -634,7 +634,6 @@ static int _tlocal_ctx_connect(_ctx_record_t *ctx_rec, int target) ep_params.address = (ucp_address_t *) &(gctx->recv_worker_addrs[displ]); status = ucp_ep_create(winfo->worker, &ep_params, &winfo->endpoints[target]); if (status != UCS_OK) { - opal_mutex_unlock(&winfo->mutex); MCA_COMMON_UCX_VERBOSE(1, "ucp_ep_create failed: %d", status); 
opal_mutex_unlock(&winfo->mutex); return OPAL_ERROR; diff --git a/opal/mca/common/ucx/common_ucx_wpool.h b/opal/mca/common/ucx/common_ucx_wpool.h index 4fbe4f93408..448ba5b3092 100644 --- a/opal/mca/common/ucx/common_ucx_wpool.h +++ b/opal/mca/common/ucx/common_ucx_wpool.h @@ -32,6 +32,11 @@ BEGIN_C_DECLS /* fordward declaration */ typedef struct opal_common_ucx_winfo opal_common_ucx_winfo_t; +typedef enum { + OPAL_COMMON_UCX_WPMEM_ADDR_EXCHANGE_FULL, + OPAL_COMMON_UCX_WPMEM_ADDR_EXCHANGE_DIRECT +} opal_common_ucx_exchange_mode_t; + /* Worker pool is a global object that that is allocated per component or can be * shared between multiple compatible components. * The lifetime of this object is normally equal to the lifetime of a component[s]. @@ -235,6 +240,7 @@ static inline int opal_common_ucx_tlocal_fetch(opal_common_ucx_wpmem_t *mem, int OPAL_DECLSPEC int opal_common_ucx_wpmem_create(opal_common_ucx_ctx_t *ctx, void **mem_base, size_t mem_size, opal_common_ucx_mem_type_t mem_type, opal_common_ucx_exchange_func_t exchange_func, + opal_common_ucx_exchange_mode_t exchange_mode, void *exchange_metadata, char **my_mem_addr, int *my_mem_addr_size, opal_common_ucx_wpmem_t **mem_ptr); @@ -298,9 +304,6 @@ static inline int _periodical_flush_nb(opal_common_ucx_wpmem_t *mem, opal_common { int rc = OPAL_SUCCESS; - winfo->inflight_ops[target]++; - winfo->global_inflight_ops++; - if (OPAL_UNLIKELY(winfo->inflight_ops[target] >= MCA_COMMON_UCX_PER_TARGET_OPS_THRESHOLD) || OPAL_UNLIKELY(winfo->global_inflight_ops >= MCA_COMMON_UCX_GLOBAL_OPS_THRESHOLD)) { opal_common_ucx_flush_scope_t scope; @@ -375,6 +378,11 @@ static inline int opal_common_ucx_wpmem_putget(opal_common_ucx_wpmem_t *mem, goto out; } + if (status == UCS_INPROGRESS) { + winfo->inflight_ops[target]++; + winfo->global_inflight_ops++; + } + rc = _periodical_flush_nb(mem, winfo, target); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { MCA_COMMON_UCX_VERBOSE(1, "_incr_and_check_inflight_ops failed: %d", rc); @@ -445,8 +453,9 
@@ static inline int opal_common_ucx_wpmem_cmpswp_nb(opal_common_ucx_wpmem_t *mem, opal_mutex_lock(&winfo->mutex); req = opal_common_ucx_atomic_cswap_nb(ep, compare, value, buffer, len, rem_addr, rkey, opal_common_ucx_req_completion, winfo->worker); - if (UCS_PTR_IS_PTR(req)) { + winfo->inflight_ops[target]++; + winfo->global_inflight_ops++; req->ext_req = user_req_ptr; req->ext_cb = user_req_cb; req->winfo = winfo; @@ -562,6 +571,8 @@ static inline int opal_common_ucx_wpmem_fetch_nb(opal_common_ucx_wpmem_t *mem, req = opal_common_ucx_atomic_fetch_nb(ep, opcode, value, buffer, len, rem_addr, rkey, opal_common_ucx_req_completion, winfo->worker); if (UCS_PTR_IS_PTR(req)) { + winfo->inflight_ops[target]++; + winfo->global_inflight_ops++; req->ext_req = user_req_ptr; req->ext_cb = user_req_cb; req->winfo = winfo; diff --git a/opal/mca/hwloc/base/base.h b/opal/mca/hwloc/base/base.h index 646476c3aa6..d242bf1243b 100644 --- a/opal/mca/hwloc/base/base.h +++ b/opal/mca/hwloc/base/base.h @@ -39,10 +39,7 @@ BEGIN_C_DECLS * hwloc_topology_load() can be expensive (and/or serialized by the * OS); it may not be desireable to call this function in every MPI * process on a machine. Hence, it is the responsibility for an upper - * layer to both fill opal_hwloc_topology in some scalable way, as - * well as to invoke opal_hwloc_base_set_process_membind_policy() - * (after opal_hwloc_topology has been loaded) to set the process-wide - * memory affinity policy. + * layer to fill opal_hwloc_topology in some scalable way.
*/ /** @@ -53,84 +50,6 @@ OPAL_DECLSPEC extern bool opal_hwloc_topology_inited; OPAL_DECLSPEC extern mca_base_framework_t opal_hwloc_base_framework; -/* we always must have some minimal locality support */ -#define OPAL_HWLOC_PRINT_MAX_SIZE 50 -#define OPAL_HWLOC_PRINT_NUM_BUFS 16 -typedef struct { - char *buffers[OPAL_HWLOC_PRINT_NUM_BUFS]; - int cntr; -} opal_hwloc_print_buffers_t; -opal_hwloc_print_buffers_t *opal_hwloc_get_print_buffer(void); -extern char *opal_hwloc_print_null; -OPAL_DECLSPEC char *opal_hwloc_base_print_locality(opal_hwloc_locality_t locality); - -OPAL_DECLSPEC extern char *opal_hwloc_base_cpu_list; -OPAL_DECLSPEC extern hwloc_cpuset_t opal_hwloc_base_given_cpus; -OPAL_DECLSPEC extern char *opal_hwloc_base_topo_file; - -/* convenience macro for debugging */ -#define OPAL_HWLOC_SHOW_BINDING(n, v, t) \ - do { \ - char tmp1[1024]; \ - hwloc_cpuset_t bind; \ - bind = opal_hwloc_alloc(); \ - if (hwloc_get_cpubind(t, bind, HWLOC_CPUBIND_PROCESS) < 0) { \ - opal_output_verbose(n, v, "CANNOT DETERMINE BINDING AT %s:%d", __FILE__, __LINE__); \ - } else { \ - opal_hwloc_base_cset2mapstr(tmp1, sizeof(tmp1), t, bind); \ - opal_output_verbose(n, v, "BINDINGS AT %s:%d: %s", __FILE__, __LINE__, tmp1); \ - } \ - hwloc_bitmap_free(bind); \ - } while (0); - -#if HWLOC_API_VERSION < 0x20000 -# define OPAL_HWLOC_MAKE_OBJ_CACHE(level, obj, cache_level) \ - do { \ - obj = HWLOC_OBJ_CACHE; \ - cache_level = level; \ - } while (0) -#else -# define OPAL_HWLOC_MAKE_OBJ_CACHE(level, obj, cache_level) \ - do { \ - obj = HWLOC_OBJ_L##level##CACHE; \ - cache_level = 0; \ - } while (0) -#endif - -OPAL_DECLSPEC opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t topo, - char *cpuset1, - char *cpuset2); - -OPAL_DECLSPEC int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec); - -/** - * Loads opal_hwloc_my_cpuset (global variable in - * opal/mca/hwloc/hwloc-internal.h) for this process. 
opal_hwloc_my_cpuset - * will be loaded with this process' binding, or, if the process is - * not bound, use the hwloc root object's (available and online) - * cpuset. - */ -OPAL_DECLSPEC void opal_hwloc_base_get_local_cpuset(void); - -struct opal_rmaps_numa_node_t { - opal_list_item_t super; - int index; - float dist_from_closed; -}; -typedef struct opal_rmaps_numa_node_t opal_rmaps_numa_node_t; -OBJ_CLASS_DECLARATION(opal_rmaps_numa_node_t); - -/** - * Enum for what memory allocation policy we want for user allocations. - * MAP = memory allocation policy. - */ -typedef enum { OPAL_HWLOC_BASE_MAP_NONE, OPAL_HWLOC_BASE_MAP_LOCAL_ONLY } opal_hwloc_base_map_t; - -/** - * Global reflecting the MAP (set by MCA param). - */ -OPAL_DECLSPEC extern opal_hwloc_base_map_t opal_hwloc_base_map; - /** * Enum for what to do if the hwloc framework tries to bind memory * and fails. BFA = bind failure action. @@ -141,28 +60,12 @@ typedef enum { OPAL_HWLOC_BASE_MBFA_ERROR } opal_hwloc_base_mbfa_t; -/** - * Global reflecting the BFA (set by MCA param). - */ -OPAL_DECLSPEC extern opal_hwloc_base_mbfa_t opal_hwloc_base_mbfa; - /** * Discover / load the hwloc topology (i.e., call hwloc_topology_init() and * hwloc_topology_load()). */ OPAL_DECLSPEC int opal_hwloc_base_get_topology(void); -/** - * Set the hwloc topology to that from the given topo file - */ -OPAL_DECLSPEC int opal_hwloc_base_set_topology(char *topofile); - -OPAL_DECLSPEC int opal_hwloc_base_filter_cpus(hwloc_topology_t topo); - -/** - * Free the hwloc topology. 
- */ -OPAL_DECLSPEC void opal_hwloc_base_free_topology(hwloc_topology_t topo); OPAL_DECLSPEC unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo, hwloc_obj_type_t target, unsigned cache_level, @@ -174,72 +77,18 @@ OPAL_DECLSPEC hwloc_obj_t opal_hwloc_base_get_obj_by_type(hwloc_topology_t topo, unsigned int instance, opal_hwloc_resource_type_t rtype); -OPAL_DECLSPEC char *opal_hwloc_base_print_binding(opal_binding_policy_t binding); - -/** - * Determine if there is a single cpu in a bitmap. - */ -OPAL_DECLSPEC bool opal_hwloc_base_single_cpu(hwloc_cpuset_t cpuset); - -/** - * Report a bind failure using the normal mechanisms if a component - * fails to bind memory -- according to the value of the - * hwloc_base_bind_failure_action MCA parameter. - */ -OPAL_DECLSPEC int opal_hwloc_base_report_bind_failure(const char *file, int line, const char *msg, - int rc); - -/** - * This function sets the process-wide memory affinity policy - * according to opal_hwloc_base_map and opal_hwloc_base_mbfa. It needs - * to be a separate, standalone function (as opposed to being done - * during opal_hwloc_base_open()) because opal_hwloc_topology is not - * loaded by opal_hwloc_base_open(). Hence, an upper layer needs to - * invoke this function after opal_hwloc_topology has been loaded. - */ -OPAL_DECLSPEC int opal_hwloc_base_set_process_membind_policy(void); - OPAL_DECLSPEC int opal_hwloc_base_membind(opal_hwloc_base_memory_segment_t *segs, size_t count, int node_id); -OPAL_DECLSPEC int opal_hwloc_base_node_name_to_id(char *node_name, int *id); - OPAL_DECLSPEC int opal_hwloc_base_memory_set(opal_hwloc_base_memory_segment_t *segments, size_t num_segments); -/** - * Make a prettyprint string for a hwloc_cpuset_t (e.g., "socket - * 2[core 3]"). - */ -OPAL_DECLSPEC int opal_hwloc_base_cset2str(char *str, int len, hwloc_topology_t topo, - hwloc_cpuset_t cpuset); - -/** - * Make a prettyprint string for a cset in a map format. - * Example: [B./..] 
- * Key: [] - signifies socket - * / - divider between cores - * . - signifies PU a process not bound to - * B - signifies PU a process is bound to - */ -OPAL_DECLSPEC int opal_hwloc_base_cset2mapstr(char *str, int len, hwloc_topology_t topo, - hwloc_cpuset_t cpuset); - -/* get the hwloc object that corresponds to the given processor id and type */ -OPAL_DECLSPEC hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, int lid, - opal_hwloc_resource_type_t rtype); - -/* get a string describing the locality of a given process */ -OPAL_DECLSPEC char *opal_hwloc_base_get_locality_string(hwloc_topology_t topo, char *bitmap); - /* extract a location from the locality string */ OPAL_DECLSPEC char *opal_hwloc_base_get_location(char *locality, hwloc_obj_type_t type, unsigned index); OPAL_DECLSPEC opal_hwloc_locality_t opal_hwloc_compute_relative_locality(char *loc1, char *loc2); -OPAL_DECLSPEC int opal_hwloc_base_topology_set_flags(hwloc_topology_t topology, unsigned long flags, - bool io); END_C_DECLS #endif /* OPAL_HWLOC_BASE_H */ diff --git a/opal/mca/hwloc/base/help-opal-hwloc-base.txt b/opal/mca/hwloc/base/help-opal-hwloc-base.txt index a2c6af0c444..f105de9964e 100644 --- a/opal/mca/hwloc/base/help-opal-hwloc-base.txt +++ b/opal/mca/hwloc/base/help-opal-hwloc-base.txt @@ -19,43 +19,4 @@ message will only be reported at most once per process. File: %s:%d Message: %s Severity: %s -# -[invalid binding_policy] -The specified %s policy is not recognized: - - Policy: %s - -Please check for a typo or ensure that the option is a supported -one. -# -[redefining-policy] -Conflicting directives for binding policy are causing the policy -to be redefined: - - New policy: %s - Prior policy: %s - -Please check that only one policy is defined. 
-# -[deprecated] -The following command line option and corresponding MCA parameter have -been deprecated and replaced as follows: - - Command line option: - Deprecated: %s - Replacement: %s - - Equivalent MCA parameter: - Deprecated: %s - Replacement: %s - -The deprecated forms *will* disappear in a future version of Open MPI. -Please update to the new syntax. -# -[obj-idx-failed] -Open MPI failed to find a cache of a specified type. This is a highly -unusual error; it may indicate a system configuration error. This -additional information may be of help: - Message: %s - Cache level: %d diff --git a/opal/mca/hwloc/base/hwloc_base_frame.c b/opal/mca/hwloc/base/hwloc_base_frame.c index 75a6220851a..2ac049f3523 100644 --- a/opal/mca/hwloc/base/hwloc_base_frame.c +++ b/opal/mca/hwloc/base/hwloc_base_frame.c @@ -20,7 +20,6 @@ #include "opal/mca/threads/tsd.h" #include "opal/util/argv.h" #include "opal/util/output.h" -#include "opal/util/show_help.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/hwloc/hwloc-internal.h" @@ -38,21 +37,9 @@ bool opal_hwloc_base_inited = false; hwloc_topology_t opal_hwloc_topology = NULL; hwloc_cpuset_t opal_hwloc_my_cpuset = NULL; -hwloc_cpuset_t opal_hwloc_base_given_cpus = NULL; -opal_hwloc_base_map_t opal_hwloc_base_map = OPAL_HWLOC_BASE_MAP_NONE; opal_hwloc_base_mbfa_t opal_hwloc_base_mbfa = OPAL_HWLOC_BASE_MBFA_WARN; -opal_binding_policy_t opal_hwloc_binding_policy = 0; -char *opal_hwloc_base_cpu_list = NULL; -bool opal_hwloc_report_bindings = false; -hwloc_obj_type_t opal_hwloc_levels[] = {HWLOC_OBJ_MACHINE, HWLOC_OBJ_NODE, HWLOC_OBJ_SOCKET, - HWLOC_OBJ_L3CACHE, HWLOC_OBJ_L2CACHE, HWLOC_OBJ_L1CACHE, - HWLOC_OBJ_CORE, HWLOC_OBJ_PU}; -bool opal_hwloc_use_hwthreads_as_cpus = false; -char *opal_hwloc_base_topo_file = NULL; -static mca_base_var_enum_value_t hwloc_base_map[] = {{OPAL_HWLOC_BASE_MAP_NONE, "none"}, - {OPAL_HWLOC_BASE_MAP_LOCAL_ONLY, "local_only"}, - {0, NULL}}; +extern bool opal_hwloc_topo_in_shmem; static 
mca_base_var_enum_value_t hwloc_failure_action[] = {{OPAL_HWLOC_BASE_MBFA_SILENT, "silent"}, {OPAL_HWLOC_BASE_MBFA_WARN, "warn"}, @@ -66,34 +53,10 @@ static int opal_hwloc_base_close(void); MCA_BASE_FRAMEWORK_DECLARE(opal, hwloc, NULL, opal_hwloc_base_register, opal_hwloc_base_open, opal_hwloc_base_close, mca_hwloc_base_static_components, 0); -static char *opal_hwloc_base_binding_policy = NULL; -static bool opal_hwloc_base_bind_to_core = false; -static bool opal_hwloc_base_bind_to_socket = false; - static int opal_hwloc_base_register(mca_base_register_flag_t flags) { mca_base_var_enum_t *new_enum; - int ret, varid; - - /* hwloc_base_mbind_policy */ - - opal_hwloc_base_map = OPAL_HWLOC_BASE_MAP_NONE; - mca_base_var_enum_create("hwloc memory allocation policy", hwloc_base_map, &new_enum); - ret = mca_base_var_register( - "opal", "hwloc", "base", "mem_alloc_policy", - "General memory allocations placement policy (this is not memory binding). " - "\"none\" means that no memory policy is applied. \"local_only\" means that a process' " - "memory allocations will be restricted to its local NUMA node. " - "If using direct launch, this policy will not be in effect until after MPI_INIT. " - "Note that operating system paging policies are unaffected by this setting. For example, " - "if \"local_only\" is used and local NUMA node memory is exhausted, a new memory " - "allocation may cause paging.", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &opal_hwloc_base_map); - OBJ_RELEASE(new_enum); - if (0 > ret) { - return ret; - } + int ret; /* hwloc_base_bind_failure_action */ opal_hwloc_base_mbfa = OPAL_HWLOC_BASE_MBFA_WARN; @@ -113,141 +76,69 @@ static int opal_hwloc_base_register(mca_base_register_flag_t flags) return ret; } - opal_hwloc_base_binding_policy = NULL; - (void) mca_base_var_register( - "opal", "hwloc", "base", "binding_policy", - "Policy for binding processes. 
Allowed values: none, hwthread, core, l1cache, l2cache, " - "l3cache, socket, numa, board, cpu-list (\"none\" is the default when oversubscribed, " - "\"core\" is " - "the default when np<=2, and \"numa\" is the default when np>2). Allowed qualifiers: " - "overload-allowed, if-supported, ordered", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &opal_hwloc_base_binding_policy); - - /* backward compatibility */ - opal_hwloc_base_bind_to_core = false; - (void) mca_base_var_register("opal", "hwloc", "base", "bind_to_core", "Bind processes to cores", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_bind_to_core); - - opal_hwloc_base_bind_to_socket = false; - (void) mca_base_var_register("opal", "hwloc", "base", "bind_to_socket", - "Bind processes to sockets", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &opal_hwloc_base_bind_to_socket); - opal_hwloc_report_bindings = false; - (void) mca_base_var_register("opal", "hwloc", "base", "report_bindings", - "Report bindings to stderr", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &opal_hwloc_report_bindings); - - opal_hwloc_base_cpu_list = NULL; - varid = mca_base_var_register("opal", "hwloc", "base", "cpu_list", - "Comma-separated list of ranges specifying logical cpus to be " - "used by these processes [default: none]", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_cpu_list); - mca_base_var_register_synonym(varid, "opal", "hwloc", "base", "slot_list", - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - mca_base_var_register_synonym(varid, "opal", "hwloc", "base", "cpu_set", - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - - /* declare hwthreads as independent cpus */ - opal_hwloc_use_hwthreads_as_cpus = false; - (void) mca_base_var_register("opal", "hwloc", "base", "use_hwthreads_as_cpus", - "Use hardware threads as 
independent cpus", MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &opal_hwloc_use_hwthreads_as_cpus); - - opal_hwloc_base_topo_file = NULL; - (void) mca_base_var_register("opal", "hwloc", "base", "topo_file", - "Read local topology from file instead of directly sensing it", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_topo_file); - - /* register parameters */ return OPAL_SUCCESS; } static int opal_hwloc_base_open(mca_base_open_flag_t flags) { - int rc; - if (opal_hwloc_base_inited) { return OPAL_SUCCESS; } opal_hwloc_base_inited = true; - if (OPAL_SUCCESS - != (rc = opal_hwloc_base_set_binding_policy(&opal_hwloc_binding_policy, - opal_hwloc_base_binding_policy))) { - return rc; + /* to support tools such as ompi_info, add the components + * to a list + */ + if (OPAL_SUCCESS != mca_base_framework_components_open(&opal_hwloc_base_framework, flags)) { + return OPAL_ERROR; } - if (opal_hwloc_base_bind_to_core) { - opal_show_help("help-opal-hwloc-base.txt", "deprecated", true, "--bind-to-core", - "--bind-to core", "hwloc_base_bind_to_core", - "hwloc_base_binding_policy=core"); - /* set binding policy to core - error if something else already set */ - if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) - && OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) != OPAL_BIND_TO_CORE) { - /* error - cannot redefine the default ranking policy */ - opal_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, "core", - opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); - return OPAL_ERR_BAD_PARAM; - } - OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE); - } + return OPAL_SUCCESS; +} - if (opal_hwloc_base_bind_to_socket) { - opal_show_help("help-opal-hwloc-base.txt", "deprecated", true, "--bind-to-socket", - "--bind-to socket", "hwloc_base_bind_to_socket", - "hwloc_base_binding_policy=socket"); - /* set binding policy to socket - error if 
something else already set */ - if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) - && OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) != OPAL_BIND_TO_SOCKET) { - /* error - cannot redefine the default ranking policy */ - opal_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, "socket", - opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); - return OPAL_ERR_SILENT; - } - OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_SOCKET); - } +static void free_object(hwloc_obj_t obj) +{ + opal_hwloc_obj_data_t *data; + unsigned k; - /* did the user provide a slot list? */ - if (NULL != opal_hwloc_base_cpu_list) { - /* it is okay if a binding policy was already given - just ensure that - * we do bind to the given cpus if provided, otherwise this would be - * ignored if someone didn't also specify a binding policy - */ - // Restoring pre ef86707fbe3392c8ed15f79cc4892f0313b409af behavior. - // Formerly -cpu-set #,#,# along with -use_hwthread-cpus resulted - // in the binding policy staying OPAL_BIND_TO_HWTHREAD - // I think that should be right because I thought -cpu-set was a contraint you put - // on another binding policy, not a binding policy in itself. 
- if (!OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) { - OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CPUSET); - } + /* free any data hanging on this object */ + if (NULL != obj->userdata) { + data = (opal_hwloc_obj_data_t *) obj->userdata; + OBJ_RELEASE(data); + obj->userdata = NULL; } - /* if we are binding to hwthreads, then we must use hwthreads as cpus */ - if (OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) == OPAL_BIND_TO_HWTHREAD) { - opal_hwloc_use_hwthreads_as_cpus = true; + /* loop thru our children */ + for (k = 0; k < obj->arity; k++) { + free_object(obj->children[k]); } +} - /* to support tools such as ompi_info, add the components - * to a list - */ - if (OPAL_SUCCESS != mca_base_framework_components_open(&opal_hwloc_base_framework, flags)) { - return OPAL_ERROR; +static void free_topology(hwloc_topology_t topo) +{ + hwloc_obj_t obj; + opal_hwloc_topo_data_t *rdata; + unsigned k; + + if (!opal_hwloc_topo_in_shmem) { + obj = hwloc_get_root_obj(topo); + /* release the root-level userdata */ + if (NULL != obj->userdata) { + rdata = (opal_hwloc_topo_data_t *) obj->userdata; + OBJ_RELEASE(rdata); + obj->userdata = NULL; + } + /* now recursively descend and release userdata + * in the rest of the objects + */ + for (k = 0; k < obj->arity; k++) { + free_object(obj->children[k]); + } } - - return OPAL_SUCCESS; + hwloc_topology_destroy(topo); } -static opal_tsd_tracked_key_t *print_tsd_key = NULL; - static int opal_hwloc_base_close(void) { int ret; @@ -255,11 +146,6 @@ static int opal_hwloc_base_close(void) return OPAL_SUCCESS; } - if (NULL != print_tsd_key) { - OBJ_RELEASE(print_tsd_key); - print_tsd_key = NULL; - } - /* no need to close the component as it was statically opened */ /* for support of tools such as ompi_info */ @@ -276,7 +162,7 @@ static int opal_hwloc_base_close(void) /* destroy the topology */ if (NULL != opal_hwloc_topology) { - opal_hwloc_base_free_topology(opal_hwloc_topology); + 
free_topology(opal_hwloc_topology); opal_hwloc_topology = NULL; } @@ -285,138 +171,6 @@ static int opal_hwloc_base_close(void) return OPAL_SUCCESS; } -static bool fns_init = false; -char *opal_hwloc_print_null = "NULL"; - -static void buffer_cleanup(void *value) -{ - int i; - opal_hwloc_print_buffers_t *ptr; - - if (NULL != value) { - ptr = (opal_hwloc_print_buffers_t *) value; - for (i = 0; i < OPAL_HWLOC_PRINT_NUM_BUFS; i++) { - free(ptr->buffers[i]); - } - free(ptr); - } -} - -opal_hwloc_print_buffers_t *opal_hwloc_get_print_buffer(void) -{ - opal_hwloc_print_buffers_t *ptr; - int ret, i; - - if (!fns_init) { - /* setup the print_args function */ - print_tsd_key = OBJ_NEW(opal_tsd_tracked_key_t); - opal_tsd_tracked_key_set_destructor(print_tsd_key, buffer_cleanup); - fns_init = true; - } - - ret = opal_tsd_tracked_key_get(print_tsd_key, (void **) &ptr); - if (OPAL_SUCCESS != ret) { - return NULL; - } - - if (NULL == ptr) { - ptr = (opal_hwloc_print_buffers_t *) malloc(sizeof(opal_hwloc_print_buffers_t)); - for (i = 0; i < OPAL_HWLOC_PRINT_NUM_BUFS; i++) { - ptr->buffers[i] = (char *) malloc((OPAL_HWLOC_PRINT_MAX_SIZE + 1) * sizeof(char)); - } - ptr->cntr = 0; - ret = opal_tsd_tracked_key_set(print_tsd_key, (void *) ptr); - } - - return (opal_hwloc_print_buffers_t *) ptr; -} - -char *opal_hwloc_base_print_locality(opal_hwloc_locality_t locality) -{ - opal_hwloc_print_buffers_t *ptr; - int idx; - - ptr = opal_hwloc_get_print_buffer(); - if (NULL == ptr) { - return opal_hwloc_print_null; - } - /* cycle around the ring */ - if (OPAL_HWLOC_PRINT_NUM_BUFS == ptr->cntr) { - ptr->cntr = 0; - } - - idx = 0; - - if (OPAL_PROC_ON_LOCAL_CLUSTER(locality)) { - ptr->buffers[ptr->cntr][idx++] = 'C'; - ptr->buffers[ptr->cntr][idx++] = 'L'; - ptr->buffers[ptr->cntr][idx++] = ':'; - } - if (OPAL_PROC_ON_LOCAL_CU(locality)) { - ptr->buffers[ptr->cntr][idx++] = 'C'; - ptr->buffers[ptr->cntr][idx++] = 'U'; - ptr->buffers[ptr->cntr][idx++] = ':'; - } - if 
(OPAL_PROC_ON_LOCAL_NODE(locality)) { - ptr->buffers[ptr->cntr][idx++] = 'N'; - ptr->buffers[ptr->cntr][idx++] = ':'; - } - if (OPAL_PROC_ON_LOCAL_BOARD(locality)) { - ptr->buffers[ptr->cntr][idx++] = 'B'; - ptr->buffers[ptr->cntr][idx++] = ':'; - } - if (OPAL_PROC_ON_LOCAL_NUMA(locality)) { - ptr->buffers[ptr->cntr][idx++] = 'N'; - ptr->buffers[ptr->cntr][idx++] = 'u'; - ptr->buffers[ptr->cntr][idx++] = ':'; - } - if (OPAL_PROC_ON_LOCAL_SOCKET(locality)) { - ptr->buffers[ptr->cntr][idx++] = 'S'; - ptr->buffers[ptr->cntr][idx++] = ':'; - } - if (OPAL_PROC_ON_LOCAL_L3CACHE(locality)) { - ptr->buffers[ptr->cntr][idx++] = 'L'; - ptr->buffers[ptr->cntr][idx++] = '3'; - ptr->buffers[ptr->cntr][idx++] = ':'; - } - if (OPAL_PROC_ON_LOCAL_L2CACHE(locality)) { - ptr->buffers[ptr->cntr][idx++] = 'L'; - ptr->buffers[ptr->cntr][idx++] = '2'; - ptr->buffers[ptr->cntr][idx++] = ':'; - } - if (OPAL_PROC_ON_LOCAL_L1CACHE(locality)) { - ptr->buffers[ptr->cntr][idx++] = 'L'; - ptr->buffers[ptr->cntr][idx++] = '1'; - ptr->buffers[ptr->cntr][idx++] = ':'; - } - if (OPAL_PROC_ON_LOCAL_CORE(locality)) { - ptr->buffers[ptr->cntr][idx++] = 'C'; - ptr->buffers[ptr->cntr][idx++] = ':'; - } - if (OPAL_PROC_ON_LOCAL_HWTHREAD(locality)) { - ptr->buffers[ptr->cntr][idx++] = 'H'; - ptr->buffers[ptr->cntr][idx++] = 'w'; - ptr->buffers[ptr->cntr][idx++] = 't'; - ptr->buffers[ptr->cntr][idx++] = ':'; - } - if (0 < idx) { - ptr->buffers[ptr->cntr][idx - 1] = '\0'; - } else if (OPAL_PROC_NON_LOCAL & locality) { - ptr->buffers[ptr->cntr][idx++] = 'N'; - ptr->buffers[ptr->cntr][idx++] = 'O'; - ptr->buffers[ptr->cntr][idx++] = 'N'; - ptr->buffers[ptr->cntr][idx++] = '\0'; - } else { - /* must be an unknown locality */ - ptr->buffers[ptr->cntr][idx++] = 'U'; - ptr->buffers[ptr->cntr][idx++] = 'N'; - ptr->buffers[ptr->cntr][idx++] = 'K'; - ptr->buffers[ptr->cntr][idx++] = '\0'; - } - - return ptr->buffers[ptr->cntr]; -} - static void obj_data_const(opal_hwloc_obj_data_t *ptr) { ptr->npus_calculated = 
false; @@ -461,93 +215,3 @@ static void topo_data_dest(opal_hwloc_topo_data_t *ptr) ptr->userdata = NULL; } OBJ_CLASS_INSTANCE(opal_hwloc_topo_data_t, opal_object_t, topo_data_const, topo_data_dest); - -OBJ_CLASS_INSTANCE(opal_rmaps_numa_node_t, opal_list_item_t, NULL, NULL); - -int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec) -{ - int i; - opal_binding_policy_t tmp; - char **tmpvals, **quals; - - /* set default */ - tmp = 0; - - /* binding specification */ - if (NULL == spec) { - if (opal_hwloc_use_hwthreads_as_cpus) { - /* default to bind-to hwthread */ - OPAL_SET_DEFAULT_BINDING_POLICY(tmp, OPAL_BIND_TO_HWTHREAD); - } else { - /* default to bind-to core */ - OPAL_SET_DEFAULT_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE); - } - } else if (0 == strncasecmp(spec, "none", strlen("none"))) { - OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NONE); - } else { - tmpvals = opal_argv_split(spec, ':'); - if (1 < opal_argv_count(tmpvals) || ':' == spec[0]) { - if (':' == spec[0]) { - quals = opal_argv_split(&spec[1], ','); - } else { - quals = opal_argv_split(tmpvals[1], ','); - } - for (i = 0; NULL != quals[i]; i++) { - if (0 == strncasecmp(quals[i], "if-supported", strlen(quals[i]))) { - tmp |= OPAL_BIND_IF_SUPPORTED; - } else if (0 == strncasecmp(quals[i], "overload-allowed", strlen(quals[i])) - || 0 - == strncasecmp(quals[i], "oversubscribe-allowed", - strlen(quals[i]))) { - tmp |= OPAL_BIND_ALLOW_OVERLOAD; - } else if (0 == strncasecmp(quals[i], "ordered", strlen(quals[i]))) { - tmp |= OPAL_BIND_ORDERED; - } else { - /* unknown option */ - opal_output(0, "Unknown qualifier to binding policy: %s", spec); - opal_argv_free(quals); - opal_argv_free(tmpvals); - return OPAL_ERR_BAD_PARAM; - } - } - opal_argv_free(quals); - } - if (NULL == tmpvals[0] || ':' == spec[0]) { - OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE); - tmp &= ~OPAL_BIND_GIVEN; - } else { - if (0 == strcasecmp(tmpvals[0], "hwthread")) { - OPAL_SET_BINDING_POLICY(tmp, 
OPAL_BIND_TO_HWTHREAD); - } else if (0 == strcasecmp(tmpvals[0], "core")) { - OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE); - } else if (0 == strcasecmp(tmpvals[0], "l1cache")) { - OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L1CACHE); - } else if (0 == strcasecmp(tmpvals[0], "l2cache")) { - OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L2CACHE); - } else if (0 == strcasecmp(tmpvals[0], "l3cache")) { - OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L3CACHE); - } else if (0 == strcasecmp(tmpvals[0], "socket")) { - OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_SOCKET); - } else if (0 == strcasecmp(tmpvals[0], "numa")) { - OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NUMA); - } else if (0 == strcasecmp(tmpvals[0], "board")) { - OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_BOARD); - } else if (0 == strcasecmp(tmpvals[0], "cpu-list") - || 0 == strcasecmp(tmpvals[0], "cpulist")) { - // Accept both "cpu-list" (which matches the - // "--cpu-list" CLI option) and "cpulist" (because - // people will be lazy) - OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CPUSET); - } else { - opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, - "binding", spec); - opal_argv_free(tmpvals); - return OPAL_ERR_BAD_PARAM; - } - } - opal_argv_free(tmpvals); - } - - *policy = tmp; - return OPAL_SUCCESS; -} diff --git a/opal/mca/hwloc/base/hwloc_base_maffinity.c b/opal/mca/hwloc/base/hwloc_base_maffinity.c index 3447dc42375..1064aa8c653 100644 --- a/opal/mca/hwloc/base/hwloc_base_maffinity.c +++ b/opal/mca/hwloc/base/hwloc_base_maffinity.c @@ -11,61 +11,39 @@ #include "opal_config.h" #include "opal/constants.h" +#include "opal/util/show_help.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/hwloc/hwloc-internal.h" -/* - * Don't use show_help() here (or print any error message at all). - * Let the upper layer output a relevant message, because doing so may - * be complicated. +/** + * Global reflecting the BFA (set by MCA param). 
*/ -int opal_hwloc_base_set_process_membind_policy(void) -{ - int rc = 0, flags; - hwloc_membind_policy_t policy; - hwloc_cpuset_t cpuset; - - /* Make sure opal_hwloc_topology has been set by the time we've - been called */ - if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) { - return OPAL_ERR_BAD_PARAM; - } +extern opal_hwloc_base_mbfa_t opal_hwloc_base_mbfa; - /* Set the default memory allocation policy according to MCA - param */ - switch (opal_hwloc_base_map) { - case OPAL_HWLOC_BASE_MAP_LOCAL_ONLY: - policy = HWLOC_MEMBIND_BIND; - flags = HWLOC_MEMBIND_STRICT; - break; - - case OPAL_HWLOC_BASE_MAP_NONE: - default: - policy = HWLOC_MEMBIND_DEFAULT; - flags = 0; - break; - } - - cpuset = hwloc_bitmap_alloc(); - if (NULL == cpuset) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - } else { - int e; - hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0); - rc = hwloc_set_membind(opal_hwloc_topology, cpuset, policy, flags); - e = errno; - hwloc_bitmap_free(cpuset); - - /* See if hwloc was able to do it. If hwloc failed due to - ENOSYS, but the base_map == NONE, then it's not really an - error. */ - if (0 != rc && ENOSYS == e && OPAL_HWLOC_BASE_MAP_NONE == opal_hwloc_base_map) { - rc = 0; - } +/** + * Report a bind failure using the normal mechanisms if a component + * fails to bind memory -- according to the value of the + * hwloc_base_bind_failure_action MCA parameter. + */ +static int opal_hwloc_base_report_bind_failure(const char *file, int line, const char *msg, int rc) +{ + static int already_reported = 0; + + if (!already_reported && OPAL_HWLOC_BASE_MBFA_SILENT != opal_hwloc_base_mbfa) { + char hostname[OPAL_MAXHOSTNAMELEN]; + gethostname(hostname, sizeof(hostname)); + + opal_show_help( + "help-opal-hwloc-base.txt", "mbind failure", true, hostname, getpid(), file, line, msg, + (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa) + ? 
"Warning -- your job will continue, but possibly with degraded performance" + : "ERROR -- your job may abort or behave erraticly"); + already_reported = 1; + return rc; } - return (0 == rc) ? OPAL_SUCCESS : OPAL_ERROR; + return OPAL_SUCCESS; } int opal_hwloc_base_memory_set(opal_hwloc_base_memory_segment_t *segments, size_t num_segments) @@ -112,14 +90,6 @@ int opal_hwloc_base_memory_set(opal_hwloc_base_memory_segment_t *segments, size_ return OPAL_SUCCESS; } -int opal_hwloc_base_node_name_to_id(char *node_name, int *id) -{ - /* GLB: fix me */ - *id = atoi(node_name + 3); - - return OPAL_SUCCESS; -} - int opal_hwloc_base_membind(opal_hwloc_base_memory_segment_t *segs, size_t count, int node_id) { size_t i; diff --git a/opal/mca/hwloc/base/hwloc_base_proc_mempolicy.c b/opal/mca/hwloc/base/hwloc_base_proc_mempolicy.c deleted file mode 100644 index 596eb2d3567..00000000000 --- a/opal/mca/hwloc/base/hwloc_base_proc_mempolicy.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/constants.h" - -#include "opal/mca/hwloc/base/base.h" -#include "opal/mca/hwloc/hwloc-internal.h" - -/* - * Don't use show_help() here (or print any error message at all). - * Let the upper layer output a relevant message, because doing so may - * be complicated. 
- */ -int opal_hwloc_base_set_process_membind_policy(void) -{ - int rc = 0, flags; - hwloc_membind_policy_t policy; - hwloc_cpuset_t cpuset; - - /* Make sure opal_hwloc_topology has been set by the time we've - been called */ - if (NULL == opal_hwloc_topology) { - return OPAL_ERR_BAD_PARAM; - } - - /* Set the default memory allocation policy according to MCA - param */ - switch (opal_hwloc_base_map) { - case OPAL_HWLOC_BASE_MAP_LOCAL_ONLY: - policy = HWLOC_MEMBIND_BIND; - flags = HWLOC_MEMBIND_STRICT; - break; - - case OPAL_HWLOC_BASE_MAP_NONE: - default: - policy = HWLOC_MEMBIND_DEFAULT; - flags = 0; - break; - } - - cpuset = hwloc_bitmap_alloc(); - if (NULL == cpuset) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - } else { - int e; - hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0); - rc = hwloc_set_membind(opal_hwloc_topology, cpuset, policy, flags); - e = errno; - hwloc_bitmap_free(cpuset); - - /* See if hwloc was able to do it. If hwloc failed due to - ENOSYS, but the base_map == NONE, then it's not really an - error. */ - if (0 != rc && ENOSYS == e && OPAL_HWLOC_BASE_MAP_NONE == opal_hwloc_base_map) { - rc = 0; - } - } - - return (0 == rc) ? OPAL_SUCCESS : OPAL_ERROR; -} diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index 05bdbbda144..7e43b476c09 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -62,79 +62,72 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/mca/hwloc/hwloc-internal.h" -static bool topo_in_shmem = false; +bool opal_hwloc_topo_in_shmem = false; -/* - * Provide the hwloc object that corresponds to the given - * processor id of the given type. Remember: "processor" here [usually] means "core" -- - * except that on some platforms, hwloc won't find any cores; it'll - * only find PUs (!). On such platforms, then do the same calculation - * but with PUs instead of COREs. 
- */ -hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, int lid, opal_hwloc_resource_type_t rtype) +static void fill_cache_line_size(void) { - hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE; + int i = 0, cache_level = 2; + unsigned size; + unsigned int cache_object = HWLOC_OBJ_L2CACHE; hwloc_obj_t obj; + bool found = false; - /* hwloc isn't able to find cores on all platforms. Example: - PPC64 running RHEL 5.4 (linux kernel 2.6.18) only reports NUMA - nodes and PU's. Fine. - - However, note that hwloc_get_obj_by_type() will return NULL in - 2 (effectively) different cases: - - - no objects of the requested type were found - - the Nth object of the requested type was not found + /* Look for the smallest L2 cache size */ + size = 4096; + while (cache_level > 0 && !found) { + i = 0; + while (1) { + obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, cache_object, cache_level, i, + OPAL_HWLOC_LOGICAL); + if (NULL == obj) { + --cache_level; + cache_object = HWLOC_OBJ_L1CACHE; + break; + } else { + if (NULL != obj->attr && obj->attr->cache.linesize > 0 + && size > obj->attr->cache.linesize) { + size = obj->attr->cache.linesize; + found = true; + } + } + ++i; + } + } - So first we have to see if we can find *any* cores by looking - for the 0th core. If we find it, then try to find the Nth - core. Otherwise, try to find the Nth PU. */ - if (opal_hwloc_use_hwthreads_as_cpus - || (NULL == hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0))) { - obj_type = HWLOC_OBJ_PU; + /* If we found an L2 cache size in the hwloc data, save it in + opal_cache_line_size. Otherwise, we'll leave whatever default + was set in opal_init.c */ + if (found) { + opal_cache_line_size = (int) size; } +} - if (OPAL_HWLOC_PHYSICAL == rtype) { - /* find the pu - note that we can only find physical PUs - * as cores do not have unique physical numbers (they are - * numbered within their sockets instead). 
So we find the - * specified PU, and then return the core object that contains it */ - obj = hwloc_get_pu_obj_by_os_index(topo, lid); - OPAL_OUTPUT_VERBOSE( - (5, opal_hwloc_base_framework.framework_output, "physical cpu %d %s found in cpuset %s", - lid, (NULL == obj) ? "not" : "is", - (NULL == opal_hwloc_base_cpu_list) ? "None" : opal_hwloc_base_cpu_list)); - /* we now need to shift upward to the core including this PU */ - if (NULL != obj && HWLOC_OBJ_CORE == obj_type) { - obj = obj->parent; +static int opal_hwloc_base_topology_set_flags(hwloc_topology_t topology, unsigned long flags, bool io) +{ + if (io) { +#if HWLOC_API_VERSION < 0x20000 + flags |= HWLOC_TOPOLOGY_FLAG_IO_DEVICES; +#else + int ret = hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); + if (0 != ret) { + return ret; } - return obj; +#endif } - - opal_output_verbose(5, opal_hwloc_base_framework.framework_output, - "Searching for %d LOGICAL PU", lid); - - /* Now do the actual lookup. */ - obj = hwloc_get_obj_by_type(topo, obj_type, lid); - OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, - "logical cpu %d %s found in cpuset %s", lid, (NULL == obj) ? "not" : "is", - (NULL == opal_hwloc_base_cpu_list) ? "None" : opal_hwloc_base_cpu_list)); - - /* Found the right core (or PU). Return the object */ - return obj; + return hwloc_topology_set_flags(topology, flags); } /* determine the node-level available cpuset based on - * online vs allowed vs user-specified cpus + * online vs allowed vs user-specified cpus. + * + * Only used in 'self discovery' case, when the topology + * could not be retrieved from the RM. 
*/ -int opal_hwloc_base_filter_cpus(hwloc_topology_t topo) +static int opal_hwloc_base_filter_cpus(hwloc_topology_t topo) { - hwloc_obj_t root, pu; - hwloc_cpuset_t avail = NULL, pucpus, res; + hwloc_obj_t root; + hwloc_cpuset_t avail = NULL; opal_hwloc_topo_data_t *sum; - opal_hwloc_obj_data_t *data; - char **ranges = NULL, **range = NULL; - int idx, cpu, start, end; root = hwloc_get_root_obj(topo); @@ -148,125 +141,50 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo) return OPAL_SUCCESS; } - /* process any specified default cpu set against this topology */ - if (NULL == opal_hwloc_base_cpu_list) { /* get the root available cpuset */ #if HWLOC_API_VERSION < 0x20000 - avail = hwloc_bitmap_alloc(); - hwloc_bitmap_and(avail, root->online_cpuset, root->allowed_cpuset); -#else + if (NULL == root->online_cpuset || NULL == root->allowed_cpuset) { + if (NULL == root->cpuset) { + /* we have a really bad topology */ + return OPAL_ERR_NOT_SUPPORTED; + } avail = hwloc_bitmap_dup(root->cpuset); -#endif - OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, - "hwloc:base: no cpus specified - using root available cpuset")); } else { - OPAL_OUTPUT_VERBOSE( - (5, opal_hwloc_base_framework.framework_output, "hwloc:base: filtering cpuset")); - /* find the specified logical cpus */ - ranges = opal_argv_split(opal_hwloc_base_cpu_list, ','); avail = hwloc_bitmap_alloc(); - hwloc_bitmap_zero(avail); - res = hwloc_bitmap_alloc(); - pucpus = hwloc_bitmap_alloc(); - for (idx = 0; idx < opal_argv_count(ranges); idx++) { - range = opal_argv_split(ranges[idx], '-'); - switch (opal_argv_count(range)) { - case 1: - /* only one cpu given - get that object */ - cpu = strtoul(range[0], NULL, 10); - if (NULL != (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) { -#if HWLOC_API_VERSION < 0x20000 - hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); -#else - hwloc_bitmap_free(pucpus); - pucpus = hwloc_bitmap_dup(pu->cpuset); -#endif - 
hwloc_bitmap_or(res, avail, pucpus); - hwloc_bitmap_copy(avail, res); - data = (opal_hwloc_obj_data_t *) pu->userdata; - if (NULL == data) { - pu->userdata = (void *) OBJ_NEW(opal_hwloc_obj_data_t); - data = (opal_hwloc_obj_data_t *) pu->userdata; - } - data->npus++; - } - break; - case 2: - /* range given */ - start = strtoul(range[0], NULL, 10); - end = strtoul(range[1], NULL, 10); - for (cpu = start; cpu <= end; cpu++) { - if (NULL != (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) { -#if HWLOC_API_VERSION < 0x20000 - hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); + hwloc_bitmap_and(avail, root->online_cpuset, root->allowed_cpuset); + } #else - hwloc_bitmap_free(pucpus); - pucpus = hwloc_bitmap_dup(pu->cpuset); + avail = hwloc_bitmap_dup(root->cpuset); #endif - hwloc_bitmap_or(res, avail, pucpus); - hwloc_bitmap_copy(avail, res); - data = (opal_hwloc_obj_data_t *) pu->userdata; - if (NULL == data) { - pu->userdata = (void *) OBJ_NEW(opal_hwloc_obj_data_t); - data = (opal_hwloc_obj_data_t *) pu->userdata; - } - data->npus++; - } - } - break; - default: - break; - } - opal_argv_free(range); - } - if (NULL != ranges) { - opal_argv_free(ranges); - } - hwloc_bitmap_free(res); - hwloc_bitmap_free(pucpus); - } - /* cache this info */ sum->available = avail; return OPAL_SUCCESS; } -static void fill_cache_line_size(void) +/** + * Initializes opal_hwloc_my_cpuset (global variable in + * opal/mca/hwloc/hwloc-internal.h) for this process. opal_hwloc_my_cpuset + * will be loaded with this process' binding, or, if the process is + * not bound, use the hwloc root object's (available and online) + * cpuset. 
+ */ +static void opal_hwloc_base_set_local_cpuset(void) { - int i = 0, cache_level = 2; - unsigned size; - unsigned int cache_object = HWLOC_OBJ_L2CACHE; - hwloc_obj_t obj; - bool found = false; + hwloc_obj_t root; - /* Look for the smallest L2 cache size */ - size = 4096; - while (cache_level > 0 && !found) { - i = 0; - while (1) { - obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, cache_object, cache_level, i, - OPAL_HWLOC_LOGICAL); - if (NULL == obj) { - --cache_level; - cache_object = HWLOC_OBJ_L1CACHE; - break; - } else { - if (NULL != obj->attr && obj->attr->cache.linesize > 0 - && size > obj->attr->cache.linesize) { - size = obj->attr->cache.linesize; - found = true; - } - } - ++i; + if (NULL != opal_hwloc_topology) { + if (NULL == opal_hwloc_my_cpuset) { + opal_hwloc_my_cpuset = hwloc_bitmap_alloc(); } - } - /* If we found an L2 cache size in the hwloc data, save it in - opal_cache_line_size. Otherwise, we'll leave whatever default - was set in opal_init.c */ - if (found) { - opal_cache_line_size = (int) size; + /* get the cpus we are bound to */ + if (hwloc_get_cpubind(opal_hwloc_topology, opal_hwloc_my_cpuset, HWLOC_CPUBIND_PROCESS) + < 0) { + /* we are not bound - use the root's available cpuset */ + root = hwloc_get_root_obj(opal_hwloc_topology); + hwloc_bitmap_copy(opal_hwloc_my_cpuset, root->cpuset); + } } } @@ -290,17 +208,6 @@ int opal_hwloc_base_get_topology(void) wildcard_rank.jobid = OPAL_PROC_MY_NAME.jobid; wildcard_rank.vpid = OPAL_VPID_WILDCARD; - // Did the user ask for a topo file at the mca line? - // Check this first, before main methods. 
- if (NULL != opal_hwloc_base_topo_file) { - opal_output_verbose(1, opal_hwloc_base_framework.framework_output, - "hwloc:base loading topology from file %s", opal_hwloc_base_topo_file); - if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) { - return rc; - } - goto done; - } - #if HWLOC_API_VERSION >= 0x20000 opal_output_verbose(2, opal_hwloc_base_framework.framework_output, "hwloc:base: looking for topology in shared memory"); @@ -342,7 +249,7 @@ int opal_hwloc_base_get_topology(void) } else { opal_output_verbose(2, opal_hwloc_base_framework.framework_output, "hwloc:base: topology in shared memory"); - topo_in_shmem = true; + opal_hwloc_topo_in_shmem = true; goto done; } } @@ -394,11 +301,6 @@ int opal_hwloc_base_get_topology(void) goto discover; } free(val); - /* filter the cpus thru any default cpu set */ - if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) { - hwloc_topology_destroy(opal_hwloc_topology); - return rc; - } } else { discover: opal_output_verbose(1, opal_hwloc_base_framework.framework_output, @@ -422,173 +324,14 @@ int opal_hwloc_base_get_topology(void) line size */ fill_cache_line_size(); - /* get or update our local cpuset - it will get used multiple - * times, so it's more efficient to keep a global copy + /* Set or update our local cpuset - it could get used multiple + * times, so it's more efficient to keep a global copy. 
*/ - opal_hwloc_base_get_local_cpuset(); - - return OPAL_SUCCESS; -} - -int opal_hwloc_base_set_topology(char *topofile) -{ - struct hwloc_topology_support *support; - - OPAL_OUTPUT_VERBOSE( - (5, opal_hwloc_base_framework.framework_output, "hwloc:base:set_topology %s", topofile)); - - if (NULL != opal_hwloc_topology) { - hwloc_topology_destroy(opal_hwloc_topology); - } - if (0 != hwloc_topology_init(&opal_hwloc_topology)) { - return OPAL_ERR_NOT_SUPPORTED; - } - if (0 != hwloc_topology_set_xml(opal_hwloc_topology, topofile)) { - hwloc_topology_destroy(opal_hwloc_topology); - OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, - "hwloc:base:set_topology bad topo file")); - return OPAL_ERR_NOT_SUPPORTED; - } - /* since we are loading this from an external source, we have to - * explicitly set a flag so hwloc sets things up correctly - */ - if (0 - != opal_hwloc_base_topology_set_flags(opal_hwloc_topology, - HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM, true)) { - hwloc_topology_destroy(opal_hwloc_topology); - return OPAL_ERR_NOT_SUPPORTED; - } - if (0 != hwloc_topology_load(opal_hwloc_topology)) { - hwloc_topology_destroy(opal_hwloc_topology); - OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, - "hwloc:base:set_topology failed to load")); - return OPAL_ERR_NOT_SUPPORTED; - } - - /* unfortunately, hwloc does not include support info in its - * xml output :-(( We default to assuming it is present as - * systems that use this option are likely to provide - * binding support - */ - support = (struct hwloc_topology_support *) hwloc_topology_get_support(opal_hwloc_topology); - support->cpubind->set_thisproc_cpubind = true; - support->membind->set_thisproc_membind = true; - - /* fill opal_cache_line_size global with the smallest L1 cache - line size */ - fill_cache_line_size(); - - /* all done */ - return OPAL_SUCCESS; -} - -static void free_object(hwloc_obj_t obj) -{ - opal_hwloc_obj_data_t *data; - unsigned k; - - /* free any data hanging on this 
object */ - if (NULL != obj->userdata) { - data = (opal_hwloc_obj_data_t *) obj->userdata; - OBJ_RELEASE(data); - obj->userdata = NULL; - } - - /* loop thru our children */ - for (k = 0; k < obj->arity; k++) { - free_object(obj->children[k]); - } -} - -void opal_hwloc_base_free_topology(hwloc_topology_t topo) -{ - hwloc_obj_t obj; - opal_hwloc_topo_data_t *rdata; - unsigned k; - - if (!topo_in_shmem) { - obj = hwloc_get_root_obj(topo); - /* release the root-level userdata */ - if (NULL != obj->userdata) { - rdata = (opal_hwloc_topo_data_t *) obj->userdata; - OBJ_RELEASE(rdata); - obj->userdata = NULL; - } - /* now recursively descend and release userdata - * in the rest of the objects - */ - for (k = 0; k < obj->arity; k++) { - free_object(obj->children[k]); - } - } - hwloc_topology_destroy(topo); -} - -void opal_hwloc_base_get_local_cpuset(void) -{ - hwloc_obj_t root; - - if (NULL != opal_hwloc_topology) { - if (NULL == opal_hwloc_my_cpuset) { - opal_hwloc_my_cpuset = hwloc_bitmap_alloc(); - } - - /* get the cpus we are bound to */ - if (hwloc_get_cpubind(opal_hwloc_topology, opal_hwloc_my_cpuset, HWLOC_CPUBIND_PROCESS) - < 0) { - /* we are not bound - use the root's available cpuset */ - root = hwloc_get_root_obj(opal_hwloc_topology); - hwloc_bitmap_copy(opal_hwloc_my_cpuset, root->cpuset); - } - } -} - -int opal_hwloc_base_report_bind_failure(const char *file, int line, const char *msg, int rc) -{ - static int already_reported = 0; - - if (!already_reported && OPAL_HWLOC_BASE_MBFA_SILENT != opal_hwloc_base_mbfa) { - char hostname[OPAL_MAXHOSTNAMELEN]; - gethostname(hostname, sizeof(hostname)); - - opal_show_help( - "help-opal-hwloc-base.txt", "mbind failure", true, hostname, getpid(), file, line, msg, - (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa) - ? 
"Warning -- your job will continue, but possibly with degraded performance" - : "ERROR -- your job may abort or behave erraticly"); - already_reported = 1; - return rc; - } + opal_hwloc_base_set_local_cpuset(); return OPAL_SUCCESS; } -/* determine if there is a single cpu in a bitmap */ -bool opal_hwloc_base_single_cpu(hwloc_cpuset_t cpuset) -{ - int i; - bool one = false; - - /* count the number of bits that are set - there is - * one bit for each available pu. We could just - * subtract the first and last indices, but there - * may be "holes" in the bitmap corresponding to - * offline or unallowed cpus - so we have to - * search for them. Return false if we anything - * other than one - */ - for (i = hwloc_bitmap_first(cpuset); i <= hwloc_bitmap_last(cpuset); i++) { - if (hwloc_bitmap_isset(cpuset, i)) { - if (one) { - return false; - } - one = true; - } - } - - return one; -} - /* hwloc treats cache objects as special * cases. Instead of having a unique type for each cache level, * there is a single cache object type, and the level is encoded @@ -657,7 +400,7 @@ static hwloc_obj_t df_search(hwloc_topology_t topo, hwloc_obj_t start, hwloc_obj hwloc_obj_t root; opal_hwloc_topo_data_t *rdata = NULL; root = hwloc_get_root_obj(topo); - if (false == topo_in_shmem) { + if (false == opal_hwloc_topo_in_shmem) { rdata = (opal_hwloc_topo_data_t *) root->userdata; } hwloc_cpuset_t constrained_cpuset; @@ -729,12 +472,12 @@ unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo, hwloc_obj obj = hwloc_get_root_obj(topo); /* first see if the topology already has this summary */ - if (false == topo_in_shmem) { + if (false == opal_hwloc_topo_in_shmem) { data = (opal_hwloc_topo_data_t *) obj->userdata; } if (NULL == data) { data = OBJ_NEW(opal_hwloc_topo_data_t); - if (false == topo_in_shmem) { + if (false == opal_hwloc_topo_in_shmem) { // Can't touch userdata if in read-only shmem! 
// We have to protect here for the case where obj->userdata // is in shmem and it is NULL. @@ -770,7 +513,7 @@ unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo, hwloc_obj return num_objs; } -/* as above, only return the Nth instance of the specified object +/* Return the Nth instance of the specified object * type from inside the topology */ hwloc_obj_t opal_hwloc_base_get_obj_by_type(hwloc_topology_t topo, hwloc_obj_type_t target, @@ -801,459 +544,6 @@ hwloc_obj_t opal_hwloc_base_get_obj_by_type(hwloc_topology_t topo, hwloc_obj_typ return df_search(topo, obj, target, cache_level, instance, rtype, NULL); } -static void opal_hwloc_base_get_relative_locality_by_depth(hwloc_topology_t topo, unsigned d, - hwloc_cpuset_t loc1, hwloc_cpuset_t loc2, - opal_hwloc_locality_t *locality, - bool *shared) -{ - unsigned width, w; - hwloc_obj_t obj; - int sect1, sect2; - - /* get the width of the topology at this depth */ - width = hwloc_get_nbobjs_by_depth(topo, d); - - /* scan all objects at this depth to see if - * our locations overlap with them - */ - for (w = 0; w < width; w++) { - /* get the object at this depth/index */ - obj = hwloc_get_obj_by_depth(topo, d, w); - /* see if our locations intersect with the cpuset for this obj */ - sect1 = hwloc_bitmap_intersects(obj->cpuset, loc1); - sect2 = hwloc_bitmap_intersects(obj->cpuset, loc2); - /* if both intersect, then we share this level */ - if (sect1 && sect2) { - *shared = true; - switch (obj->type) { - case HWLOC_OBJ_NODE: - *locality |= OPAL_PROC_ON_NUMA; - break; - case HWLOC_OBJ_SOCKET: - *locality |= OPAL_PROC_ON_SOCKET; - break; -#if HWLOC_API_VERSION < 0x20000 - case HWLOC_OBJ_CACHE: - if (3 == obj->attr->cache.depth) { - *locality |= OPAL_PROC_ON_L3CACHE; - } else if (2 == obj->attr->cache.depth) { - *locality |= OPAL_PROC_ON_L2CACHE; - } else { - *locality |= OPAL_PROC_ON_L1CACHE; - } - break; -#else - case HWLOC_OBJ_L3CACHE: - *locality |= OPAL_PROC_ON_L3CACHE; - break; - case 
HWLOC_OBJ_L2CACHE: - *locality |= OPAL_PROC_ON_L2CACHE; - break; - case HWLOC_OBJ_L1CACHE: - *locality |= OPAL_PROC_ON_L1CACHE; - break; -#endif - case HWLOC_OBJ_CORE: - *locality |= OPAL_PROC_ON_CORE; - break; - case HWLOC_OBJ_PU: - *locality |= OPAL_PROC_ON_HWTHREAD; - break; - default: - /* just ignore it */ - break; - } - break; - } - /* otherwise, we don't share this - * object - but we still might share another object - * on this level, so we have to keep searching - */ - } -} - -opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t topo, char *cpuset1, - char *cpuset2) -{ - opal_hwloc_locality_t locality; - hwloc_cpuset_t loc1, loc2; - unsigned depth, d; - bool shared; - hwloc_obj_type_t type; - - /* start with what we know - they share a node on a cluster - * NOTE: we may alter that latter part as hwloc's ability to - * sense multi-cu, multi-cluster systems grows - */ - locality = OPAL_PROC_ON_NODE | OPAL_PROC_ON_HOST | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER; - - /* if either cpuset is NULL, then that isn't bound */ - if (NULL == cpuset1 || NULL == cpuset2) { - return locality; - } - - /* get the max depth of the topology */ - depth = hwloc_topology_get_depth(topo); - - /* convert the strings to cpusets */ - loc1 = hwloc_bitmap_alloc(); - hwloc_bitmap_list_sscanf(loc1, cpuset1); - loc2 = hwloc_bitmap_alloc(); - hwloc_bitmap_list_sscanf(loc2, cpuset2); - - /* start at the first depth below the top machine level */ - for (d = 1; d < depth; d++) { - shared = false; - /* get the object type at this depth */ - type = hwloc_get_depth_type(topo, d); - /* if it isn't one of interest, then ignore it */ - if (HWLOC_OBJ_NODE != type && HWLOC_OBJ_SOCKET != type && -#if HWLOC_API_VERSION < 0x20000 - HWLOC_OBJ_CACHE != type && -#else - HWLOC_OBJ_L3CACHE != type && HWLOC_OBJ_L2CACHE != type && HWLOC_OBJ_L1CACHE != type && -#endif - HWLOC_OBJ_CORE != type && HWLOC_OBJ_PU != type) { - continue; - } - 
opal_hwloc_base_get_relative_locality_by_depth(topo, d, loc1, loc2, &locality, &shared); - - /* if we spanned the entire width without finding - * a point of intersection, then no need to go - * deeper - */ - if (!shared) { - break; - } - } -#if HWLOC_API_VERSION >= 0x20000 - opal_hwloc_base_get_relative_locality_by_depth(topo, HWLOC_TYPE_DEPTH_NUMANODE, loc1, loc2, - &locality, &shared); -#endif - - opal_output_verbose(5, opal_hwloc_base_framework.framework_output, "locality: %s", - opal_hwloc_base_print_locality(locality)); - hwloc_bitmap_free(loc1); - hwloc_bitmap_free(loc2); - - return locality; -} - -char *opal_hwloc_base_print_binding(opal_binding_policy_t binding) -{ - char *ret, *bind; - opal_hwloc_print_buffers_t *ptr; - - switch (OPAL_GET_BINDING_POLICY(binding)) { - case OPAL_BIND_TO_NONE: - bind = "NONE"; - break; - case OPAL_BIND_TO_BOARD: - bind = "BOARD"; - break; - case OPAL_BIND_TO_NUMA: - bind = "NUMA"; - break; - case OPAL_BIND_TO_SOCKET: - bind = "SOCKET"; - break; - case OPAL_BIND_TO_L3CACHE: - bind = "L3CACHE"; - break; - case OPAL_BIND_TO_L2CACHE: - bind = "L2CACHE"; - break; - case OPAL_BIND_TO_L1CACHE: - bind = "L1CACHE"; - break; - case OPAL_BIND_TO_CORE: - bind = "CORE"; - break; - case OPAL_BIND_TO_HWTHREAD: - bind = "HWTHREAD"; - break; - case OPAL_BIND_TO_CPUSET: - bind = "CPUSET"; - break; - default: - bind = "UNKNOWN"; - } - ptr = opal_hwloc_get_print_buffer(); - if (NULL == ptr) { - return opal_hwloc_print_null; - } - /* cycle around the ring */ - if (OPAL_HWLOC_PRINT_NUM_BUFS == ptr->cntr) { - ptr->cntr = 0; - } - if (!OPAL_BINDING_REQUIRED(binding) && OPAL_BIND_OVERLOAD_ALLOWED(binding)) { - snprintf(ptr->buffers[ptr->cntr], OPAL_HWLOC_PRINT_MAX_SIZE, - "%s:IF-SUPPORTED:OVERLOAD-ALLOWED", bind); - } else if (OPAL_BIND_OVERLOAD_ALLOWED(binding)) { - snprintf(ptr->buffers[ptr->cntr], OPAL_HWLOC_PRINT_MAX_SIZE, "%s:OVERLOAD-ALLOWED", bind); - } else if (!OPAL_BINDING_REQUIRED(binding)) { - snprintf(ptr->buffers[ptr->cntr], 
OPAL_HWLOC_PRINT_MAX_SIZE, "%s:IF-SUPPORTED", bind); - } else { - snprintf(ptr->buffers[ptr->cntr], OPAL_HWLOC_PRINT_MAX_SIZE, "%s", bind); - } - ret = ptr->buffers[ptr->cntr]; - ptr->cntr++; - - return ret; -} - -/* - * Turn an int bitmap to a "a-b,c" range kind of string - */ -static char *bitmap2rangestr(int bitmap) -{ - size_t i; - int range_start, range_end; - bool first, isset; - char tmp[BUFSIZ]; - const int stmp = sizeof(tmp) - 1; - static char ret[BUFSIZ]; - - memset(ret, 0, sizeof(ret)); - - first = true; - range_start = -999; - for (i = 0; i < sizeof(int) * 8; ++i) { - isset = (bitmap & (1 << i)); - - /* Do we have a running range? */ - if (range_start >= 0) { - if (isset) { - continue; - } else { - /* A range just ended; output it */ - if (!first) { - strncat(ret, ",", sizeof(ret) - strlen(ret) - 1); - } else { - first = false; - } - - range_end = i - 1; - if (range_start == range_end) { - snprintf(tmp, stmp, "%d", range_start); - } else { - snprintf(tmp, stmp, "%d-%d", range_start, range_end); - } - strncat(ret, tmp, sizeof(ret) - strlen(ret) - 1); - - range_start = -999; - } - } - - /* No running range */ - else { - if (isset) { - range_start = i; - } - } - } - - /* If we ended the bitmap with a range open, output it */ - if (range_start >= 0) { - if (!first) { - strncat(ret, ",", sizeof(ret) - strlen(ret) - 1); - first = false; - } - - range_end = i - 1; - if (range_start == range_end) { - snprintf(tmp, stmp, "%d", range_start); - } else { - snprintf(tmp, stmp, "%d-%d", range_start, range_end); - } - strncat(ret, tmp, sizeof(ret) - strlen(ret) - 1); - } - - return ret; -} - -/* - * Make a map of socket/core/hwthread tuples - */ -static int build_map(int *num_sockets_arg, int *num_cores_arg, hwloc_cpuset_t cpuset, int ***map, - hwloc_topology_t topo) -{ - int num_sockets, num_cores; - int socket_index, core_index, pu_index; - hwloc_obj_t socket, core, pu; - int **data; - - /* Find out how many sockets we have */ - num_sockets = 
hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET); - /* some systems (like the iMac) only have one - * socket and so don't report a socket - */ - if (0 == num_sockets) { - num_sockets = 1; - } - /* Lazy: take the total number of cores that we have in the - topology; that'll be more than the max number of cores - under any given socket */ - num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); - *num_sockets_arg = num_sockets; - *num_cores_arg = num_cores; - - /* Alloc a 2D array: sockets x cores. */ - data = malloc(num_sockets * sizeof(int *)); - if (NULL == data) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - data[0] = calloc(num_sockets * num_cores, sizeof(int)); - if (NULL == data[0]) { - free(data); - return OPAL_ERR_OUT_OF_RESOURCE; - } - for (socket_index = 1; socket_index < num_sockets; ++socket_index) { - data[socket_index] = data[socket_index - 1] + num_cores; - } - - /* Iterate the PUs in this cpuset; fill in the data[][] array with - the socket/core/pu triples */ - for (pu_index = 0, - pu = hwloc_get_obj_inside_cpuset_by_type(topo, cpuset, HWLOC_OBJ_PU, pu_index); - NULL != pu; - pu = hwloc_get_obj_inside_cpuset_by_type(topo, cpuset, HWLOC_OBJ_PU, ++pu_index)) { - /* Go upward and find the core this PU belongs to */ - core = pu; - while (NULL != core && core->type != HWLOC_OBJ_CORE) { - core = core->parent; - } - core_index = 0; - if (NULL != core) { - core_index = core->logical_index; - } - - /* Go upward and find the socket this PU belongs to */ - socket = pu; - while (NULL != socket && socket->type != HWLOC_OBJ_SOCKET) { - socket = socket->parent; - } - socket_index = 0; - if (NULL != socket) { - socket_index = socket->logical_index; - } - - /* Save this socket/core/pu combo. LAZY: Assuming that we - won't have more PU's per core than (sizeof(int)*8). 
*/ - data[socket_index][core_index] |= (1 << pu->sibling_rank); - } - - *map = data; - return OPAL_SUCCESS; -} - -/* - * Make a prettyprint string for a hwloc_cpuset_t - */ -int opal_hwloc_base_cset2str(char *str, int len, hwloc_topology_t topo, hwloc_cpuset_t cpuset) -{ - bool first; - int num_sockets, num_cores; - int ret, socket_index, core_index; - char tmp[BUFSIZ]; - const int stmp = sizeof(tmp) - 1; - int **map = NULL; - - str[0] = tmp[stmp] = '\0'; - - /* if the cpuset is all zero, then not bound */ - if (hwloc_bitmap_iszero(cpuset)) { - return OPAL_ERR_NOT_BOUND; - } - - if (OPAL_SUCCESS != (ret = build_map(&num_sockets, &num_cores, cpuset, &map, topo))) { - return ret; - } - /* Iterate over the data matrix and build up the string */ - first = true; - for (socket_index = 0; socket_index < num_sockets; ++socket_index) { - for (core_index = 0; core_index < num_cores; ++core_index) { - if (map[socket_index][core_index] > 0) { - if (!first) { - strncat(str, ", ", len - strlen(str) - 1); - } - first = false; - - snprintf(tmp, stmp, "socket %d[core %d[hwt %s]]", socket_index, core_index, - bitmap2rangestr(map[socket_index][core_index])); - strncat(str, tmp, len - strlen(str) - 1); - } - } - } - if (NULL != map) { - if (NULL != map[0]) { - free(map[0]); - } - free(map); - } - - return OPAL_SUCCESS; -} - -/* - * Make a prettyprint string for a cset in a map format. - * Example: [B./..] - * Key: [] - signifies socket - * / - divider between cores - * . 
- signifies PU a process not bound to - * B - signifies PU a process is bound to - */ -int opal_hwloc_base_cset2mapstr(char *str, int len, hwloc_topology_t topo, hwloc_cpuset_t cpuset) -{ - char tmp[BUFSIZ]; - int core_index, pu_index; - const int stmp = sizeof(tmp) - 1; - hwloc_obj_t socket, core, pu; - - str[0] = tmp[stmp] = '\0'; - - /* if the cpuset is all zero, then not bound */ - if (hwloc_bitmap_iszero(cpuset)) { - return OPAL_ERR_NOT_BOUND; - } - - /* Iterate over all existing sockets */ - for (socket = hwloc_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0); NULL != socket; - socket = socket->next_cousin) { - strncat(str, "[", len - strlen(str) - 1); - - /* Iterate over all existing cores in this socket */ - core_index = 0; - for (core = hwloc_get_obj_inside_cpuset_by_type(topo, socket->cpuset, HWLOC_OBJ_CORE, - core_index); - NULL != core; - core = hwloc_get_obj_inside_cpuset_by_type(topo, socket->cpuset, HWLOC_OBJ_CORE, - ++core_index)) { - if (core_index > 0) { - strncat(str, "/", len - strlen(str) - 1); - } - - /* Iterate over all existing PUs in this core */ - pu_index = 0; - for (pu = hwloc_get_obj_inside_cpuset_by_type(topo, core->cpuset, HWLOC_OBJ_PU, - pu_index); - NULL != pu; pu = hwloc_get_obj_inside_cpuset_by_type(topo, core->cpuset, - HWLOC_OBJ_PU, ++pu_index)) { - - /* Is this PU in the cpuset? 
*/ - if (hwloc_bitmap_isset(cpuset, pu->os_index)) { - strncat(str, "B", len - strlen(str) - 1); - } else { - strncat(str, ".", len - strlen(str) - 1); - } - } - } - strncat(str, "]", len - strlen(str) - 1); - } - - return OPAL_SUCCESS; -} - char *opal_hwloc_base_get_location(char *locality, hwloc_obj_type_t type, unsigned index) { char **loc; @@ -1376,18 +666,3 @@ opal_hwloc_locality_t opal_hwloc_compute_relative_locality(char *loc1, char *loc hwloc_bitmap_free(bit2); return locality; } - -int opal_hwloc_base_topology_set_flags(hwloc_topology_t topology, unsigned long flags, bool io) -{ - if (io) { -#if HWLOC_API_VERSION < 0x20000 - flags |= HWLOC_TOPOLOGY_FLAG_IO_DEVICES; -#else - int ret = hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); - if (0 != ret) { - return ret; - } -#endif - } - return hwloc_topology_set_flags(topology, flags); -} diff --git a/opal/mca/hwloc/hwloc-internal.h b/opal/mca/hwloc/hwloc-internal.h index 5407afaf5b6..ea160dc3b45 100644 --- a/opal/mca/hwloc/hwloc-internal.h +++ b/opal/mca/hwloc/hwloc-internal.h @@ -188,59 +188,9 @@ typedef struct { } opal_hwloc_topo_data_t; OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_hwloc_topo_data_t); -/* define binding policies */ -typedef uint16_t opal_binding_policy_t; -#define OPAL_BINDING_POLICY OPAL_UINT16 - -/* binding directives */ -#define OPAL_BIND_IF_SUPPORTED 0x1000 -/* allow assignment of multiple procs to - * same cpu */ -#define OPAL_BIND_ALLOW_OVERLOAD 0x2000 -/* the binding policy was specified by the user */ -#define OPAL_BIND_GIVEN 0x4000 -/* bind each rank to the cpu in the given - * cpu list based on its node-local-rank */ -#define OPAL_BIND_ORDERED 0x8000 - -/* - * binding policies - */ -#define OPAL_BIND_TO_NONE 1 -#define OPAL_BIND_TO_BOARD 2 -#define OPAL_BIND_TO_NUMA 3 -#define OPAL_BIND_TO_SOCKET 4 -#define OPAL_BIND_TO_L3CACHE 5 -#define OPAL_BIND_TO_L2CACHE 6 -#define OPAL_BIND_TO_L1CACHE 7 -#define OPAL_BIND_TO_CORE 8 -#define OPAL_BIND_TO_HWTHREAD 9 
-#define OPAL_BIND_TO_CPUSET 10 -#define OPAL_GET_BINDING_POLICY(pol) ((pol) &0x0fff) -#define OPAL_SET_BINDING_POLICY(target, pol) \ - (target) = (pol) | (((target) &0xf000) | OPAL_BIND_GIVEN) -#define OPAL_SET_DEFAULT_BINDING_POLICY(target, pol) \ - do { \ - if (!OPAL_BINDING_POLICY_IS_SET((target))) { \ - (target) = (pol) | (((target) &0xf000) | OPAL_BIND_IF_SUPPORTED); \ - } \ - } while (0); - -/* check if policy is set */ -#define OPAL_BINDING_POLICY_IS_SET(pol) ((pol) &0x4000) -/* macro to detect if binding was qualified */ -#define OPAL_BINDING_REQUIRED(n) (!(OPAL_BIND_IF_SUPPORTED & (n))) -/* macro to detect if binding is forced */ -#define OPAL_BIND_OVERLOAD_ALLOWED(n) (OPAL_BIND_ALLOW_OVERLOAD & (n)) -#define OPAL_BIND_ORDERED_REQUESTED(n) (OPAL_BIND_ORDERED & (n)) - /* some global values */ OPAL_DECLSPEC extern hwloc_topology_t opal_hwloc_topology; -OPAL_DECLSPEC extern opal_binding_policy_t opal_hwloc_binding_policy; OPAL_DECLSPEC extern hwloc_cpuset_t opal_hwloc_my_cpuset; -OPAL_DECLSPEC extern bool opal_hwloc_report_bindings; -OPAL_DECLSPEC extern hwloc_obj_type_t opal_hwloc_levels[]; -OPAL_DECLSPEC extern bool opal_hwloc_use_hwthreads_as_cpus; END_C_DECLS diff --git a/opal/mca/memory/patcher/memory_patcher_component.c b/opal/mca/memory/patcher/memory_patcher_component.c index d48bf65f716..f1321ba1ab8 100644 --- a/opal/mca/memory/patcher/memory_patcher_component.c +++ b/opal/mca/memory/patcher/memory_patcher_component.c @@ -303,7 +303,7 @@ static int _intercept_madvise(void *start, size_t length, int advice) advice == MADV_REMOVE || # endif advice == POSIX_MADV_DONTNEED) { - opal_mem_hooks_release_hook(start, length, false); + opal_mem_hooks_release_hook(start, length, true); } if (!original_madvise) { @@ -607,7 +607,7 @@ static int patcher_open(void) rc = opal_patcher->patch_symbol("mmap", (uintptr_t) intercept_mmap, (uintptr_t *) &original_mmap); if (OPAL_SUCCESS != rc) { - return rc; + goto err_patching; } #endif @@ -615,7 +615,7 @@ static int 
patcher_open(void) rc = opal_patcher->patch_symbol("munmap", (uintptr_t) intercept_munmap, (uintptr_t *) &original_munmap); if (OPAL_SUCCESS != rc) { - return rc; + goto err_patching; } #endif @@ -623,7 +623,7 @@ static int patcher_open(void) rc = opal_patcher->patch_symbol("mremap", (uintptr_t) intercept_mremap, (uintptr_t *) &original_mremap); if (OPAL_SUCCESS != rc) { - return rc; + goto err_patching; } #endif @@ -631,7 +631,7 @@ static int patcher_open(void) rc = opal_patcher->patch_symbol("madvise", (uintptr_t) intercept_madvise, (uintptr_t *) &original_madvise); if (OPAL_SUCCESS != rc) { - return rc; + goto err_patching; } #endif @@ -640,7 +640,7 @@ static int patcher_open(void) rc = opal_patcher->patch_symbol("shmat", (uintptr_t) intercept_shmat, (uintptr_t *) &original_shmat); if (OPAL_SUCCESS != rc) { - return rc; + goto err_patching; } # endif #endif @@ -650,7 +650,7 @@ static int patcher_open(void) rc = opal_patcher->patch_symbol("shmdt", (uintptr_t) intercept_shmdt, (uintptr_t *) &original_shmdt); if (OPAL_SUCCESS != rc) { - return rc; + goto err_patching; } # endif #endif @@ -659,6 +659,18 @@ static int patcher_open(void) rc = opal_patcher->patch_symbol("brk", (uintptr_t) intercept_brk, (uintptr_t *) &original_brk); #endif + if (OPAL_SUCCESS != rc) { + goto err_patching; + } + + return OPAL_SUCCESS; + +err_patching: + /* In the case we had a problem patching, set this flag to 0 so we do not + directly return OPAL_SUCCESS if we call patcher_open() again. */ + was_executed_already = 0; + opal_patcher_base_restore_all(); + return rc; } diff --git a/opal/mca/mpool/base/mpool_base_alloc.c b/opal/mca/mpool/base/mpool_base_alloc.c index 1bd7dfc4445..09e5ae07edf 100644 --- a/opal/mca/mpool/base/mpool_base_alloc.c +++ b/opal/mca/mpool/base/mpool_base_alloc.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -62,26 +62,40 @@ void *mca_mpool_base_alloc(size_t size, opal_info_t *info, const char *hints) mca_mpool_base_tree_item_t *mpool_tree_item = NULL; mca_mpool_base_module_t *mpool; void *mem = NULL; -#if defined(TODO_BTL_GB) - int flag = 0; -#endif /* defined(TODO_BTL_GB) */ + opal_cstring_t *align_info_str; + long long memory_alignment = OPAL_ALIGN_MIN; mpool_tree_item = mca_mpool_base_tree_item_get(); if (!mpool_tree_item) { return NULL; } + if (NULL != info) { + int flag; + opal_info_get(info, "mpi_minimum_memory_alignment", + &align_info_str, &flag); + + if (flag) { + long long tmp_align = atoll(align_info_str->string); + OBJ_RELEASE(align_info_str); + if (tmp_align > memory_alignment) { + memory_alignment = tmp_align; + } + } + } + mpool_tree_item->num_bytes = size; mpool_tree_item->count = 0; mpool = mca_mpool_base_module_lookup(hints); if (NULL != mpool) { - mem = mpool->mpool_alloc(mpool, size, OPAL_ALIGN_MIN, 0); + mem = mpool->mpool_alloc (mpool, size, memory_alignment, 0); } if (NULL == mem) { - /* fall back on malloc */ - mem = malloc(size); + /* fall back to default mpool */ + mem = mca_mpool_base_default_module->mpool_alloc(mca_mpool_base_default_module, + size, memory_alignment, 0); mca_mpool_base_tree_item_put(mpool_tree_item); } else { @@ -114,7 +128,7 @@ int mca_mpool_base_free(void *base) if (!mpool_tree_item) { /* nothing in the tree this was just plain old malloc'd memory */ - free(base); + mca_mpool_base_default_module->mpool_free(mca_mpool_base_default_module, base); return OPAL_SUCCESS; } diff --git a/opal/mca/mpool/base/mpool_base_default.c b/opal/mca/mpool/base/mpool_base_default.c index e2251330846..e3816b1192f 100644 --- 
a/opal/mca/mpool/base/mpool_base_default.c +++ b/opal/mca/mpool/base/mpool_base_default.c @@ -34,7 +34,12 @@ static void *mca_mpool_default_alloc(mca_mpool_base_module_t *mpool, size_t size if (align <= sizeof(void *)) { addr = malloc(size); } else { - (void) posix_memalign(&addr, align, size); + int ret = posix_memalign(&addr, align, size); + if (0 != ret) { + /* old systems may not guarantee that addr wasn't modified + on failure */ + addr = NULL; + } } return addr; #else diff --git a/opal/mca/mpool/hugepage/mpool_hugepage_module.c b/opal/mca/mpool/hugepage/mpool_hugepage_module.c index 874e8996fd1..338dc92fb50 100644 --- a/opal/mca/mpool/hugepage/mpool_hugepage_module.c +++ b/opal/mca/mpool/hugepage/mpool_hugepage_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -245,11 +245,11 @@ static void mca_mpool_hugepage_finalize(struct mca_mpool_base_module_t *mpool) { mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool; - OBJ_DESTRUCT(&hugepage_module->lock); - OBJ_DESTRUCT(&hugepage_module->allocation_tree); - if (hugepage_module->allocator) { (void) hugepage_module->allocator->alc_finalize(hugepage_module->allocator); hugepage_module->allocator = NULL; } + OBJ_DESTRUCT(&hugepage_module->lock); + OBJ_DESTRUCT(&hugepage_module->allocation_tree); + } diff --git a/opal/mca/patcher/base/base.h b/opal/mca/patcher/base/base.h index cbc32120e37..66f4ce8516a 100644 --- a/opal/mca/patcher/base/base.h +++ b/opal/mca/patcher/base/base.h @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC.
All rights * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +26,7 @@ #define OPAL_PATCHER_BASE_H #include "opal_config.h" +#include "opal/opal_portable_platform.h" #include "opal/mca/base/mca_base_framework.h" #include "opal/mca/patcher/patcher.h" @@ -63,12 +66,13 @@ OBJ_CLASS_DECLARATION(mca_patcher_base_patch_t); */ OPAL_DECLSPEC extern mca_base_framework_t opal_patcher_base_framework; OPAL_DECLSPEC int opal_patcher_base_select(void); +OPAL_DECLSPEC int opal_patcher_base_restore_all(void); OPAL_DECLSPEC int mca_patcher_base_patch_hook(mca_patcher_base_module_t *module, uintptr_t hook); OPAL_DECLSPEC void mca_base_patcher_patch_apply_binary(mca_patcher_base_patch_t *patch); static inline uintptr_t mca_patcher_base_addr_text(uintptr_t addr) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) && (!defined(_CALL_ELF) || (_CALL_ELF != 2)) +#if defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) && (!defined(_CALL_ELF) || (_CALL_ELF != 2)) struct odp_t { uintptr_t text; uintptr_t toc; diff --git a/opal/mca/patcher/base/patcher_base_frame.c b/opal/mca/patcher/base/patcher_base_frame.c index d44ffc78e42..6229f6aed80 100644 --- a/opal/mca/patcher/base/patcher_base_frame.c +++ b/opal/mca/patcher/base/patcher_base_frame.c @@ -54,17 +54,35 @@ int opal_patcher_base_select(void) return OPAL_SUCCESS; } -static int opal_patcher_base_close(void) +int opal_patcher_base_restore_all(void) { + mca_patcher_base_patch_t *patch, *patch_next; + if (opal_patcher == &empty_module) { return OPAL_SUCCESS; } - mca_patcher_base_patch_t *patch; - OPAL_LIST_FOREACH_REV (patch, &opal_patcher->patch_list, mca_patcher_base_patch_t) { - patch->patch_restore(patch); + opal_mutex_lock(&opal_patcher->patch_list_mutex); + + OPAL_LIST_FOREACH_SAFE_REV(patch, patch_next, &opal_patcher->patch_list, mca_patcher_base_patch_t) { + patch->patch_restore (patch); + 
opal_list_remove_item(&opal_patcher->patch_list, &patch->super); + OBJ_RELEASE(patch); + } + + opal_mutex_unlock(&opal_patcher->patch_list_mutex); + + return OPAL_SUCCESS; +} + +static int opal_patcher_base_close(void) +{ + if (opal_patcher == &empty_module) { + return OPAL_SUCCESS; } + opal_patcher_base_restore_all(); + OPAL_LIST_DESTRUCT(&opal_patcher->patch_list); OBJ_DESTRUCT(&opal_patcher->patch_list_mutex); diff --git a/opal/mca/patcher/base/patcher_base_patch.c b/opal/mca/patcher/base/patcher_base_patch.c index 8e84847733e..57ee1d291a2 100644 --- a/opal/mca/patcher/base/patcher_base_patch.c +++ b/opal/mca/patcher/base/patcher_base_patch.c @@ -4,6 +4,8 @@ * reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -15,6 +17,7 @@ #include "opal/mca/patcher/base/base.h" #include "opal/mca/patcher/patcher.h" +#include "opal/opal_portable_platform.h" #include "opal/prefetch.h" #include "opal/util/sys_limits.h" #include @@ -33,7 +36,7 @@ static void mca_patcher_base_patch_destruct(mca_patcher_base_patch_t *patch) OBJ_CLASS_INSTANCE(mca_patcher_base_patch_t, opal_list_item_t, mca_patcher_base_patch_construct, mca_patcher_base_patch_destruct); -#if defined(__PPC__) +#if defined(PLATFORM_ARCH_POWERPC) // PowerPC instructions used in patching // Reference: "PowerPC User Instruction Set Architecture" @@ -65,7 +68,7 @@ static unsigned int rldicr(unsigned int RT, unsigned int RS, unsigned int SH, un static int PatchLoadImm(uintptr_t addr, unsigned int reg, size_t value) { -# if defined(__PPC64__) +# if defined(PLATFORM_ARCH_64) *(unsigned int *) (addr + 0) = addis(reg, 0, (value >> 48)); *(unsigned int *) (addr + 4) = ori(reg, reg, (value >> 32)); *(unsigned int *) (addr + 8) = rldicr(reg, reg, 32, 31); @@ -84,7 +87,7 @@ static int PatchLoadImm(uintptr_t addr, unsigned 
int reg, size_t value) #if !HAVE___CLEAR_CACHE static void flush_and_invalidate_cache(unsigned long a) { -# if OPAL_ASSEMBLY_ARCH == OPAL_IA32 +# if defined(PLATFORM_ARCH_X86) static int have_clflush = -1; if (OPAL_UNLIKELY(-1 == have_clflush)) { @@ -107,9 +110,9 @@ static void flush_and_invalidate_cache(unsigned long a) /* does not work with AMD processors */ __asm__ volatile("mfence;clflush %0;mfence" : : "m"(*(char *) a)); } -# elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 +# elif defined(PLATFORM_ARCH_X86_64) __asm__ volatile("mfence;clflush %0;mfence" : : "m"(*(char *) a)); -# elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +# elif defined(PLATFORM_ARCH_AARCH64) __asm__ volatile("dc cvau, %0\n\t" "dsb ish\n\t" "ic ivau, %0\n\t" @@ -128,7 +131,7 @@ static void ModifyMemoryProtection(uintptr_t addr, size_t length, int prot) length = bound - base; -#if defined(__PPC__) +#if defined(PLATFORM_ARCH_POWERPC) /* NTH: is a loop necessary here? */ do { if (mprotect((void *) base, page_size, prot)) @@ -154,7 +157,7 @@ static inline void apply_patch(unsigned char *patch_data, uintptr_t address, siz #else size_t offset_jump = 16; -# if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +# if defined(PLATFORM_ARCH_AARCH64) offset_jump = 32; # endif @@ -184,7 +187,7 @@ void mca_base_patcher_patch_apply_binary(mca_patcher_base_patch_t *patch) int mca_patcher_base_patch_hook(mca_patcher_base_module_t *module, uintptr_t hook_addr) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) +#if defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) mca_patcher_base_patch_t *hook_patch; const unsigned int nop = 0x60000000; diff --git a/opal/mca/patcher/overwrite/patcher_overwrite_module.c b/opal/mca/patcher/overwrite/patcher_overwrite_module.c index bb6d2344a55..0b305d04181 100644 --- a/opal/mca/patcher/overwrite/patcher_overwrite_module.c +++ b/opal/mca/patcher/overwrite/patcher_overwrite_module.c @@ -5,6 +5,8 @@ * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +22,7 @@ #include "opal/prefetch.h" #include "opal/util/output.h" #include "opal/util/sys_limits.h" +#include "opal/opal_portable_platform.h" #include #include @@ -30,19 +33,19 @@ #include #include -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) static int mca_patcher_overwrite_apply_patch(mca_patcher_base_patch_t *patch) { uintptr_t func_new_addr = patch->patch_value; { -# if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) +# if defined(PLATFORM_ARCH_32) patch->patch_data_size = 5; *(unsigned char *) (patch->patch_data + 0) = 0xe9; *(unsigned int *) (patch->patch_data + 1) = (unsigned int) (func_new_addr - patch->patch_orig - 5); -# elif (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +# elif defined(PLATFORM_ARCH_64) patch->patch_data_size = 13; *(unsigned short *) (patch->patch_data + 0) = 0xbb49; *(unsigned long *) (patch->patch_data + 2) = (unsigned long) func_new_addr; @@ -57,9 +60,9 @@ static int mca_patcher_overwrite_apply_patch(mca_patcher_base_patch_t *patch) return OPAL_SUCCESS; } -/* end of #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) */ +/* end of #if defined(__i386__) || defined(__x86_64__) */ // ------------------------------------------------- PPC equivalent: -#elif (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32) || (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) +#elif defined(PLATFORM_ARCH_POWERPC) // PowerPC instructions used in patching // Reference: "PowerPC User Instruction Set Architecture" @@ -91,7 +94,7 @@ static unsigned int rldicr(unsigned int RT, unsigned int RS, unsigned int SH, un static int PatchLoadImm(uintptr_t addr, unsigned int reg, size_t value) { -# if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) +# if 
defined(PLATFORM_ARCH_64) *(unsigned int *) (addr + 0) = addis(reg, 0, (value >> 48)); *(unsigned int *) (addr + 4) = ori(reg, reg, (value >> 32)); *(unsigned int *) (addr + 8) = rldicr(reg, reg, 32, 31); @@ -115,7 +118,7 @@ static int mca_patcher_overwrite_apply_patch(mca_patcher_base_patch_t *patch) hook_addr = mca_patcher_base_addr_text(patch->patch_value); // Patch for hook function: -# if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) +# if defined(PLATFORM_ARCH_64) rc = mca_patcher_base_patch_hook(&mca_patcher_overwrite_module, hook_addr); if (OPAL_SUCCESS != rc) { return rc; @@ -142,7 +145,7 @@ static int mca_patcher_overwrite_apply_patch(mca_patcher_base_patch_t *patch) return OPAL_SUCCESS; } -#elif defined(__aarch64__) +#elif defined(PLATFORM_ARCH_AARCH64) /** * @brief Generate a mov immediate instruction @@ -214,6 +217,78 @@ static int mca_patcher_overwrite_apply_patch(mca_patcher_base_patch_t *patch) #endif +/* + * The logic in this function for each platform is based on code from + * mca_patcher_overwrite_apply_patch(). There are 2 general approaches: + * 1: Directly check constant instructions (ignoring addresses as parameters) + * 2: Generate a bit mask by passing min and max values to underlying helper + * functions and negate the XOR'ed results. These results can be used to + * mask off transient values (like addresses) and non-instruction values + * (like register contents). Once the masks are applied, the results are + * compared against the min values directly to check for equality. If equal, + * we consider the memory to be previously patched.
+ */ +static bool mca_patcher_is_function_patched(unsigned char *target) +{ + +#if defined(PLATFORM_ARCH_X86) + return (*(unsigned char *)target == 0xe9); +#elif defined(PLATFORM_ARCH_X86_64) + return ( + (*(unsigned short*)(target + 0) == 0xbb49) && + (*(unsigned char* )(target +10) == 0x41 ) && + (*(unsigned char* )(target +11) == 0xff ) && + (*(unsigned char* )(target +12) == 0xe3 ) + ); +#elif defined(PLATFORM_ARCH_POWERPC) + const unsigned int gr_max = 0xF; //11 is used in our patching code, but is the max 4 or 5 bits? + const unsigned int addr_max = 0xFFFF; + unsigned int addis_base = addis( 0, 0, 0); + unsigned int addis_mask = ~(addis_base ^ addis( gr_max, 0, addr_max)); + unsigned int ori_base = ori( 0, 0, 0); + unsigned int ori_mask = ~( ori_base ^ ori( gr_max, gr_max, addr_max)); + unsigned int mtspr_base = mtspr( 9, 0); // 9 = CTR + unsigned int mtspr_mask = ~(mtspr_base ^ mtspr( 9, gr_max)); + unsigned int bcctr_base = bcctr(20, 0, 0); // 20 = always + unsigned int bcctr_mask = ~(bcctr_base ^ bcctr(20, 0, 0)); +#if defined(PLATFORM_ARCH_32) + + return ( + ((*(unsigned int *) (target + 0 )) & addis_mask) == addis_base && + ((*(unsigned int *) (target + 4 )) & ori_mask) == ori_base && + ((*(unsigned int *) (target + 8 )) & mtspr_mask) == mtspr_base && + ((*(unsigned int *) (target + 12)) & bcctr_mask) == bcctr_base + ); +#else + unsigned int rldicr_base = rldicr( 0, 0, 32, 31); + unsigned int rldicr_mask = ~(rldicr_base ^ rldicr( gr_max, gr_max, 32, 31)); + unsigned int oris_base = oris( 0, 0, 0); + unsigned int oris_mask = ~(oris_base ^ oris( gr_max, gr_max, addr_max)); + + return ( + ((*(unsigned int *) (target + 0 )) & addis_mask) == addis_base && + ((*(unsigned int *) (target + 4 )) & ori_mask) == ori_base && + ((*(unsigned int *) (target + 8 )) & rldicr_mask) == rldicr_base && + ((*(unsigned int *) (target + 12)) & oris_mask) == oris_base && + ((*(unsigned int *) (target + 16)) & ori_mask) == ori_base && + ((*(unsigned int *) (target + 20)) & 
mtspr_mask) == mtspr_base && + ((*(unsigned int *) (target + 24)) & bcctr_mask) == bcctr_base + ); +#endif +#elif defined(PLATFORM_ARCH_AARCH64) + uint32_t mov_mask=~((0xFFFF << 5) | 0x1F); + uint32_t br_mask=~(0x1F << 5); + + return ( + ((*(uint32_t *) (target + 0)) & mov_mask) == mov(0, 3, 0) && + ((*(uint32_t *) (target + 4)) & mov_mask) == movk(0, 2, 0) && + ((*(uint32_t *) (target + 8)) & mov_mask) == movk(0, 1, 0) && + ((*(uint32_t *) (target + 12)) & mov_mask) == movk(0, 0, 0) && + ((*(uint32_t *) (target + 16)) & br_mask) == br(0) + ); +#endif +} + static int mca_patcher_overwrite_patch_address(uintptr_t sys_addr, uintptr_t hook_addr) { mca_patcher_base_patch_t *patch; @@ -268,7 +343,13 @@ static int mca_patcher_overwrite_patch_symbol(const char *func_symbol_name, uint *func_old_addr = 0; } - return mca_patcher_overwrite_patch_address(old_addr, func_new_addr); + if (mca_patcher_is_function_patched((unsigned char*)old_addr)) { + opal_output_verbose(10, 0, "function %s is already patched; stopping further patching\n", + func_symbol_name); + return OPAL_ERR_RESOURCE_BUSY; + } else { + return mca_patcher_overwrite_patch_address(old_addr, func_new_addr); + } } mca_patcher_base_module_t mca_patcher_overwrite_module = { diff --git a/opal/mca/patcher/patcher.h b/opal/mca/patcher/patcher.h index 2ebf752e6d1..8779a583f77 100644 --- a/opal/mca/patcher/patcher.h +++ b/opal/mca/patcher/patcher.h @@ -2,6 +2,8 @@ /* * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -17,11 +19,12 @@ #include "opal/class/opal_list.h" #include "opal/mca/base/base.h" #include "opal/mca/mca.h" +#include "opal/opal_portable_platform.h" /* Any function being patched in as a hook must use SYMBOLPATCH_BEGIN at the top, * and SYMBOLPATCH_END before it returns (this is just for PPC). 
*/ -#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) +#if defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) /* special processing for ppc64 to save and restore TOC (r2) * Reference: "64-bit PowerPC ELF Application Binary Interface Supplement 1.9" */ diff --git a/opal/mca/pmix/pmix-internal.h b/opal/mca/pmix/pmix-internal.h index 277a46ec751..4e10393f60f 100644 --- a/opal/mca/pmix/pmix-internal.h +++ b/opal/mca/pmix/pmix-internal.h @@ -5,7 +5,7 @@ * reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights + * Copyright (c) 2020-2021 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. * All Rights reserved. @@ -98,6 +98,7 @@ typedef struct { opal_pmix_condition_t cond; volatile bool active; int status; + size_t errhandler_ref; char *msg; } opal_pmix_lock_t; diff --git a/opal/mca/rcache/base/rcache_base_create.c b/opal/mca/rcache/base/rcache_base_create.c index 7f2ce5e5b01..cd7dc5b8336 100644 --- a/opal/mca/rcache/base/rcache_base_create.c +++ b/opal/mca/rcache/base/rcache_base_create.c @@ -37,6 +37,8 @@ #include "opal/memoryhooks/memory.h" #include "opal/runtime/opal_params.h" +static int use_safety_valve = 0; + mca_rcache_base_module_t * mca_rcache_base_module_create(const char *name, void *user_data, struct mca_rcache_base_resources_t *resources) @@ -70,6 +72,7 @@ mca_rcache_base_module_create(const char *name, void *user_data, opal_leave_pinned = !opal_leave_pinned_pipeline; } opal_mem_hooks_register_release(mca_rcache_base_mem_cb, NULL); + use_safety_valve = 1; } else if (1 == opal_leave_pinned || opal_leave_pinned_pipeline) { opal_show_help("help-rcache-base.txt", "leave pinned failed", true, name, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), opal_process_info.nodename); @@ -121,3 +124,10 @@ int mca_rcache_base_module_destroy(mca_rcache_base_module_t *module) return 
OPAL_ERR_NOT_FOUND; } + +static void safety_valve(void) __attribute__((destructor)); +void safety_valve(void) { + if (use_safety_valve) { + opal_mem_hooks_unregister_release(mca_rcache_base_mem_cb); + } +} diff --git a/opal/mca/rcache/base/rcache_base_mem_cb.c b/opal/mca/rcache/base/rcache_base_mem_cb.c index ed7393c3ed3..f77037fc43e 100644 --- a/opal/mca/rcache/base/rcache_base_mem_cb.c +++ b/opal/mca/rcache/base/rcache_base_mem_cb.c @@ -30,6 +30,7 @@ #endif #include "opal/runtime/opal_params.h" +#include "opal/util/output.h" #include "opal/util/proc.h" #include "opal/util/show_help.h" @@ -72,7 +73,7 @@ void mca_rcache_base_mem_cb(void *base, size_t size, void *cbdata, bool from_all "will now abort.\n", opal_process_info.nodename, getpid(), base, (unsigned long) size); msg[sizeof(msg) - 1] = '\0'; - write(2, msg, len); + opal_best_effort_write(2, msg, len); } else { opal_show_help("help-rcache-base.txt", "cannot deregister in-use memory", true, current->rcache_component->rcache_version.mca_component_name, diff --git a/opal/include/opal/sys/powerpc/update.sh b/opal/mca/smsc/Makefile.am similarity index 51% rename from opal/include/opal/sys/powerpc/update.sh rename to opal/mca/smsc/Makefile.am index 095868d4fb5..975eee7705f 100644 --- a/opal/include/opal/sys/powerpc/update.sh +++ b/opal/mca/smsc/Makefile.am @@ -1,4 +1,3 @@ -#!/bin/sh # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology @@ -10,6 +9,8 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2021 Google, LLC. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -17,23 +18,21 @@ # $HEADER$ # -CFILE=/tmp/opal_asm_$$.c +# main library setup +noinst_LTLIBRARIES = libmca_smsc.la +libmca_smsc_la_SOURCES = -trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 +# local files +headers = smsc.h +libmca_smsc_la_SOURCES += $(headers) -echo Updating asm.s from atomic.h and timer.h using gcc +# Conditionally install the header files +if WANT_INSTALL_HEADERS +opaldir = $(opalincludedir)/$(subdir) +nobase_opal_HEADERS = $(headers) +endif -cat > $CFILE< -#include -#define static -#define inline -#define OPAL_GCC_INLINE_ASSEMBLY 1 -#include "../architecture.h" -#include "atomic.h" -#include "timer.h" -EOF +include base/Makefile.am -gcc -O1 -mpowerpc64 -mcpu=970 -DOPAL_ASSEMBLY_ARCH=POWERPC32 -DOPAL_ASM_SUPPORT_64BIT=1 -I. -S $CFILE -o asm-32-64.s -gcc -O1 -DOPAL_ASSEMBLY_ARCH=OPAL_POWERPC32 -DOPAL_ASM_SUPPORT_64BIT=0 -I. -S $CFILE -o asm-32.s -gcc -m64 -O1 -finline-functions -DOPAL_ASSEMBLY_ARCH=OPAL_POWERPC64 -DOPAL_ASM_SUPPORT64BIT=1 -I. -S $CFILE -o asm-64.s +distclean-local: + rm -f base/static-components.h diff --git a/opal/include/opal/sys/x86_64/update.sh b/opal/mca/smsc/base/Makefile.am similarity index 67% rename from opal/include/opal/sys/x86_64/update.sh rename to opal/mca/smsc/base/Makefile.am index dbef4d61cd0..28a0678cd17 100644 --- a/opal/include/opal/sys/x86_64/update.sh +++ b/opal/mca/smsc/base/Makefile.am @@ -1,4 +1,3 @@ -#!/bin/sh # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology @@ -10,6 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2021 Google, LLC. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -17,20 +17,10 @@ # $HEADER$ # -CFILE=/tmp/opal_atomic_$$.c +AM_CPPFLAGS = ${smsc_@DIRECT_smsc@_CPPFLAGS} -trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 +headers += \ + base/base.h -echo Updating asm.s from atomic.h and timer.h using gcc - -cat > $CFILE< -#include -#define static -#define inline -#define OPAL_GCC_INLINE_ASSEMBLY 1 -#include "atomic.h" -#include "timer.h" -EOF - -gcc -O3 -I. -S $CFILE -o asm.s +libmca_smsc_la_SOURCES += \ + base/smsc_base_frame.c diff --git a/opal/mca/smsc/base/base.h b/opal/mca/smsc/base/base.h new file mode 100644 index 00000000000..430f855842f --- /dev/null +++ b/opal/mca/smsc/base/base.h @@ -0,0 +1,23 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2021 Google, LLC. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MCA_SMSC_BASE_BASE_H +#define OPAL_MCA_SMSC_BASE_BASE_H + +#include "opal/mca/smsc/smsc.h" + +extern mca_base_framework_t opal_smsc_base_framework; +extern mca_smsc_component_t *selected_component; +extern mca_smsc_module_t *selected_module; + +int mca_smsc_base_select(void); +void mca_smsc_base_register_default_params(mca_smsc_component_t *component, int default_priority); + +#endif /* OPAL_MCA_SMSC_BASE_BASE_H */ diff --git a/opal/mca/smsc/base/smsc_base_frame.c b/opal/mca/smsc/base/smsc_base_frame.c new file mode 100644 index 00000000000..db99074b0e0 --- /dev/null +++ b/opal/mca/smsc/base/smsc_base_frame.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2021 Google, LLC. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include +#include + +#include "opal/class/opal_list.h" +#include "opal/mca/base/base.h" +#include "opal/mca/mca.h" +#include "opal/mca/smsc/base/base.h" +#include "opal/mca/smsc/smsc.h" +#include "opal/util/printf.h" + +/* + * The following file was created by configure. It contains extern + * statements and the definition of an array of pointers to each + * component's public mca_base_component_t struct. + */ +#include "opal/mca/smsc/base/static-components.h" + +mca_smsc_component_t *selected_component = NULL; +mca_smsc_module_t *mca_smsc = NULL; + +/* + * Global variables + */ +MCA_BASE_FRAMEWORK_DECLARE(opal, smsc, NULL, NULL, NULL, NULL, mca_smsc_base_static_components, 0); + +static int mca_smsc_compare_components(opal_list_item_t **a, opal_list_item_t **b) +{ + mca_smsc_component_t *componenta + = (mca_smsc_component_t *) ((mca_base_component_list_item_t *) *a)->cli_component; + mca_smsc_component_t *componentb + = (mca_smsc_component_t *) ((mca_base_component_list_item_t *) *b)->cli_component; + + return (componenta->priority > componentb->priority) + ? -1 + : ((componenta->priority < componentb->priority) ? 
1 : 0); +} + +int mca_smsc_base_select(void) +{ + mca_base_component_list_item_t *cli, *next; + + OPAL_LIST_FOREACH_SAFE (cli, next, &opal_smsc_base_framework.framework_components, + mca_base_component_list_item_t) { + mca_smsc_component_t *component = (mca_smsc_component_t *) cli->cli_component; + + opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, opal_smsc_base_framework.framework_output, + "mca_smsc_base_select: checking component %s", + component->smsc_version.mca_component_name); + + int ret = component->query(); + if (OPAL_SUCCESS != ret) { + opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, + opal_smsc_base_framework.framework_output, + "mca_smsc_base_select: could not select component %s. query " + "returned error code %d", + component->smsc_version.mca_component_name, ret); + opal_list_remove_item(&opal_smsc_base_framework.framework_components, &cli->super); + OBJ_RELEASE(cli); + mca_base_component_close(&component->smsc_version, + opal_smsc_base_framework.framework_output); + continue; + } + opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, opal_smsc_base_framework.framework_output, + "mca_smsc_base_select: component %s priority=%d", + component->smsc_version.mca_component_name, component->priority); + } + + opal_list_sort(&opal_smsc_base_framework.framework_components, mca_smsc_compare_components); + + if (opal_list_get_size(&opal_smsc_base_framework.framework_components) > 0) { + cli = (mca_base_component_list_item_t *) opal_list_get_first( + &opal_smsc_base_framework.framework_components); + + selected_component = (mca_smsc_component_t *) cli->cli_component; + mca_smsc = selected_component->enable(); + + opal_output_verbose( + MCA_BASE_VERBOSE_COMPONENT, opal_smsc_base_framework.framework_output, + "mca_smsc_base_select: selected shared-memory single-copy component: %s", + selected_component->smsc_version.mca_component_name); + } else { + opal_output_verbose( + MCA_BASE_VERBOSE_COMPONENT, opal_smsc_base_framework.framework_output, + "mca_smsc_base_select: 
no shared-memory single-copy component available for selection"); + } + + return OPAL_SUCCESS; +} + +void mca_smsc_base_register_default_params(mca_smsc_component_t *component, int default_priority) +{ + + char *tmp; + (void) opal_asprintf(&tmp, "Priority of the %s component (default: %d)", + component->smsc_version.mca_component_name, default_priority); + component->priority = default_priority; + (void) mca_base_component_var_register(&component->smsc_version, "priority", /*help_msg=*/tmp, + MCA_BASE_VAR_TYPE_INT, /*enumerator=*/NULL, /*bind=*/0, + MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_ALL_EQ, &component->priority); + free(tmp); +} diff --git a/opal/mca/smsc/cma/Makefile.am b/opal/mca/smsc/cma/Makefile.am new file mode 100644 index 00000000000..02b539f0dcd --- /dev/null +++ b/opal/mca/smsc/cma/Makefile.am @@ -0,0 +1,56 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2009 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2020-2021 Google, LLC. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST = post_configure.sh + +AM_CPPFLAGS = $(smsc_cma_CPPFLAGS) + +libmca_smsc_cma_la_sources = \ + smsc_cma_component.c \ + smsc_cma_module.c \ + smsc_cma_internal.h \ + smsc_cma.h + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if MCA_BUILD_opal_smsc_cma_DSO +component_noinst = +component_install = mca_smsc_cma.la +else +component_noinst = libmca_smsc_cma.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_smsc_cma_la_SOURCES = $(libmca_smsc_cma_la_sources) +mca_smsc_cma_la_LDFLAGS = -module -avoid-version $(smsc_cma_LDFLAGS) +mca_smsc_cma_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la \ + $(smsc_cma_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_smsc_cma_la_SOURCES = $(libmca_smsc_cma_la_sources) +libmca_smsc_cma_la_LIBADD = $(smsc_cma_LIBS) +libmca_smsc_cma_la_LDFLAGS = -module -avoid-version $(smsc_cma_LDFLAGS) diff --git a/opal/mca/smsc/cma/configure.m4 b/opal/mca/smsc/cma/configure.m4 new file mode 100644 index 00000000000..fc8ff2a09f4 --- /dev/null +++ b/opal/mca/smsc/cma/configure.m4 @@ -0,0 +1,31 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2021 Google, LLC. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_smsc_cma_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_smsc_cma_CONFIG],[ + AC_CONFIG_FILES([opal/mca/smsc/cma/Makefile]) + + OPAL_CHECK_CMA([smsc_cma], [AC_CHECK_HEADER([sys/prctl.h]) $1], [$2]) + + AC_SUBST([smsc_cma_CFLAGS]) + AC_SUBST([smsc_cma_CPPFLAGS]) + AC_SUBST([smsc_cma_LDFLAGS]) + AC_SUBST([smsc_cma_LIBS]) +])dnl diff --git a/opal/mca/smsc/cma/post_configure.sh b/opal/mca/smsc/cma/post_configure.sh new file mode 100644 index 00000000000..3059fa936d4 --- /dev/null +++ b/opal/mca/smsc/cma/post_configure.sh @@ -0,0 +1 @@ +DIRECT_CALL_HEADER="opal/mca/smsc/cma/smsc_cma.h" diff --git a/opal/mca/smsc/cma/smsc_cma.h b/opal/mca/smsc/cma/smsc_cma.h new file mode 100644 index 00000000000..64aa9dd9497 --- /dev/null +++ b/opal/mca/smsc/cma/smsc_cma.h @@ -0,0 +1,33 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2021 Google, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MCA_SMSC_CMA_SMSC_CMA_H +#define OPAL_MCA_SMSC_CMA_SMSC_CMA_H + +#include "opal_config.h" + +#include "opal/mca/smsc/smsc.h" + +mca_smsc_endpoint_t *mca_smsc_cma_get_endpoint(opal_proc_t *peer_proc); +void mca_smsc_cma_return_endpoint(mca_smsc_endpoint_t *endpoint); + +int mca_smsc_cma_copy_to(mca_smsc_endpoint_t *endpoint, void *local_address, void *remote_address, + size_t size, void *reg_handle); +int mca_smsc_cma_copy_from(mca_smsc_endpoint_t *endpoint, void *local_address, void *remote_address, + size_t size, void *reg_handle); + +/* unsupported interfaces defined to support MCA direct */ +void *mca_smsc_cma_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags, + void *remote_address, size_t size, void **local_mapping); +void mca_smsc_cma_unmap_peer_region(void *ctx); +void *mca_smsc_cma_register_region(void *local_address, size_t size); +void mca_smsc_cma_deregister_region(void *reg_data); + +#endif /* OPAL_MCA_SMSC_CMA_SMSC_CMA_H */ diff --git a/opal/mca/smsc/cma/smsc_cma_component.c b/opal/mca/smsc/cma/smsc_cma_component.c new file mode 100644 index 00000000000..4ccd731ba24 --- /dev/null +++ b/opal/mca/smsc/cma/smsc_cma_component.c @@ -0,0 +1,147 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2021 Google, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "opal_config.h" + +#include "opal/mca/smsc/base/base.h" +#include "opal/mca/smsc/cma/smsc_cma_internal.h" + +#include +#include +#include +#include +#include +#include + +static int mca_smsc_cma_component_register(void); +static int mca_smsc_cma_component_open(void); +static int mca_smsc_cma_component_close(void); +static int mca_smsc_cma_component_query(void); +static mca_smsc_module_t *mca_smsc_cma_component_enable(void); + +#define MCA_SMSC_CMA_DEFAULT_PRIORITY 37 +static const int mca_smsc_cma_default_priority = MCA_SMSC_CMA_DEFAULT_PRIORITY; + +mca_smsc_component_t mca_smsc_cma_component = { + .smsc_version = { + MCA_SMSC_DEFAULT_VERSION("cma"), + .mca_open_component = mca_smsc_cma_component_open, + .mca_close_component = mca_smsc_cma_component_close, + .mca_register_component_params = mca_smsc_cma_component_register, + }, + .priority = MCA_SMSC_CMA_DEFAULT_PRIORITY, + .query = mca_smsc_cma_component_query, + .enable = mca_smsc_cma_component_enable, +}; + +static int mca_smsc_cma_component_register(void) +{ + mca_smsc_base_register_default_params(&mca_smsc_cma_component, mca_smsc_cma_default_priority); + return OPAL_SUCCESS; +} + +static int mca_smsc_cma_component_open(void) +{ + /* nothing to do */ + return OPAL_SUCCESS; +} + +static int mca_smsc_cma_component_close(void) +{ + /* nothing to do */ + return OPAL_SUCCESS; +} + +/* + * mca_smsc_cma_get_user_ns_id() tries to get the user namespace ID + * of the current process. + * Returns the ID of the user namespace. In the case of an error '0' is returned.
+ */ +ino_t mca_smsc_cma_get_user_ns_id(void) +{ + struct stat buf; + + if (0 > stat("/proc/self/ns/user", &buf)) { + /* + * Something went wrong, probably an old kernel that does not support namespaces + * simply assume all processes are in the same user namespace and return 0 + */ + return 0; + } + + return buf.st_ino; +} + +static int mca_smsc_cma_send_modex(void) +{ + mca_smsc_cma_modex_t modex; + + modex.pid = getpid(); + modex.user_ns_id = mca_smsc_cma_get_user_ns_id(); + + int rc; + OPAL_MODEX_SEND(rc, PMIX_LOCAL, &mca_smsc_cma_component.smsc_version, &modex, sizeof(modex)); + return rc; +} + +static int mca_smsc_cma_component_query(void) +{ + /* Check if we have the proper permissions for CMA */ + char buffer = '0'; + bool cma_happy = false; + + /* check system setting for current ptrace scope */ + int fd = open("/proc/sys/kernel/yama/ptrace_scope", O_RDONLY); + if (0 <= fd) { + int ret = read(fd, &buffer, 1); + if (ret < 0) { + opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, + opal_smsc_base_framework.framework_output, + "mca_smsc_cma_component_query: could not read ptrace_scope. " + "assuming ptrace scope is 0"); + } + close(fd); + } + + /* ptrace scope 0 will allow an attach from any of the process owner's + * processes. ptrace scope 1 limits attachers to the process tree + * starting at the parent of this process. */ + if ('0' != buffer) { +#if defined PR_SET_PTRACER + /* try setting the ptrace scope to allow attach */ + int ret = prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0); + if (0 == ret) { + cma_happy = true; + } +#endif + } else { + cma_happy = true; + } + + if (!cma_happy) { + opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, opal_smsc_base_framework.framework_output, + "mca_smsc_cma_component_query: could not select for use. 
insufficient " + "ptrace permissions."); + mca_smsc_cma_component.priority = -1; + return OPAL_ERR_NOT_AVAILABLE; + } + + mca_smsc_cma_send_modex(); + + return OPAL_SUCCESS; +} + +static mca_smsc_module_t *mca_smsc_cma_component_enable(void) +{ + if (0 > mca_smsc_cma_component.priority) { + return NULL; + } + + return &mca_smsc_cma_module; +} diff --git a/opal/mca/smsc/cma/smsc_cma_internal.h b/opal/mca/smsc/cma/smsc_cma_internal.h new file mode 100644 index 00000000000..7de42b64a0c --- /dev/null +++ b/opal/mca/smsc/cma/smsc_cma_internal.h @@ -0,0 +1,37 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2021 Google, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MCA_SMSC_CMA_SMSC_CMA_INTERNAL_H +#define OPAL_MCA_SMSC_CMA_SMSC_CMA_INTERNAL_H + +#include "opal/mca/smsc/cma/smsc_cma.h" + +struct mca_smsc_cma_modex_t { + pid_t pid; + ino_t user_ns_id; +}; + +typedef struct mca_smsc_cma_modex_t mca_smsc_cma_modex_t; + +struct mca_smsc_cma_endpoint_t { + mca_smsc_endpoint_t super; + pid_t pid; +}; + +typedef struct mca_smsc_cma_endpoint_t mca_smsc_cma_endpoint_t; + +OBJ_CLASS_DECLARATION(mca_smsc_cma_endpoint_t); + +extern mca_smsc_module_t mca_smsc_cma_module; +extern mca_smsc_component_t mca_smsc_cma_component; + +ino_t mca_smsc_cma_get_user_ns_id(void); + +#endif /* OPAL_MCA_SMSC_CMA_SMSC_CMA_INTERNAL_H */ diff --git a/opal/mca/smsc/cma/smsc_cma_module.c b/opal/mca/smsc/cma/smsc_cma_module.c new file mode 100644 index 00000000000..4e536090152 --- /dev/null +++ b/opal/mca/smsc/cma/smsc_cma_module.c @@ -0,0 +1,213 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2010-2014 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2021 Google, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "opal_config.h" + +#include "opal/mca/pmix/pmix-internal.h" +#include "opal/mca/smsc/base/base.h" +#include "opal/mca/smsc/cma/smsc_cma_internal.h" + +#if OPAL_CMA_NEED_SYSCALL_DEFS +# include "opal/sys/cma.h" +#else +# include <sys/uio.h> +#endif /* OPAL_CMA_NEED_SYSCALL_DEFS */ + +OBJ_CLASS_INSTANCE(mca_smsc_cma_endpoint_t, opal_object_t, NULL, NULL); + +/** + * Create an endpoint for peer_proc. The peer's modex (pid and user namespace id) is + * received and the endpoint is only returned when the peer's user namespace id matches + * the one of this process. Returns NULL on allocation failure, modex failure or + * namespace mismatch. + */ +mca_smsc_endpoint_t *mca_smsc_cma_get_endpoint(opal_proc_t *peer_proc) +{ + mca_smsc_cma_endpoint_t *endpoint = OBJ_NEW(mca_smsc_cma_endpoint_t); + if (OPAL_UNLIKELY(NULL == endpoint)) { + return NULL; + } + + endpoint->super.proc = peer_proc; + + int rc; + size_t modex_size; + mca_smsc_cma_modex_t *modex; + OPAL_MODEX_RECV_IMMEDIATE(rc, &mca_smsc_cma_component.smsc_version, &peer_proc->proc_name, + (void **) &modex, &modex_size); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + OBJ_RELEASE(endpoint); + return NULL; + } + + ino_t my_ns_id = mca_smsc_cma_get_user_ns_id(); + if (modex->user_ns_id != my_ns_id) { + opal_output_verbose(MCA_BASE_VERBOSE_ERROR, opal_smsc_base_framework.framework_output, + "mca_smsc_cma_module_get_endpoint: can not proceed.
processes are in " + "different namespaces"); + /* can't use CMA with this peer */ + OBJ_RELEASE(endpoint); + free(modex); + return NULL; + } + + endpoint->pid = modex->pid; + /* the modex buffer is ours to release (it is freed on the mismatch path above); the pid + * has been copied out, so free it here instead of leaking it on the success path */ + free(modex); + return &endpoint->super; +} + +/** Release an endpoint obtained from mca_smsc_cma_get_endpoint(). */ +void mca_smsc_cma_return_endpoint(mca_smsc_endpoint_t *endpoint) +{ + OBJ_RELEASE(endpoint); +} + +/** Move an iovec forward by length bytes: bump the base pointer and shrink the remaining length. */ +static inline void mca_smsc_cma_iov_advance(struct iovec *iov, ssize_t length) +{ + iov->iov_base = (void *) ((uintptr_t) iov->iov_base + length); + iov->iov_len -= length; +} + +/** + * Write size bytes from local_address to remote_address in the peer identified by the + * endpoint's pid, using process_vm_writev(). reg_handle is ignored. Returns OPAL_SUCCESS + * once all bytes have been written and OPAL_ERROR otherwise. + */ +int mca_smsc_cma_copy_to(mca_smsc_endpoint_t *endpoint, void *local_address, void *remote_address, + size_t size, void *reg_handle) +{ + /* ignore the registration handle as it is not used for CMA */ + (void) reg_handle; + + mca_smsc_cma_endpoint_t *cma_endpoint = (mca_smsc_cma_endpoint_t *) endpoint; + + /* + * According to the man page : + * "On success, process_vm_readv() returns the number of bytes read and + * process_vm_writev() returns the number of bytes written. This return + * value may be less than the total number of requested bytes, if a + * partial read/write occurred. (Partial transfers apply at the + * granularity of iovec elements. These system calls won't perform a + * partial transfer that splits a single iovec element.)". + * So since we use a single iovec element, the returned size should either + * be 0 or size, and the do loop should not be needed here. + * We tried on various Linux kernels with size > 2 GB, and surprisingly, + * the returned value is always 0x7ffff000 (fwiw, it happens to be the size + * of the larger number of pages that fits a signed 32 bits integer). + * We do not know whether this is a bug from the kernel, the libc or even + * the man page, but for the time being, we do as if process_vm_readv() could + * return any value.
+ */ + struct iovec src_iov = { + .iov_base = local_address, + .iov_len = size, + }; + struct iovec dst_iov = { + .iov_base = remote_address, + .iov_len = size, + }; + ssize_t ret; + do { + ret = process_vm_writev(cma_endpoint->pid, &src_iov, 1, &dst_iov, 1, 0); + /* a negative return is a hard error; a zero-byte return while data is still pending + * would never advance the iovecs, so fail instead of spinning forever */ + if (0 > ret || (0 == ret && 0 < src_iov.iov_len)) { + OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_ERROR, opal_smsc_base_framework.framework_output, + "CMA wrote %ld, expected %lu, errno = %d", (long) ret, + (unsigned long) size, errno)); + return OPAL_ERROR; + } + mca_smsc_cma_iov_advance(&src_iov, ret); + mca_smsc_cma_iov_advance(&dst_iov, ret); + } while (0 < src_iov.iov_len); + + return OPAL_SUCCESS; +} + +/** + * Read size bytes from remote_address in the peer identified by the endpoint's pid into + * local_address, using process_vm_readv(). reg_handle is ignored. Returns OPAL_SUCCESS + * once all bytes have been read and OPAL_ERROR otherwise. + */ +int mca_smsc_cma_copy_from(mca_smsc_endpoint_t *endpoint, void *local_address, void *remote_address, + size_t size, void *reg_handle) +{ + /* ignore the registration handle as it is not used for CMA */ + (void) reg_handle; + + mca_smsc_cma_endpoint_t *cma_endpoint = (mca_smsc_cma_endpoint_t *) endpoint; + + /* + * According to the man page : + * "On success, process_vm_readv() returns the number of bytes read and + * process_vm_writev() returns the number of bytes written. This return + * value may be less than the total number of requested bytes, if a + * partial read/write occurred. (Partial transfers apply at the + * granularity of iovec elements. These system calls won't perform a + * partial transfer that splits a single iovec element.)". + * So since we use a single iovec element, the returned size should either + * be 0 or size, and the do loop should not be needed here. + * We tried on various Linux kernels with size > 2 GB, and surprisingly, + * the returned value is always 0x7ffff000 (fwiw, it happens to be the size + * of the larger number of pages that fits a signed 32 bits integer).
+ * We do not know whether this is a bug from the kernel, the libc or even + * the man page, but for the time being, we do as if process_vm_readv() could + * return any value. + */ + struct iovec src_iov = { + .iov_base = remote_address, + .iov_len = size, + }; + struct iovec dst_iov = { + .iov_base = local_address, + .iov_len = size, + }; + ssize_t ret; + do { + ret = process_vm_readv(cma_endpoint->pid, &dst_iov, 1, &src_iov, 1, 0); + /* a negative return is a hard error; a zero-byte return while data is still pending + * would never advance the iovecs, so fail instead of spinning forever */ + if (0 > ret || (0 == ret && 0 < src_iov.iov_len)) { + OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_ERROR, opal_smsc_base_framework.framework_output, + "CMA read %ld, expected %lu, errno = %d", (long) ret, + (unsigned long) size, errno)); + return OPAL_ERROR; + } + mca_smsc_cma_iov_advance(&src_iov, ret); + mca_smsc_cma_iov_advance(&dst_iov, ret); + } while (0 < src_iov.iov_len); + + return OPAL_SUCCESS; +} + +/* unsupported interfaces defined to support MCA direct */ +void *mca_smsc_cma_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags, + void *remote_address, size_t size, void **local_mapping) +{ + return NULL; +} + +void mca_smsc_cma_unmap_peer_region(void *ctx) +{ +} + +void *mca_smsc_cma_register_region(void *local_address, size_t size) +{ + return NULL; +} + +void mca_smsc_cma_deregister_region(void *reg_data) +{ +} + +mca_smsc_module_t mca_smsc_cma_module = { + .get_endpoint = mca_smsc_cma_get_endpoint, + .return_endpoint = mca_smsc_cma_return_endpoint, + .copy_to = mca_smsc_cma_copy_to, + .copy_from = mca_smsc_cma_copy_from, +}; diff --git a/opal/mca/smsc/configure.m4 b/opal/mca/smsc/configure.m4 new file mode 100644 index 00000000000..51544fedc11 --- /dev/null +++ b/opal/mca/smsc/configure.m4 @@ -0,0 +1,19 @@ +# -*- shell-script -*- +# +# Copyright (c) 2013 Sandia National Laboratories. All rights reserved.
+# Copyright (c) 2021 Google, LLC. All rights reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AC_DEFUN([MCA_opal_smsc_CONFIG],[ + # configure all the components + MCA_CONFIGURE_FRAMEWORK($1, $2, 1) + + # this is a direct callable component, so set that up. + MCA_SETUP_DIRECT_CALL($1, $2) +]) diff --git a/opal/mca/smsc/knem/Makefile.am b/opal/mca/smsc/knem/Makefile.am new file mode 100644 index 00000000000..7f53f45124b --- /dev/null +++ b/opal/mca/smsc/knem/Makefile.am @@ -0,0 +1,58 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2009 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2020-2021 Google, LLC. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST = post_configure.sh + +AM_CPPFLAGS = $(smsc_knem_CPPFLAGS) + +dist_opaldata_DATA = help-smsc-knem.txt + +libmca_smsc_knem_la_sources = \ + smsc_knem_component.c \ + smsc_knem_module.c \ + smsc_knem_internal.h \ + smsc_knem.h + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_opal_smsc_knem_DSO +component_noinst = +component_install = mca_smsc_knem.la +else +component_noinst = libmca_smsc_knem.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_smsc_knem_la_SOURCES = $(libmca_smsc_knem_la_sources) +mca_smsc_knem_la_LDFLAGS = -module -avoid-version $(smsc_knem_LDFLAGS) +mca_smsc_knem_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la \ + $(smsc_knem_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_smsc_knem_la_SOURCES = $(libmca_smsc_knem_la_sources) +libmca_smsc_knem_la_LIBADD = $(smsc_knem_LIBS) +libmca_smsc_knem_la_LDFLAGS = -module -avoid-version $(smsc_knem_LDFLAGS) diff --git a/opal/mca/smsc/knem/configure.m4 b/opal/mca/smsc/knem/configure.m4 new file mode 100644 index 00000000000..a6da2a69402 --- /dev/null +++ b/opal/mca/smsc/knem/configure.m4 @@ -0,0 +1,31 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2021 Google, LLC. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_smsc_knem_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_smsc_knem_CONFIG],[ + AC_CONFIG_FILES([opal/mca/smsc/knem/Makefile]) + + OPAL_CHECK_KNEM([smsc_knem], [$1], [$2]) + + AC_SUBST([smsc_knem_CFLAGS]) + AC_SUBST([smsc_knem_CPPFLAGS]) + AC_SUBST([smsc_knem_LDFLAGS]) + AC_SUBST([smsc_knem_LIBS]) +])dnl diff --git a/opal/mca/smsc/knem/help-smsc-knem.txt b/opal/mca/smsc/knem/help-smsc-knem.txt new file mode 100644 index 00000000000..6065cdd2192 --- /dev/null +++ b/opal/mca/smsc/knem/help-smsc-knem.txt @@ -0,0 +1,91 @@ +# -*- text -*- +# +# Copyright (c) 2004-2009 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2006-2022 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2014 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2014 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English help file for Open MPI's KNEM smsc support. +# +[knem permission denied] +WARNING: Open MPI failed to open the /dev/knem device due to a +permissions problem. Please check with your system administrator to +get the permissions fixed, or set the smsc MCA variable to "^knem" to +silence this warning and run without knem support. + + Local host: %s + /dev/knem permissions: 0%o +# +[knem fail open] +WARNING: Open MPI failed to open the /dev/knem device due to a local +error. Please check with your system administrator to get the problem +fixed, or set the smsc MCA variable to "^knem" to silence this warning +and run without knem support. + +Open MPI will try to fall back on another single-copy mechanism if one +is available. This may result in lower performance. 
+ + Local host: %s + Errno: %d (%s) +# +[knem get ABI fail] +WARNING: Open MPI failed to retrieve the ABI version from the +/dev/knem device due to a local error. This usually indicates an +error in your knem installation; please check with your system +administrator, or set the smsc MCA variable to "^knem" to silence this +warning and run without knem support. + +Open MPI will try to fall back on another single-copy mechanism if one +is available. This may result in lower performance. + + Local host: %s + Errno: %d (%s) +# +[knem ABI mismatch] +WARNING: Open MPI was compiled with support for one version of the +knem kernel module, but it discovered a different version running in +/dev/knem. Open MPI needs to be installed with support for the same +version of knem as is in the running Linux kernel. Please check with +your system administrator, or set the smsc MCA variable to "^knem" to +silence this warning and run without knem support. + +Open MPI will try to fall back on another single-copy mechanism if one +is available. This may result in lower performance. + + Local host: %s + Open MPI's knem version: 0x%x + /dev/knem's version: 0x%x +# +[knem mmap fail] +Open MPI failed to map support from the knem Linux kernel module; this +shouldn't happen. Please check with your system administrator, or set +the smsc MCA variable to "^knem" to silence this warning and run +without knem support. + +Open MPI will try to fall back on another single-copy mechanism if one +is available. This may result in lower performance. + + Local host: %s + System call: mmap() + Errno: %d (%s) +# +[knem init error] +Open MPI encountered an error during the knem initialization. Please +check with your system administrator, or set the smsc MCA variable to +"^knem" to silence this warning and run without knem support. + +Open MPI will try to fall back on another single-copy mechanism if one +is available. This may result in lower performance. 
+ + Local host: %s + System call: %s + Errno: %d (%s) diff --git a/opal/mca/smsc/knem/post_configure.sh b/opal/mca/smsc/knem/post_configure.sh new file mode 100644 index 00000000000..fd7a0eb1abe --- /dev/null +++ b/opal/mca/smsc/knem/post_configure.sh @@ -0,0 +1 @@ +DIRECT_CALL_HEADER="opal/mca/smsc/knem/smsc_knem.h" diff --git a/opal/mca/smsc/knem/smsc_knem.h b/opal/mca/smsc/knem/smsc_knem.h new file mode 100644 index 00000000000..824b7ad7b39 --- /dev/null +++ b/opal/mca/smsc/knem/smsc_knem.h @@ -0,0 +1,34 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2021 Google, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MCA_SMSC_KNEM_SMSC_KNEM_H +#define OPAL_MCA_SMSC_KNEM_SMSC_KNEM_H + +#include "opal_config.h" + +#include "opal/mca/smsc/smsc.h" + +mca_smsc_endpoint_t *mca_smsc_knem_get_endpoint(opal_proc_t *peer_proc); +void mca_smsc_knem_return_endpoint(mca_smsc_endpoint_t *endpoint); + +int mca_smsc_knem_copy_to(mca_smsc_endpoint_t *endpoint, void *local_address, void *remote_address, + size_t size, void *reg_data); +int mca_smsc_knem_copy_from(mca_smsc_endpoint_t *endpoint, void *local_address, + void *remote_address, size_t size, void *reg_data); + +void *mca_smsc_knem_register_region(void *local_address, size_t size); +void mca_smsc_knem_deregister_region(void *reg_data); + +/* unsupported interfaces defined to support MCA direct */ +void *mca_smsc_knem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags, + void *remote_address, size_t size, void **local_mapping); +void mca_smsc_knem_unmap_peer_region(void *ctx); + +#endif /* OPAL_MCA_SMSC_KNEM_SMSC_KNEM_H */ diff --git a/opal/mca/smsc/knem/smsc_knem_component.c b/opal/mca/smsc/knem/smsc_knem_component.c new file mode 100644 index 00000000000..301fc7984db --- /dev/null +++ b/opal/mca/smsc/knem/smsc_knem_component.c @@ -0,0 +1,224 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- 
*/ +/* + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2021 Google, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/mca/smsc/base/base.h" +#include "opal/mca/smsc/knem/smsc_knem_internal.h" +#include "opal/util/show_help.h" + +#include +#include +#include +#include +#include +#include + +static int mca_smsc_knem_component_register(void); +static int mca_smsc_knem_component_open(void); +static int mca_smsc_knem_component_close(void); +static int mca_smsc_knem_component_query(void); +static mca_smsc_module_t *mca_smsc_knem_component_enable(void); + +static int mca_smsc_knem_fini(void); + +#define MCA_SMSC_KNEM_DEFAULT_PRIORITY 23 +static const int mca_smsc_knem_default_priority = MCA_SMSC_KNEM_DEFAULT_PRIORITY; + +mca_smsc_knem_component_t mca_smsc_knem_component = { + .super = { + .smsc_version = { + MCA_SMSC_DEFAULT_VERSION("knem"), + .mca_open_component = mca_smsc_knem_component_open, + .mca_close_component = mca_smsc_knem_component_close, + .mca_register_component_params = mca_smsc_knem_component_register, + }, + .priority = MCA_SMSC_KNEM_DEFAULT_PRIORITY, + .query = mca_smsc_knem_component_query, + .enable = mca_smsc_knem_component_enable, + }, +}; + +static int mca_smsc_knem_component_register(void) +{ + /* Currently disabling DMA mode by default; it's not clear that this is useful in all + * applications and architectures. 
*/ + mca_smsc_knem_component.dma_min = 0; + (void) mca_base_component_var_register( + &mca_smsc_knem_component.super.smsc_version, "dma_min", + "Minimum message size (in bytes) to use the knem DMA mode; " + "ignored if knem does not support DMA mode (0 = do not use the " + "knem DMA mode, default: 0)", + MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, + &mca_smsc_knem_component.dma_min); + + mca_smsc_base_register_default_params(&mca_smsc_knem_component.super, + mca_smsc_knem_default_priority); + return OPAL_SUCCESS; +} + +static int mca_smsc_knem_component_open(void) +{ + /* nothing to do */ + return OPAL_SUCCESS; +} + +static int mca_smsc_knem_component_close(void) +{ + return mca_smsc_knem_fini(); +} + +static int mca_smsc_knem_get_info(struct knem_cmd_info *knem_info) +{ + /* Only show the help message if this is the only component. */ + bool show_help = (1 == opal_list_get_size(&opal_smsc_base_framework.framework_components)); + + /* Check that the ABI if kernel module running is the same + * as what we were compiled against. 
*/ + memset(knem_info, 0, sizeof(*knem_info)); + int rc = ioctl(mca_smsc_knem_component.knem_fd, KNEM_CMD_GET_INFO, knem_info); + if (rc < 0) { + if (show_help) { + opal_show_help("help-smsc-knem.txt", "knem get ABI fail", true, + opal_process_info.nodename, errno, strerror(errno)); + } + return OPAL_ERR_NOT_AVAILABLE; + } + + if (KNEM_ABI_VERSION != knem_info->abi) { + if (show_help) { + opal_show_help("help-smsc-knem.txt", "knem ABI mismatch", true, + opal_process_info.nodename, KNEM_ABI_VERSION, knem_info->abi); + } + return OPAL_ERR_NOT_AVAILABLE; + } + + return OPAL_SUCCESS; +} +static int mca_smsc_knem_reg(void *reg_data, void *base, size_t size, + mca_rcache_base_registration_t *reg) +{ + mca_smsc_knem_registration_handle_t *knem_reg = (mca_smsc_knem_registration_handle_t *) reg; + struct knem_cmd_create_region knem_cr; + struct knem_cmd_param_iovec knem_iov; + + knem_iov.base = (uintptr_t) base; + knem_iov.len = size; + + knem_cr.iovec_array = (uintptr_t) &knem_iov; + knem_cr.iovec_nr = 1; + knem_cr.protection = 0; + + if (reg->access_flags & (MCA_RCACHE_ACCESS_LOCAL_WRITE | MCA_RCACHE_ACCESS_REMOTE_WRITE)) { + knem_cr.protection |= PROT_WRITE; + } + + if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_READ) { + knem_cr.protection |= PROT_READ; + } + + /* We will explicitly destroy this cookie. Do not use the single-use flag here. */ + knem_cr.flags = 0; + if (OPAL_UNLIKELY(ioctl(mca_smsc_knem_component.knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) + < 0)) { + return OPAL_ERROR; + } + + knem_reg->data.cookie = knem_cr.cookie; + knem_reg->data.base_addr = (intptr_t) base; + + return OPAL_SUCCESS; +} + +static int mca_smsc_knem_dereg(void *reg_data, mca_rcache_base_registration_t *reg) +{ + mca_smsc_knem_registration_handle_t *knem_reg = (mca_smsc_knem_registration_handle_t *) reg; + + /* NTH: explicity ignore the return code. Don't care about this cookie anymore anyway. 
*/ + (void) ioctl(mca_smsc_knem_component.knem_fd, KNEM_CMD_DESTROY_REGION, &knem_reg->data.cookie); + + return OPAL_SUCCESS; +} + +static int mca_smsc_knem_fini(void) +{ + if (-1 != mca_smsc_knem_component.knem_fd) { + close(mca_smsc_knem_component.knem_fd); + mca_smsc_knem_component.knem_fd = -1; + } + + if (mca_smsc_knem_module.rcache) { + (void) mca_rcache_base_module_destroy(mca_smsc_knem_module.rcache); + mca_smsc_knem_module.rcache = NULL; + } + + return OPAL_SUCCESS; +} + +static int mca_smsc_knem_component_query(void) +{ + struct knem_cmd_info knem_info; + int rc; + + /* Open the knem device. Try to print a helpful message if we + fail to open it. */ + mca_smsc_knem_component.knem_fd = open("/dev/knem", O_RDWR); + if (mca_smsc_knem_component.knem_fd < 0) { + if (EACCES == errno) { + struct stat sbuf; + if (0 != stat("/dev/knem", &sbuf)) { + sbuf.st_mode = 0; + } + opal_show_help("help-smsc-knem.txt", "knem permission denied", true, + opal_process_info.nodename, sbuf.st_mode); + } else { + opal_show_help("help-smsc-knem.txt", "knem fail open", true, opal_process_info.nodename, + errno, strerror(errno)); + } + + return OPAL_ERR_NOT_AVAILABLE; + } + + rc = mca_smsc_knem_get_info(&knem_info); + if (OPAL_SUCCESS != rc) { + mca_smsc_knem_fini(); + return rc; + } + + if (!(mca_smsc_knem_component.dma_min && (knem_info.features & KNEM_FEATURE_DMA))) { + /* disable DMA */ + mca_smsc_knem_component.dma_min = UINT_MAX; + } + + return OPAL_SUCCESS; +} + +static mca_smsc_module_t *mca_smsc_knem_component_enable(void) +{ + if (0 > mca_smsc_knem_component.super.priority) { + return NULL; + } + + mca_rcache_base_resources_t rcache_resources = {.cache_name = "smsc_knem", + .reg_data = NULL, + .sizeof_reg = sizeof( + mca_smsc_knem_registration_handle_t), + .register_mem = mca_smsc_knem_reg, + .deregister_mem = mca_smsc_knem_dereg}; + + mca_smsc_knem_module.rcache = mca_rcache_base_module_create("grdma", NULL, &rcache_resources); + if (NULL == mca_smsc_knem_module.rcache) { 
+ return NULL; + } + + return &mca_smsc_knem_module.super; +} diff --git a/opal/mca/smsc/knem/smsc_knem_internal.h b/opal/mca/smsc/knem/smsc_knem_internal.h new file mode 100644 index 00000000000..60a8ddbf676 --- /dev/null +++ b/opal/mca/smsc/knem/smsc_knem_internal.h @@ -0,0 +1,74 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2021 Google, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MCA_SMSC_KNEM_SMSC_KNEM_INTERNAL_H +#define OPAL_MCA_SMSC_KNEM_SMSC_KNEM_INTERNAL_H + +#include "opal_config.h" + +#include "opal/mca/rcache/base/base.h" +#include "opal/mca/rcache/rcache.h" +#include "opal/mca/smsc/knem/smsc_knem.h" + +#include + +#include + +struct mca_smsc_knem_registration_data_t { + uint64_t cookie; + intptr_t base_addr; +}; + +typedef struct mca_smsc_knem_registration_data_t mca_smsc_knem_registration_data_t; + +struct mca_smsc_knem_registration_handle_t { + mca_rcache_base_registration_t base; + mca_smsc_knem_registration_data_t data; +}; + +typedef struct mca_smsc_knem_registration_handle_t mca_smsc_knem_registration_handle_t; + +#define MCA_SMSC_KNEM_REG_HANDLE_TO_DATA(handle) (&(handle)->data) +#define MCA_SMSC_KNEM_REG_DATA_TO_HANDLE(data_ptr) \ + ((mca_smsc_knem_registration_handle_t *) ((uintptr_t) data_ptr \ + - offsetof(mca_smsc_knem_registration_handle_t, \ + data))) + +struct mca_smsc_knem_endpoint_t { + mca_smsc_endpoint_t super; +}; + +typedef struct mca_smsc_knem_endpoint_t mca_smsc_knem_endpoint_t; + +OBJ_CLASS_DECLARATION(mca_smsc_knem_endpoint_t); + +struct mca_smsc_knem_component_t { + mca_smsc_component_t super; + + int knem_fd; + unsigned int dma_min; +}; + +typedef struct mca_smsc_knem_component_t mca_smsc_knem_component_t; + +struct mca_smsc_knem_module_t { + mca_smsc_module_t super; + + /** cache of knem attachments. this cache holds attachments for all peers. 
the registrations + * are differentiated by the alloc_base which is set to the endpoint. */ + mca_rcache_base_module_t *rcache; +}; + +typedef struct mca_smsc_knem_module_t mca_smsc_knem_module_t; + +extern mca_smsc_knem_module_t mca_smsc_knem_module; +extern mca_smsc_knem_component_t mca_smsc_knem_component; + +#endif /* OPAL_MCA_SMSC_KNEM_SMSC_KNEM_INTERNAL_H */ diff --git a/opal/mca/smsc/knem/smsc_knem_module.c b/opal/mca/smsc/knem/smsc_knem_module.c new file mode 100644 index 00000000000..745ca014193 --- /dev/null +++ b/opal/mca/smsc/knem/smsc_knem_module.c @@ -0,0 +1,158 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2020-2021 Google, LLC. All rights reserved. + * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include <limits.h> + +#include "opal/include/opal/align.h" +#include "opal/mca/memchecker/base/base.h" +#include "opal/mca/pmix/pmix-internal.h" +#include "opal/mca/rcache/rcache.h" +#include "opal/mca/smsc/base/base.h" +#include "opal/mca/smsc/knem/smsc_knem_internal.h" +#include "opal/util/minmax.h" + +OBJ_CLASS_INSTANCE(mca_smsc_knem_endpoint_t, opal_object_t, NULL, NULL); + +/** Allocate a new endpoint that records peer_proc; returns NULL if the allocation fails. */ +mca_smsc_endpoint_t *mca_smsc_knem_get_endpoint(opal_proc_t *peer_proc) +{ + mca_smsc_knem_endpoint_t *endpoint = OBJ_NEW(mca_smsc_knem_endpoint_t); + if (OPAL_UNLIKELY(NULL == endpoint)) { + return NULL; + } + + endpoint->super.proc = peer_proc; + return &endpoint->super; +} + +/** Release an endpoint obtained from mca_smsc_knem_get_endpoint(). */ +void mca_smsc_knem_return_endpoint(mca_smsc_endpoint_t *endpoint) +{ + OBJ_RELEASE(endpoint); +} + +/** + * Register [local_address, local_address + size) through the module's rcache. Returns a + * pointer to the registration data embedded in the rcache handle, or NULL on failure. + */ +void *mca_smsc_knem_register_region(void *local_address, size_t size) +{ + mca_smsc_knem_module_t *knem_module = &mca_smsc_knem_module; +
mca_smsc_knem_registration_handle_t *reg = NULL; + int rc; + + rc = knem_module->rcache->rcache_register(knem_module->rcache, local_address, size, + /*flags=*/0, MCA_RCACHE_ACCESS_ANY, + (mca_rcache_base_registration_t **) &reg); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + opal_output_verbose( + MCA_BASE_VERBOSE_WARN, opal_smsc_base_framework.framework_output, + "mca_smsc_knem_register_mem: failed to register memory for single-copy"); + return NULL; + } + + return MCA_SMSC_KNEM_REG_HANDLE_TO_DATA(reg); +} + +/** Drop a registration; reg_data is the pointer handed out by mca_smsc_knem_register_region(). */ +void mca_smsc_knem_deregister_region(void *reg_data) +{ + mca_smsc_knem_module_t *knem_module = &mca_smsc_knem_module; + mca_smsc_knem_registration_handle_t *reg = MCA_SMSC_KNEM_REG_DATA_TO_HANDLE(reg_data); + + knem_module->rcache->rcache_deregister(knem_module->rcache, &reg->base); +} + +/** + * Common implementation of copy_to/copy_from: issue one synchronous KNEM_CMD_INLINE_COPY + * ioctl between local_address and the registered peer region described by reg_data. + * is_write is true for copy_to and false for copy_from. Returns OPAL_ERR_BAD_PARAM when + * reg_data is NULL, OPAL_ERROR when the ioctl or the transfer fails, OPAL_SUCCESS otherwise. + */ +static int mca_smsc_knem_module_copy(mca_smsc_endpoint_t *endpoint, void *local_address, + void *remote_address, size_t size, void *reg_data, + bool is_write) +{ + if (OPAL_UNLIKELY(NULL == reg_data)) { + return OPAL_ERR_BAD_PARAM; + } + + struct knem_cmd_param_iovec send_iovec = { + .base = (uintptr_t) local_address, + .len = size, + }; + mca_smsc_knem_registration_data_t *reg = (mca_smsc_knem_registration_data_t *) reg_data; + /* Fill in the ioctl data fields. There's no async completion, so + we don't need to worry about getting a slot, etc. */ + struct knem_cmd_inline_copy icopy = { + .local_iovec_array = (uintptr_t) &send_iovec, + .local_iovec_nr = 1, + .remote_cookie = reg->cookie, + .remote_offset = (uintptr_t) remote_address - reg->base_addr, + .write = is_write, + .flags = 0, + }; + + /* Use the DMA flag if knem supports it *and* the segment length + * is greater than the cutoff. Note that if DMA is not supported + * or the user specified 0 for knem_dma_min the knem_dma_min was + * set to UINT_MAX in mca_smsc_knem_query. + * UINT_MAX is tested explicitly: size is a size_t and can exceed + * it, which must not turn DMA on when it has been disabled.
*/ + if (UINT_MAX != mca_smsc_knem_component.dma_min + && mca_smsc_knem_component.dma_min <= size) { + icopy.flags = KNEM_FLAG_DMA; + } + /* synchronous flags only, no need to specify icopy.async_status_index */ + + /* When the ioctl returns, the transfer is done and we can invoke + the btl callback and return the frag */ + if (OPAL_UNLIKELY(0 != ioctl(mca_smsc_knem_component.knem_fd, KNEM_CMD_INLINE_COPY, &icopy))) { + opal_output_verbose(MCA_BASE_VERBOSE_WARN, opal_smsc_base_framework.framework_output, + "mca_smsc_knem_module_copy: failed to initiate transfer"); + return OPAL_ERROR; + } + + if (KNEM_STATUS_FAILED == icopy.current_status) { + opal_output_verbose(MCA_BASE_VERBOSE_WARN, opal_smsc_base_framework.framework_output, + "mca_smsc_knem_module_copy: transfer failed"); + return OPAL_ERROR; + } + + return OPAL_SUCCESS; +} + +int mca_smsc_knem_copy_to(mca_smsc_endpoint_t *endpoint, void *local_address, void *remote_address, + size_t size, void *reg_data) +{ + return mca_smsc_knem_module_copy(endpoint, local_address, remote_address, size, reg_data, + /*is_write=*/true); +} + +int mca_smsc_knem_copy_from(mca_smsc_endpoint_t *endpoint, void *local_address, + void *remote_address, size_t size, void *reg_data) +{ + return mca_smsc_knem_module_copy(endpoint, local_address, remote_address, size, reg_data, + /*is_write=*/false); +} + +/* unsupported interfaces (for MCA direct) */ +void *mca_smsc_knem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags, + void *remote_address, size_t size, void **local_mapping) +{ + return NULL; +} + +void mca_smsc_knem_unmap_peer_region(void *ctx) +{ +} + +mca_smsc_knem_module_t mca_smsc_knem_module = { + .super = { + .features = MCA_SMSC_FEATURE_REQUIRE_REGISTATION, + .registration_data_size = sizeof(mca_smsc_knem_registration_data_t), + .get_endpoint = mca_smsc_knem_get_endpoint, + .return_endpoint = mca_smsc_knem_return_endpoint, + .copy_to = mca_smsc_knem_copy_to, + .copy_from = mca_smsc_knem_copy_from, + .register_region = mca_smsc_knem_register_region, +
.deregister_region = mca_smsc_knem_deregister_region, + }, +}; diff --git a/opal/mca/smsc/smsc.h b/opal/mca/smsc/smsc.h new file mode 100644 index 00000000000..6d8523b253b --- /dev/null +++ b/opal/mca/smsc/smsc.h @@ -0,0 +1,258 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2021 Google, LLC. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Shared Memory Single Copy + * + * This framework provides support for copying memory from one process to another on the same host + * system. The components expose process read, write, and may provide a way to map peer memory into + * this processes memory space. + */ + +#ifndef OPAL_MCA_SMSC_H +#define OPAL_MCA_SMSC_H + +#include "opal_config.h" +#include "opal/class/opal_object.h" +#include "opal/util/proc.h" + +#define MCA_SMSC_BASE_MAJOR_VERSION 1 +#define MCA_SMSC_BASE_MINOR_VERSION 0 +#define MCA_SMSC_BASE_PATCH_VERSION 0 + +struct mca_smsc_module_t; + +struct mca_smsc_endpoint_t { + opal_object_t super; + /** Opal proc object for this peer. */ + opal_proc_t *proc; +}; + +typedef struct mca_smsc_endpoint_t mca_smsc_endpoint_t; + +OBJ_CLASS_DECLARATION(mca_smsc_endpoint_t); + +/** + * @brief Get an endpoint for a peer proc. + * + * @param(in) module shared-memory single-copy module + * @param(in) peer_proc proc to get an endpoint for + */ +typedef mca_smsc_endpoint_t *(*mca_smsc_module_get_endpoint_fn_t)(opal_proc_t *peer_proc); + +/** + * @brief Return a shared-memory single-copy endpoint. + * + * @param(in) module shared-memory single-copy module + * @param(in) endpoint shared-memory single-copy endpoint + * + * This method returns an endpoint created by get_endpoint. The endpoint should be considered + * invalid and may be freed after this call completes. + */ +typedef void (*mca_smsc_module_return_endpoint_fn_t)(mca_smsc_endpoint_t *endpoint); + +/** + * @brief Copy to/from a peer process. 
+ * + * @param(in) module shared-memory single-copy module + * @param(in) endpoint shared-memory single-copy endpoint + * @param(in) local_address local address to use + * @param(in) remote_address remote address to use + * @param(in) size amount to copy + * @param(in) reg_data pointer to memory containing registration data (if required) + * + * A module must provide both copy_from and copy_to function. + */ +typedef int (*mca_smsc_module_copy_fn_t)(mca_smsc_endpoint_t *endpoint, void *local_address, + void *remote_address, size_t size, void *reg_data); + +/** + * @brief Map a peer's memory onto local memory. + * + * @param(in) module shared-memory single-copy module + * @param(in) endpoint shared-memory single-copy endpoint + * @param(in) flags flags for this map operation (set to 0) + * @param(in) remote_address pointer valid in peer's address space + * @param(in) size size of region to map + * @param(out) local_mapping local address for peer region + * + * @returns a reference to the mapping + * + * This method, if provided, provides support for mapping a local peer's memory into this address + * space. The caller is responsible for verifying that the address is valid or access to the region + * may result in an access violation (SEGV). The function returns a reference (if needed) that can + * be used to clear the mapping. It is the caller's responsibility to unmap the region using the + * returned context. + */ +typedef void *(*mca_smsc_module_map_peer_region_fn_t)(mca_smsc_endpoint_t *endpoint, uint64_t flags, + void *remote_address, size_t size, + void **local_mapping); + +/** + * @brief Clear a memory mapping. + * + * @param(in) module shared-memory single-copy module + * @param(in) ctx memory mapping context + */ +typedef void (*mca_smsc_module_unmap_peer_region_fn_t)(void *ctx); + +/** + * @brief Register a memory region for remote access. 
+ * + * @param(in) module shared-memory single-copy module + * @param(in) local_address local address to register (ideally page-aligned) + * @param(in) size size of the memory region (ideally page-aligned) + * + * @returns a pointer to registration data that can be used for copy by a peer process + * + * This method registers a region for access by a local peer. The returned data can be passed to a + * local peer and used by that peer for either copy_to or copy_from. + */ +typedef void *(*mca_smsc_module_register_region_fn_t)(void *local_address, size_t size); + +/** + * @brief Deregister a registered region. + * + * @param(in) module shared-memory single-copy module + * @param(in) reg_data registration data returned by the registration function + * + * This function deregisters a region from use by a peer's copy_from and copy_to function. Once a + * region has been deregistered the data is immediately not usable by any local peer. + */ +typedef void (*mca_smsc_module_deregister_region_fn_t)(void *reg_data); + +enum { + /** Module requires the local registration of any region that will be used for single-copy + * operations. It is the responsibility of the caller to pass this data with the pointer to the + * peer. */ + MCA_SMSC_FEATURE_REQUIRE_REGISTATION = 1, + /** Module can map peer memory into the local process's address space. */ + MCA_SMSC_FEATURE_CAN_MAP = 2, +}; + +struct mca_smsc_module_t { + /** Module features. */ + uint64_t features; + + /** Ignore if MCA_SMSC_FEATURE_REQUIRE_REGISTATION is not set. */ + size_t registration_data_size; + + /** Get an endpoint for a peer. This function should always return a newly-allocated endpoint. + * The base will be responsible for caching that endpoint. */ + mca_smsc_module_get_endpoint_fn_t get_endpoint; + /** Delete an endpoint and clean up all resources associated with it. */ + mca_smsc_module_return_endpoint_fn_t return_endpoint; + + /* All components must provide an implementation of the copy functions.
*/ + /** Copy data into a peer's memory space. */ + mca_smsc_module_copy_fn_t copy_to; + /** Copy data from a peer's memory space. */ + mca_smsc_module_copy_fn_t copy_from; + + /* Defined if MCA_SMSC_FEATURE_CAN_MAP is set. */ + /** Map a peer memory region into this process's address space. The module is allowed to cache + * the mapping and return it in subsequent calls. */ + mca_smsc_module_map_peer_region_fn_t map_peer_region; + /** Delete a mapping. This is allowed to leave the mapping in place. */ + mca_smsc_module_unmap_peer_region_fn_t unmap_peer_region; + + /* Defined if MCA_SMSC_FEATURE_REQUIRE_REGISTATION is set. */ + /** Register a memory region for use with single-copy by a remote peer. The module may cache + * this registration for future use. */ + mca_smsc_module_register_region_fn_t register_region; + /** Deregister a memory region for use with single-copy. */ + mca_smsc_module_deregister_region_fn_t deregister_region; +}; + +typedef struct mca_smsc_module_t mca_smsc_module_t; + +/** + * @brief Query if this component can run. + * + * @returns OPAL_SUCCESS if the component can run or an opal error code otherwise + * + * This function is responsible for verifying the component can run. It should do the minimum amount + * of work to run at any time during execution. This includes sending any modex message if needed. + * It should refrain from allocating resources if possible. + */ +typedef int (*mca_smsc_component_query_fn_t)(void); + +/** + * @brief Enable the use of this component and return a module. + * + * @returns A module on success or NULL otherwise. + * + * This function should do any remaining work (not already done in query) to prepare the component + * for use. It should return a fully initialized module. + */ +typedef mca_smsc_module_t *(*mca_smsc_component_enable_fn_t)(void); + +struct mca_smsc_component_1_0_0_t { + mca_base_component_t smsc_version; + mca_base_component_data_t smsc_data; + + /** Priority of this component.
Only the winning component will be used. */ + int priority; + + /** Check if this component can be used. */ + mca_smsc_component_query_fn_t query; + /** Enable the use of this component. */ + mca_smsc_component_enable_fn_t enable; +}; + +typedef struct mca_smsc_component_1_0_0_t mca_smsc_component_1_0_0_t; +typedef mca_smsc_component_1_0_0_t mca_smsc_component_t; + +OPAL_DECLSPEC extern mca_smsc_module_t *mca_smsc; + +#if MCA_opal_smsc_DIRECT_CALL +# include MCA_opal_smsc_DIRECT_CALL_HEADER + +# define MCA_SMSC_CALL_STAMP(a, b, ...) mca_smsc_##a##_##b(__VA_ARGS__) +# define MCA_SMSC_CALL_EXPANDER(a, b, ...) MCA_SMSC_CALL_STAMP(a, b, __VA_ARGS__) +# define MCA_SMSC_CALL(a, ...) \ + MCA_SMSC_CALL_EXPANDER(MCA_opal_smsc_DIRECT_CALL_COMPONENT, a, __VA_ARGS__) + +#else + +# define MCA_SMSC_CALL(a, ...) mca_smsc->a(__VA_ARGS__) + +#endif /* MCA_opal_smsc_DIRECT_CALL */ + +/** + * @brief Check if the selected component has a feature. + * + * @param(in) feature feature to check for (see smsc.h for list of features) + */ +static inline bool mca_smsc_base_has_feature(uint64_t feature) +{ + return (NULL != mca_smsc) && !!(mca_smsc->features & feature); +} + +static inline ssize_t mca_smsc_base_registration_data_size(void) +{ + if (NULL == mca_smsc || !mca_smsc_base_has_feature(MCA_SMSC_FEATURE_REQUIRE_REGISTATION)) { + return OPAL_ERR_NOT_AVAILABLE; + } + + return mca_smsc->registration_data_size; +} + +#define MCA_SMSC_BASE_VERSION_1_0_0 \ + OPAL_MCA_BASE_VERSION_2_1_0("smsc", MCA_SMSC_BASE_MAJOR_VERSION, MCA_SMSC_BASE_MINOR_VERSION, \ + MCA_SMSC_BASE_PATCH_VERSION) + +#define MCA_SMSC_DEFAULT_VERSION(name) \ + MCA_SMSC_BASE_VERSION_1_0_0, .mca_component_name = name, \ + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, \ + OPAL_MINOR_VERSION, OPAL_RELEASE_VERSION) + +#endif /* OPAL_MCA_SMSC_H */ diff --git a/opal/mca/smsc/xpmem/Makefile.am b/opal/mca/smsc/xpmem/Makefile.am new file mode 100644 index 00000000000..1944d3ef24e --- /dev/null +++ 
b/opal/mca/smsc/xpmem/Makefile.am @@ -0,0 +1,56 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2009 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2020-2021 Google, LLC. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST = post_configure.sh + +AM_CPPFLAGS = $(smsc_xpmem_CPPFLAGS) + +libmca_smsc_xpmem_la_sources = \ + smsc_xpmem_component.c \ + smsc_xpmem_module.c \ + smsc_xpmem_internal.h \ + smsc_xpmem.h + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_opal_smsc_xpmem_DSO +component_noinst = +component_install = mca_smsc_xpmem.la +else +component_noinst = libmca_smsc_xpmem.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_smsc_xpmem_la_SOURCES = $(libmca_smsc_xpmem_la_sources) +mca_smsc_xpmem_la_LDFLAGS = -module -avoid-version $(smsc_xpmem_LDFLAGS) +mca_smsc_xpmem_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la \ + $(smsc_xpmem_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_smsc_xpmem_la_SOURCES = $(libmca_smsc_xpmem_la_sources) +libmca_smsc_xpmem_la_LIBADD = $(smsc_xpmem_LIBS) +libmca_smsc_xpmem_la_LDFLAGS = -module -avoid-version $(smsc_xpmem_LDFLAGS) diff --git a/opal/mca/smsc/xpmem/configure.m4 b/opal/mca/smsc/xpmem/configure.m4 new file mode 100644 index 00000000000..648b514724e --- /dev/null +++ b/opal/mca/smsc/xpmem/configure.m4 @@ -0,0 +1,31 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2021 Google, LLC. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_smsc_xpmem_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_smsc_xpmem_CONFIG],[ + AC_CONFIG_FILES([opal/mca/smsc/xpmem/Makefile]) + + OPAL_CHECK_XPMEM([smsc_xpmem], [$1], [$2]) + + AC_SUBST([smsc_xpmem_CFLAGS]) + AC_SUBST([smsc_xpmem_CPPFLAGS]) + AC_SUBST([smsc_xpmem_LDFLAGS]) + AC_SUBST([smsc_xpmem_LIBS]) +])dnl diff --git a/opal/mca/smsc/xpmem/post_configure.sh b/opal/mca/smsc/xpmem/post_configure.sh new file mode 100644 index 00000000000..a4d999a1161 --- /dev/null +++ b/opal/mca/smsc/xpmem/post_configure.sh @@ -0,0 +1 @@ +DIRECT_CALL_HEADER="opal/mca/smsc/xpmem/smsc_xpmem.h" diff --git a/opal/mca/smsc/xpmem/smsc_xpmem.h b/opal/mca/smsc/xpmem/smsc_xpmem.h new file mode 100644 index 00000000000..ae8d33efbb3 --- /dev/null +++ b/opal/mca/smsc/xpmem/smsc_xpmem.h @@ -0,0 +1,43 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2021 Google, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_H +#define OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_H + +#include "opal_config.h" + +#include "opal/mca/smsc/smsc.h" + +mca_smsc_endpoint_t *mca_smsc_xpmem_get_endpoint(opal_proc_t *peer_proc); +void mca_smsc_xpmem_return_endpoint(mca_smsc_endpoint_t *endpoint); + +int mca_smsc_xpmem_copy_to(mca_smsc_endpoint_t *endpoint, void *local_address, void *remote_address, + size_t size, void *reg_handle); + +int mca_smsc_xpmem_copy_from(mca_smsc_endpoint_t *endpoint, void *local_address, + void *remote_address, size_t size, void *reg_handle); + +/** + * @brief Map a peer memory region into this processes address space. + * + * See the description in smsc.h. + * + * Caveats: XPMEM does not support futex operations within the region. 
Attempts to wake the + * process owning the mutex will result in an EFAULT error code. + */ +void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags, + void *remote_ptr, size_t size, void **local_ptr); +void mca_smsc_xpmem_unmap_peer_region(void *ctx); + +/* unsupported interfaces defined to support MCA direct */ +void *mca_smsc_xpmem_register_region(void *local_address, size_t size); +void mca_smsc_xpmem_deregister_region(void *reg_data); + +#endif /* OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_H */ diff --git a/opal/mca/smsc/xpmem/smsc_xpmem_component.c b/opal/mca/smsc/xpmem/smsc_xpmem_component.c new file mode 100644 index 00000000000..9ebced1cb76 --- /dev/null +++ b/opal/mca/smsc/xpmem/smsc_xpmem_component.c @@ -0,0 +1,167 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2021 Google, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "opal_config.h" + +#include "opal/mca/smsc/base/base.h" +#include "opal/mca/smsc/xpmem/smsc_xpmem_internal.h" +#include "opal/util/minmax.h" + +#include +#include +#include +#include +#include +#include + +static int mca_smsc_xpmem_component_register(void); +static int mca_smsc_xpmem_component_open(void); +static int mca_smsc_xpmem_component_close(void); +static int mca_smsc_xpmem_component_query(void); +static mca_smsc_module_t *mca_smsc_xpmem_component_enable(void); + +#define MCA_SMSC_XPMEM_DEFAULT_PRIORITY 42 +static const int mca_smsc_xpmem_default_priority = MCA_SMSC_XPMEM_DEFAULT_PRIORITY; + +mca_smsc_xpmem_component_t mca_smsc_xpmem_component = { + .super = { + .smsc_version = { + MCA_SMSC_DEFAULT_VERSION("xpmem"), + .mca_open_component = mca_smsc_xpmem_component_open, + .mca_close_component = mca_smsc_xpmem_component_close, + .mca_register_component_params = mca_smsc_xpmem_component_register, + }, + .priority = MCA_SMSC_XPMEM_DEFAULT_PRIORITY, + .query = mca_smsc_xpmem_component_query, + .enable = 
mca_smsc_xpmem_component_enable, + }, +}; + +static int mca_smsc_xpmem_component_register(void) +{ + mca_smsc_xpmem_component.log_attach_align = 23; + (void) mca_base_component_var_register(&mca_smsc_xpmem_component.super.smsc_version, + "log_align", + "Log base 2 of the alignment to use for xpmem " + "segments (default: 23, minimum: 12, maximum: 25)", + MCA_BASE_VAR_TYPE_INT, /*enumerator=*/NULL, /*bind=*/0, + MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_LOCAL, + &mca_smsc_xpmem_component.log_attach_align); + + mca_smsc_xpmem_component.memcpy_chunk_size = 262144; + (void) mca_base_component_var_register( + &mca_smsc_xpmem_component.super.smsc_version, "memcpy_chunk_size", + "Maximum size to copy with a single call to memcpy. On some systems a smaller or larger " + "number may provide better performance (default: 256k)", + MCA_BASE_VAR_TYPE_UINT64_T, /*enumerator=*/NULL, /*bind=*/0, MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_smsc_xpmem_component.memcpy_chunk_size); + + mca_smsc_base_register_default_params(&mca_smsc_xpmem_component.super, + mca_smsc_xpmem_default_priority); + return OPAL_SUCCESS; +} + +static int mca_smsc_xpmem_component_open(void) +{ + /* nothing to do */ + return OPAL_SUCCESS; +} + +static int mca_smsc_xpmem_component_close(void) +{ + if (mca_smsc_xpmem_module.vma_module) { + OBJ_RELEASE(mca_smsc_xpmem_module.vma_module); + } + + return OPAL_SUCCESS; +} + +static int mca_smsc_xpmem_send_modex(void) +{ + mca_smsc_xpmem_modex_t modex; + + modex.seg_id = mca_smsc_xpmem_component.my_seg_id; + modex.address_max = mca_smsc_xpmem_component.my_address_max; + + int rc; + OPAL_MODEX_SEND(rc, PMIX_LOCAL, &mca_smsc_xpmem_component.super.smsc_version, &modex, + sizeof(modex)); + return rc; +} + +static int mca_smsc_xpmem_component_query(void) +{ + /* Any attachment that goes past the Linux TASK_SIZE will always fail. To prevent this we need + * to determine the value of TASK_SIZE. 
On x86_64 the value was hard-coded in sm to be + * 0x7ffffffffffful but this approach does not work with AARCH64 (and possibly other + * architectures). Since there is really no way to directly determine the value we can (in all + * cases?) look through the mapping for this process to determine what the largest address is. + * This should be the top of the stack. No heap allocations should be larger than this value. + * Since the largest address may differ between processes the value must be shared as part of + * the modex and stored in the endpoint. */ + FILE *fh = fopen("/proc/self/maps", "r"); + if (NULL == fh) { + opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, opal_smsc_base_framework.framework_output, + "mca_smsc_xpmem_component_query: could not open /proc/self/maps for " + "reading. disabling XPMEM"); + return OPAL_ERR_NOT_AVAILABLE; + } + + char buffer[1024]; + uintptr_t address_max = 0; + while (fgets(buffer, sizeof(buffer), fh)) { + uintptr_t low, high; + char *tmp; + /* each line of /proc/self/maps starts with low-high in hexadecimal (without a 0x) */ + low = strtoul(buffer, &tmp, 16); + high = strtoul(tmp + 1, NULL, 16); + if (address_max < high) { + address_max = high; + } + } + + fclose(fh); + + if (0 == address_max) { + opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, opal_smsc_base_framework.framework_output, + "mca_smsc_xpmem_component_query: could not determine the address max"); + return OPAL_ERR_NOT_AVAILABLE; + } + + /* save the calculated maximum */ + mca_smsc_xpmem_component.my_address_max = address_max - 1; + + /* it is safe to use XPMEM_MAXADDR_SIZE here (which is always (size_t)-1) even though + * it is not safe for attach */ + mca_smsc_xpmem_component.my_seg_id = xpmem_make(0, XPMEM_MAXADDR_SIZE, XPMEM_PERMIT_MODE, + (void *) 0666); + if (-1 == mca_smsc_xpmem_component.my_seg_id) { + return OPAL_ERR_NOT_AVAILABLE; + } + + mca_smsc_xpmem_send_modex(); + + return OPAL_SUCCESS; +} + +static mca_smsc_module_t
*mca_smsc_xpmem_component_enable(void) +{ + if (0 > mca_smsc_xpmem_component.super.priority) { + return NULL; + } + + /* limit segment alignment to be between 4k (2^12) and 32M (2^25) */ + mca_smsc_xpmem_component.log_attach_align + = opal_min(opal_max(mca_smsc_xpmem_component.log_attach_align, 12), 25); + + mca_smsc_xpmem_module.vma_module = mca_rcache_base_vma_module_alloc(); + + return &mca_smsc_xpmem_module.super; +} diff --git a/opal/mca/smsc/xpmem/smsc_xpmem_internal.h b/opal/mca/smsc/xpmem/smsc_xpmem_internal.h new file mode 100644 index 00000000000..42105625c18 --- /dev/null +++ b/opal/mca/smsc/xpmem/smsc_xpmem_internal.h @@ -0,0 +1,81 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2021 Google, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_INTERNAL_H +#define OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_INTERNAL_H + +#include "opal/mca/smsc/xpmem/smsc_xpmem.h" + +#include "opal/mca/rcache/base/rcache_base_vma.h" +#if defined(HAVE_XPMEM_H) +# include + +typedef struct xpmem_addr xpmem_addr_t; +#elif defined(HAVE_SN_XPMEM_H) +# include + +typedef int64_t xpmem_segid_t; +typedef int64_t xpmem_apid_t; +#endif + +typedef struct xpmem_addr xpmem_addr_t; + +struct mca_smsc_xpmem_modex_t { + /** XPMEM segment id for this peer */ + xpmem_segid_t seg_id; + /** maximum address we can attach to on this peer */ + uintptr_t address_max; +}; + +typedef struct mca_smsc_xpmem_modex_t mca_smsc_xpmem_modex_t; + +struct mca_smsc_xpmem_endpoint_t { + mca_smsc_endpoint_t super; + /** XPMEM apid for this peer */ + xpmem_apid_t apid; + /** maximum address we can attach to on this peer */ + uintptr_t address_max; +}; + +typedef struct mca_smsc_xpmem_endpoint_t mca_smsc_xpmem_endpoint_t; + +OBJ_CLASS_DECLARATION(mca_smsc_xpmem_endpoint_t); + +struct mca_smsc_xpmem_component_t { + mca_smsc_component_t super; + + /** maximum attachment address for this process.
attempts to attach past this value may fail. */ + uintptr_t my_address_max; + /** XPMEM segment id for this process */ + xpmem_segid_t my_seg_id; + /** log base 2 of the attachment alignment. this controls how big the smallest attachment is. a + * larger value will produce fewer entries in the cache but will increase attachment time. */ + unsigned int log_attach_align; + /** maximum size that will be used with a single memcpy call. on some systems we see better + * performance if we chunk the copy into multiple memcpy calls. */ + uint64_t memcpy_chunk_size; +}; + +typedef struct mca_smsc_xpmem_component_t mca_smsc_xpmem_component_t; + +struct mca_smsc_xpmem_module_t { + mca_smsc_module_t super; + + /** cache of xpmem attachments. this cache holds attachments for all peers. the registrations + * are differentiated by the alloc_base which is set to the endpoint. */ + mca_rcache_base_vma_module_t *vma_module; +}; + +typedef struct mca_smsc_xpmem_module_t mca_smsc_xpmem_module_t; + +extern mca_smsc_xpmem_module_t mca_smsc_xpmem_module; +extern mca_smsc_xpmem_component_t mca_smsc_xpmem_component; + +#endif /* OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_INTERNAL_H */ diff --git a/opal/mca/smsc/xpmem/smsc_xpmem_module.c b/opal/mca/smsc/xpmem/smsc_xpmem_module.c new file mode 100644 index 00000000000..d2954c1e31f --- /dev/null +++ b/opal/mca/smsc/xpmem/smsc_xpmem_module.c @@ -0,0 +1,311 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2020-2021 Google, LLC. All rights reserved. + * Copyright (c) 2021 Nanook Consulting. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/include/opal/align.h" +#include "opal/mca/memchecker/base/base.h" +#include "opal/mca/pmix/pmix-internal.h" +#include "opal/mca/rcache/rcache.h" +#include "opal/mca/smsc/base/base.h" +#include "opal/mca/smsc/xpmem/smsc_xpmem_internal.h" +#include "opal/util/minmax.h" + +OBJ_CLASS_INSTANCE(mca_smsc_xpmem_endpoint_t, opal_object_t, NULL, NULL); + +mca_smsc_endpoint_t *mca_smsc_xpmem_get_endpoint(opal_proc_t *peer_proc) +{ + mca_smsc_xpmem_endpoint_t *endpoint = OBJ_NEW(mca_smsc_xpmem_endpoint_t); + if (OPAL_UNLIKELY(NULL == endpoint)) { + return NULL; + } + + endpoint->super.proc = peer_proc; + + int rc; + size_t modex_size; + mca_smsc_xpmem_modex_t *modex; + OPAL_MODEX_RECV_IMMEDIATE(rc, &mca_smsc_xpmem_component.super.smsc_version, + &peer_proc->proc_name, (void **) &modex, &modex_size); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + OBJ_RELEASE(endpoint); + return NULL; + } + + endpoint->apid = xpmem_get(modex->seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666); + endpoint->address_max = modex->address_max; + + return &endpoint->super; +} + +struct mca_smsc_xpmem_cleanup_reg_ctx_t { + mca_smsc_xpmem_endpoint_t *endpoint; + opal_list_t *registrations; +}; + +typedef struct mca_smsc_xpmem_cleanup_reg_ctx_t mca_smsc_xpmem_cleanup_reg_ctx_t; + +struct mca_smsc_xpmem_check_reg_ctx_t { + mca_smsc_xpmem_endpoint_t *endpoint; + mca_rcache_base_registration_t **reg; + uintptr_t base; + uintptr_t bound; +}; +typedef struct mca_smsc_xpmem_check_reg_ctx_t mca_smsc_xpmem_check_reg_ctx_t; + +static int mca_smsc_xpmem_check_reg(mca_rcache_base_registration_t *reg, void *ctx) +{ + mca_smsc_xpmem_check_reg_ctx_t *xpmem_ctx = (mca_smsc_xpmem_check_reg_ctx_t *) ctx; + + if (reg->alloc_base != (void *) xpmem_ctx->endpoint) { + /* ignore this registration */ + return OPAL_SUCCESS; + } + + xpmem_ctx->reg[0] = reg; + + if (xpmem_ctx->bound <= (uintptr_t) 
reg->bound && xpmem_ctx->base >= (uintptr_t) reg->base) { + if (0 == opal_atomic_fetch_add_32(®->ref_count, 1)) { + /* registration is being deleted by a thread in sm_return_registration. the + * VMA tree implementation will block in mca_rcache_delete until we finish + * iterating over the VMA tree so it is safe to just ignore this registration + * and continue. */ + xpmem_ctx->reg[0] = NULL; + return OPAL_SUCCESS; + } + return 1; + } + + if (MCA_RCACHE_FLAGS_INVALID & opal_atomic_fetch_or_32(®->flags, MCA_RCACHE_FLAGS_INVALID)) { + /* another thread has already marked this registration as invalid. ignore and continue. */ + xpmem_ctx->reg[0] = NULL; + return OPAL_SUCCESS; + } + + /* let the caller know we found an overlapping registration that can be coalesced into + * the requested interval. the caller will remove the last reference and delete the + * registration. */ + return 2; +} + +/* look up the remote pointer in the peer rcache and attach if + * necessary */ +void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags, + void *remote_ptr, size_t size, void **local_ptr) +{ + mca_smsc_xpmem_endpoint_t *xpmem_endpoint = (mca_smsc_xpmem_endpoint_t *) endpoint; + mca_rcache_base_vma_module_t *vma_module = mca_smsc_xpmem_module.vma_module; + uint64_t attach_align = 1 << mca_smsc_xpmem_component.log_attach_align; + mca_rcache_base_registration_t *reg = NULL; + mca_smsc_xpmem_check_reg_ctx_t check_ctx = {.endpoint = xpmem_endpoint, .reg = ®}; + xpmem_addr_t xpmem_addr; + uintptr_t base, bound; + int rc; + + base = OPAL_DOWN_ALIGN((uintptr_t) remote_ptr, attach_align, uintptr_t); + bound = OPAL_ALIGN((uintptr_t) remote_ptr + size - 1, attach_align, uintptr_t) + 1; + if (OPAL_UNLIKELY(bound > xpmem_endpoint->address_max)) { + bound = xpmem_endpoint->address_max; + } + + check_ctx.base = base; + check_ctx.bound = bound; + + /* several segments may match the base pointer */ + rc = mca_rcache_base_vma_iterate(vma_module, (void *) base, bound - 
base, true, + mca_smsc_xpmem_check_reg, &check_ctx); + if (2 == rc) { + bound = bound < (uintptr_t) reg->bound ? (uintptr_t) reg->bound : bound; + base = base > (uintptr_t) reg->base ? (uintptr_t) reg->base : base; + mca_smsc_xpmem_unmap_peer_region(reg); + reg = NULL; + } + + if (NULL == reg) { + reg = OBJ_NEW(mca_rcache_base_registration_t); + if (OPAL_LIKELY(NULL != reg)) { + /* stick around for awhile */ + reg->ref_count = 2; + reg->base = (unsigned char *) base; + reg->bound = (unsigned char *) bound; + reg->alloc_base = (void *) endpoint; + +#if defined(HAVE_SN_XPMEM_H) + xpmem_addr.id = xpmem_endpoint->apid; +#else + xpmem_addr.apid = xpmem_endpoint->apid; +#endif + xpmem_addr.offset = base; + + opal_output_verbose(MCA_BASE_VERBOSE_INFO, opal_smsc_base_framework.framework_output, + "mca_smsc_xpmem_map_peer_region: creating region mapping " + "for endpoint %p address range %p-%p", + endpoint, reg->base, reg->bound); + + reg->rcache_context = xpmem_attach(xpmem_addr, bound - base, NULL); + if (OPAL_UNLIKELY((void *) -1 == reg->rcache_context)) { + OBJ_RELEASE(reg); + return NULL; + } + + opal_memchecker_base_mem_defined(reg->rcache_context, bound - base); + + mca_rcache_base_vma_insert(vma_module, reg, 0); + } + } + + opal_atomic_wmb(); + *local_ptr = (void *) ((uintptr_t) reg->rcache_context + + (ptrdiff_t)((uintptr_t) remote_ptr - (uintptr_t) reg->base)); + + return (void *) reg; +} + +void mca_smsc_xpmem_unmap_peer_region(void *ctx) +{ + mca_rcache_base_registration_t *reg = (mca_rcache_base_registration_t *) ctx; + mca_rcache_base_vma_module_t *vma_module = mca_smsc_xpmem_module.vma_module; + int32_t ref_count; + + ref_count = opal_atomic_add_fetch_32(®->ref_count, -1); + if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) { + opal_output_verbose(MCA_BASE_VERBOSE_INFO, opal_smsc_base_framework.framework_output, + "mca_smsc_xpmem_unmap_peer_region: deleting region mapping for " + "endpoint %p address range %p-%p", + 
reg->alloc_base, reg->base, reg->bound); +#if OPAL_ENABLE_DEBUG + int ret = mca_rcache_base_vma_delete(vma_module, reg); + assert(OPAL_SUCCESS == ret); +#else + (void) mca_rcache_base_vma_delete(vma_module, reg); +#endif + opal_memchecker_base_mem_noaccess(reg->rcache_context, (uintptr_t)(reg->bound - reg->base)); + (void) xpmem_detach(reg->rcache_context); + OBJ_RELEASE(reg); + } +} + +static int mca_smsc_xpmem_endpoint_rcache_cleanup(mca_rcache_base_registration_t *reg, void *ctx) +{ + mca_smsc_xpmem_cleanup_reg_ctx_t *cleanup_ctx = (mca_smsc_xpmem_cleanup_reg_ctx_t *) ctx; + if (reg->alloc_base == (void *) cleanup_ctx->endpoint) { + opal_list_append(cleanup_ctx->registrations, ®->super.super); + } + + return OPAL_SUCCESS; +} + +static void mca_smsc_xpmem_cleanup_endpoint(mca_smsc_xpmem_endpoint_t *endpoint) +{ + mca_rcache_base_registration_t *reg; + opal_list_t registrations; + mca_smsc_xpmem_cleanup_reg_ctx_t cleanup_ctx = {.endpoint = endpoint, + .registrations = ®istrations}; + + opal_output_verbose(MCA_BASE_VERBOSE_INFO, opal_smsc_base_framework.framework_output, + "mca_smsc_xpmem_cleanup_endpoint: cleaning up endpoint %p", endpoint); + + OBJ_CONSTRUCT(®istrations, opal_list_t); + + /* clean out the registration cache */ + (void) mca_rcache_base_vma_iterate(mca_smsc_xpmem_module.vma_module, NULL, (size_t) -1, true, + mca_smsc_xpmem_endpoint_rcache_cleanup, + (void *) &cleanup_ctx); + opal_output_verbose(MCA_BASE_VERBOSE_INFO, opal_smsc_base_framework.framework_output, + "mca_smsc_xpmem_cleanup_endpoint: deleting %" PRIsize_t " region mappings", + opal_list_get_size(®istrations)); + while (NULL + != (reg = (mca_rcache_base_registration_t *) opal_list_remove_first(®istrations))) { + mca_smsc_xpmem_unmap_peer_region(reg); + } + OBJ_DESTRUCT(®istrations); + + xpmem_release(endpoint->apid); + endpoint->apid = 0; +} + +void mca_smsc_xpmem_return_endpoint(mca_smsc_endpoint_t *endpoint) +{ + mca_smsc_xpmem_cleanup_endpoint((mca_smsc_xpmem_endpoint_t *) endpoint); + 
OBJ_RELEASE(endpoint); +} + +/* memcpy is faster at larger sizes but is undefined if the + pointers are aliased (TODO -- readd alias check) */ +static inline void mca_smsc_xpmem_memmove(void *dst, void *src, size_t size) +{ + while (size > 0) { + size_t copy_size = opal_min(size, mca_smsc_xpmem_component.memcpy_chunk_size); + memcpy(dst, src, copy_size); + dst = (void *) ((uintptr_t) dst + copy_size); + src = (void *) ((uintptr_t) src + copy_size); + size -= copy_size; + } +} + +int mca_smsc_xpmem_copy_to(mca_smsc_endpoint_t *endpoint, void *local_address, void *remote_address, + size_t size, void *reg_handle) +{ + /* ignore the registration handle as it is not used for XPMEM */ + (void) reg_handle; + + void *remote_ptr, *ctx; + ctx = mca_smsc_xpmem_map_peer_region(endpoint, /*flags=*/0, remote_address, size, &remote_ptr); + mca_smsc_xpmem_memmove(remote_ptr, local_address, size); + + mca_smsc_xpmem_unmap_peer_region(ctx); + + return OPAL_SUCCESS; +} + +int mca_smsc_xpmem_copy_from(mca_smsc_endpoint_t *endpoint, void *local_address, + void *remote_address, size_t size, void *reg_handle) +{ + /* ignore the registration handle as it is not used for XPMEM */ + (void) reg_handle; + + void *remote_ptr, *ctx; + + struct timespec start, stop; + ctx = mca_smsc_xpmem_map_peer_region(endpoint, /*flags=*/0, remote_address, size, &remote_ptr); + mca_smsc_xpmem_memmove(local_address, remote_ptr, size); + + mca_smsc_xpmem_unmap_peer_region(ctx); + + return OPAL_SUCCESS; +} + +/* unsupported interfaces defined to support MCA direct */ +void *mca_smsc_xpmem_register_region(void *local_address, size_t size) +{ + return NULL; +} + +void mca_smsc_xpmem_deregister_region(void *reg_data) +{ +} + +mca_smsc_xpmem_module_t mca_smsc_xpmem_module = { + .super = { + .features = MCA_SMSC_FEATURE_CAN_MAP, + .get_endpoint = mca_smsc_xpmem_get_endpoint, + .return_endpoint = mca_smsc_xpmem_return_endpoint, + .copy_to = mca_smsc_xpmem_copy_to, + .copy_from = mca_smsc_xpmem_copy_from, + 
.map_peer_region = mca_smsc_xpmem_map_peer_region, + .unmap_peer_region = mca_smsc_xpmem_unmap_peer_region, + }, +}; diff --git a/opal/mca/threads/argobots/threads_argobots_module.c b/opal/mca/threads/argobots/threads_argobots_module.c index 708a7da645a..50acb60d8ce 100644 --- a/opal/mca/threads/argobots/threads_argobots_module.c +++ b/opal/mca/threads/argobots/threads_argobots_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -33,78 +33,6 @@ #include "opal/util/output.h" #include "opal/util/sys_limits.h" -/* - * Constructor - */ -static void opal_thread_construct(opal_thread_t *t) -{ - t->t_run = 0; - t->t_handle = ABT_THREAD_NULL; -} - -OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL); - -static inline ABT_thread opal_thread_get_argobots_self(void) -{ - ABT_thread self; - ABT_thread_self(&self); - return self; -} - -static void opal_thread_argobots_wrapper(void *arg) -{ - opal_thread_t *t = (opal_thread_t *) arg; - t->t_ret = ((void *(*) (void *) ) t->t_run)(t); -} - -opal_thread_t *opal_thread_get_self(void) -{ - opal_threads_argobots_ensure_init(); - opal_thread_t *t = OBJ_NEW(opal_thread_t); - t->t_handle = opal_thread_get_argobots_self(); - return t; -} - -bool opal_thread_self_compare(opal_thread_t *t) -{ - opal_threads_argobots_ensure_init(); - return opal_thread_get_argobots_self() == t->t_handle; -} - -int opal_thread_join(opal_thread_t *t, void **thr_return) -{ - int rc = ABT_thread_free(&t->t_handle); - if (thr_return) { - *thr_return = t->t_ret; - } - t->t_handle = ABT_THREAD_NULL; - return (ABT_SUCCESS == rc) ? 
OPAL_SUCCESS : OPAL_ERROR; -} - -void opal_thread_set_main() -{ -} - -int opal_thread_start(opal_thread_t *t) -{ - opal_threads_argobots_ensure_init(); - int rc; - if (OPAL_ENABLE_DEBUG) { - if (NULL == t->t_run || ABT_THREAD_NULL != t->t_handle) { - return OPAL_ERR_BAD_PARAM; - } - } - - ABT_xstream self_xstream; - ABT_xstream_self(&self_xstream); - rc = ABT_thread_create_on_xstream(self_xstream, opal_thread_argobots_wrapper, t, - ABT_THREAD_ATTR_NULL, &t->t_handle); - - return (ABT_SUCCESS == rc) ? OPAL_SUCCESS : OPAL_ERROR; -} - -OBJ_CLASS_DECLARATION(opal_thread_t); - int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor) { opal_threads_argobots_ensure_init(); diff --git a/opal/mca/threads/argobots/threads_argobots_threads.h b/opal/mca/threads/argobots/threads_argobots_threads.h index 649553adcbd..417ad3d5884 100644 --- a/opal/mca/threads/argobots/threads_argobots_threads.h +++ b/opal/mca/threads/argobots/threads_argobots_threads.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart, @@ -27,15 +27,6 @@ #define OPAL_MCA_THREADS_ARGOBOTS_THREADS_ARGOBOTS_THREADS_H #include "opal/mca/threads/argobots/threads_argobots.h" -#include - -struct opal_thread_t { - opal_object_t super; - opal_thread_fn_t t_run; - void *t_arg; - ABT_thread t_handle; - void *t_ret; -}; /* Argobots are cooperatively scheduled so yield when idle */ #define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true diff --git a/opal/mca/threads/base/Makefile.am b/opal/mca/threads/base/Makefile.am index 4cbd5c742b1..91e24f11198 100644 --- a/opal/mca/threads/base/Makefile.am +++ b/opal/mca/threads/base/Makefile.am @@ -24,6 +24,7 @@ headers += \ libmca_threads_la_SOURCES += \ base/mutex.c \ + base/create_join.c \ base/threads_base.c \ base/tsd.c \ base/wait_sync.c diff --git a/opal/mca/threads/base/create_join.c b/opal/mca/threads/base/create_join.c new file mode 100644 index 00000000000..e134f085930 --- /dev/null +++ b/opal/mca/threads/base/create_join.c @@ -0,0 +1,83 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2021 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Sandia National Laboratories. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +#include "opal/constants.h" +#include "opal/mca/threads/threads.h" +#include "opal/mca/threads/tsd.h" +#include "opal/prefetch.h" +#include "opal/util/output.h" +#include "opal/util/sys_limits.h" + +/* + * Constructor + */ +static void opal_thread_construct(opal_thread_t *t) +{ + t->t_run = 0; + t->t_handle = (pthread_t) -1; +} + +OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL); + +int opal_thread_start(opal_thread_t *t) +{ + int rc; + + if (OPAL_ENABLE_DEBUG) { + if (NULL == t->t_run || (pthread_t) -1 != t->t_handle) { + return OPAL_ERR_BAD_PARAM; + } + } + + rc = pthread_create(&t->t_handle, NULL, (void *(*) (void *) ) t->t_run, t); + + return 0 == rc ? OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; +} + +int opal_thread_join(opal_thread_t *t, void **thr_return) +{ + int rc = pthread_join(t->t_handle, thr_return); + t->t_handle = (pthread_t) -1; + return 0 == rc ? OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; +} + +bool opal_thread_self_compare(opal_thread_t *t) +{ + return pthread_self() == t->t_handle; +} + +opal_thread_t *opal_thread_get_self(void) +{ + opal_thread_t *t = OBJ_NEW(opal_thread_t); + t->t_handle = pthread_self(); + return t; +} + +void opal_thread_set_main(void) +{ +} diff --git a/opal/mca/threads/mutex.h b/opal/mca/threads/mutex.h index e6c9dc3f5bf..94846e4d3c7 100644 --- a/opal/mca/threads/mutex.h +++ b/opal/mca/threads/mutex.h @@ -18,6 +18,8 @@ * Copyright (c) 2019 Sandia National Laboratories. All rights reserved. * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. * Copyright (c) 2021 Argonne National Laboratory. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* * $COPYRIGHT$ * @@ -138,11 +140,7 @@ static inline void opal_mutex_unlock(opal_mutex_t *mutex) */ static inline int opal_mutex_atomic_trylock(opal_mutex_t *mutex) { -#if OPAL_HAVE_ATOMIC_SPINLOCKS return opal_atomic_trylock(&mutex->m_lock_atomic); -#else - return opal_mutex_trylock(mutex); -#endif } /** @@ -152,11 +150,7 @@ static inline int opal_mutex_atomic_trylock(opal_mutex_t *mutex) */ static inline void opal_mutex_atomic_lock(opal_mutex_t *mutex) { -#if OPAL_HAVE_ATOMIC_SPINLOCKS opal_atomic_lock(&mutex->m_lock_atomic); -#else - opal_mutex_lock(mutex); -#endif } /** @@ -166,11 +160,7 @@ static inline void opal_mutex_atomic_lock(opal_mutex_t *mutex) */ static inline void opal_mutex_atomic_unlock(opal_mutex_t *mutex) { -#if OPAL_HAVE_ATOMIC_SPINLOCKS opal_atomic_unlock(&mutex->m_lock_atomic); -#else - opal_mutex_unlock(mutex); -#endif } /** diff --git a/opal/mca/threads/pthreads/threads_pthreads_module.c b/opal/mca/threads/pthreads/threads_pthreads_module.c index 70dec2964b6..ac09b71d53d 100644 --- a/opal/mca/threads/pthreads/threads_pthreads_module.c +++ b/opal/mca/threads/pthreads/threads_pthreads_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -32,58 +32,9 @@ #include "opal/util/output.h" #include "opal/util/sys_limits.h" -/* - * Constructor - */ -static void opal_thread_construct(opal_thread_t *t) -{ - t->t_run = 0; - t->t_handle = (pthread_t) -1; -} - -OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL); - -int opal_thread_start(opal_thread_t *t) -{ - int rc; - - if (OPAL_ENABLE_DEBUG) { - if (NULL == t->t_run || (pthread_t) -1 != t->t_handle) { - return OPAL_ERR_BAD_PARAM; - } - } - - rc = pthread_create(&t->t_handle, NULL, (void *(*) (void *) ) t->t_run, t); - - return 0 == rc ? OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; -} - -int opal_thread_join(opal_thread_t *t, void **thr_return) -{ - int rc = pthread_join(t->t_handle, thr_return); - t->t_handle = (pthread_t) -1; - return 0 == rc ? OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; -} - -bool opal_thread_self_compare(opal_thread_t *t) -{ - return pthread_self() == t->t_handle; -} - -opal_thread_t *opal_thread_get_self(void) -{ - opal_thread_t *t = OBJ_NEW(opal_thread_t); - t->t_handle = pthread_self(); - return t; -} - int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor) { int rc; rc = pthread_key_create(key, destructor); return 0 == rc ? OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; } - -void opal_thread_set_main(void) -{ -} diff --git a/opal/mca/threads/pthreads/threads_pthreads_threads.h b/opal/mca/threads/pthreads/threads_pthreads_threads.h index 3cd7a3dae14..4bdb3710715 100644 --- a/opal/mca/threads/pthreads/threads_pthreads_threads.h +++ b/opal/mca/threads/pthreads/threads_pthreads_threads.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart, @@ -32,13 +32,6 @@ #include "opal/mca/threads/pthreads/threads_pthreads.h" #include "opal/mca/threads/threads.h" -struct opal_thread_t { - opal_object_t super; - opal_thread_fn_t t_run; - void *t_arg; - pthread_t t_handle; -}; - /* Pthreads do not need to yield when idle */ #define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT false diff --git a/opal/mca/threads/qthreads/threads_qthreads_module.c b/opal/mca/threads/qthreads/threads_qthreads_module.c index a5dc24674a2..7dca13f5f4f 100644 --- a/opal/mca/threads/qthreads/threads_qthreads_module.c +++ b/opal/mca/threads/qthreads/threads_qthreads_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -51,74 +51,6 @@ static inline void self_key_ensure_init(void) /* opal_thread_self_key has been already initialized. */ } -/* - * Constructor - */ -static void opal_thread_construct(opal_thread_t *t) -{ - t->t_run = 0; - t->t_thread_ret = 0; -} - -OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL); - -static inline aligned_t *opal_thread_get_qthreads_self(void) -{ - self_key_ensure_init(); - void *ptr = qthread_getspecific(opal_thread_self_key); - return (aligned_t *) ptr; -} - -static aligned_t opal_thread_qthreads_wrapper(void *arg) -{ - opal_thread_t *t = (opal_thread_t *) arg; - - /* Register itself. 
*/ - self_key_ensure_init(); - qthread_setspecific(opal_thread_self_key, t->t_thread_ret_ptr); - - t->t_ret = ((void *(*) (void *) ) t->t_run)(t); - return 0; -} - -opal_thread_t *opal_thread_get_self(void) -{ - opal_threads_ensure_init_qthreads(); - opal_thread_t *t = OBJ_NEW(opal_thread_t); - t->t_thread_ret_ptr = opal_thread_get_qthreads_self(); - return t; -} - -bool opal_thread_self_compare(opal_thread_t *t) -{ - opal_threads_ensure_init_qthreads(); - return opal_thread_get_qthreads_self() == &t->t_thread_ret; -} - -int opal_thread_join(opal_thread_t *t, void **thr_return) -{ - qthread_readFF(NULL, t->t_thread_ret_ptr); - if (thr_return) { - *thr_return = t->t_ret; - } - t->t_thread_ret = 0; - return OPAL_SUCCESS; -} - -void opal_thread_set_main(void) -{ -} - -int opal_thread_start(opal_thread_t *t) -{ - opal_threads_ensure_init_qthreads(); - t->t_thread_ret_ptr = &t->t_thread_ret; - qthread_fork(opal_thread_qthreads_wrapper, t, &t->t_thread_ret); - return OPAL_SUCCESS; -} - -OBJ_CLASS_DECLARATION(opal_thread_t); - int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor) { opal_threads_ensure_init_qthreads(); diff --git a/opal/mca/threads/qthreads/threads_qthreads_threads.h b/opal/mca/threads/qthreads/threads_qthreads_threads.h index 1969c558c3d..58630f4023a 100644 --- a/opal/mca/threads/qthreads/threads_qthreads_threads.h +++ b/opal/mca/threads/qthreads/threads_qthreads_threads.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart, @@ -27,16 +27,6 @@ #define OPAL_MCA_THREADS_QTHREADS_THREADS_QTHREADS_THREADS_H 1 #include "opal/mca/threads/qthreads/threads_qthreads.h" -#include - -struct opal_thread_t { - opal_object_t super; - opal_thread_fn_t t_run; - void *t_arg; - void *t_ret; - aligned_t t_thread_ret; - aligned_t *t_thread_ret_ptr; -}; /* Qthreads are cooperatively scheduled so yield when idle */ #define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true diff --git a/opal/mca/threads/threads.h b/opal/mca/threads/threads.h index 7e168380666..0f78830cdad 100644 --- a/opal/mca/threads/threads.h +++ b/opal/mca/threads/threads.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -45,6 +45,13 @@ typedef void *(*opal_thread_fn_t)(opal_object_t *); #include MCA_threads_base_include_HEADER +struct opal_thread_t { + opal_object_t super; + opal_thread_fn_t t_run; + void *t_arg; + pthread_t t_handle; +}; + typedef struct opal_thread_t opal_thread_t; OBJ_CLASS_DECLARATION(opal_thread_t); diff --git a/opal/mca/timer/linux/timer_linux_component.c b/opal/mca/timer/linux/timer_linux_component.c index b710e54f0e8..fabb3f4ea09 100644 --- a/opal/mca/timer/linux/timer_linux_component.c +++ b/opal/mca/timer/linux/timer_linux_component.c @@ -16,6 +16,8 @@ * reserved. * Copyright (c) 2015-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2016 Broadcom Limited. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -132,7 +134,7 @@ static int opal_timer_linux_find_freq(void) } } -#if ((OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64)) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) if (0 == opal_timer_linux_freq && opal_sys_timer_is_monotonic()) { /* tsc is exposed through bogomips ~> loops_per_jiffy ~> tsc_khz */ loc = find_info(fp, "bogomips", buf, 1024); diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 57f1ea4e062..bfec2674974 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -54,6 +54,7 @@ #include "opal/mca/pmix/base/base.h" #include "opal/mca/reachable/base/base.h" #include "opal/mca/shmem/base/base.h" +#include "opal/mca/smsc/base/base.h" #include "opal/mca/threads/threads.h" #include "opal/mca/threads/tsd.h" #include "opal/mca/timer/base/base.h" @@ -79,11 +80,6 @@ #include "opal/util/sys_limits.h" #include "opal/util/timings.h" -#if OPAL_CC_USE_PRAGMA_IDENT -# pragma ident OPAL_IDENT_STRING -#elif OPAL_CC_USE_IDENT -# ident OPAL_IDENT_STRING -#endif const char opal_version_string[] = OPAL_IDENT_STRING; int opal_initialized = 0; @@ -485,6 +481,9 @@ int opal_init_util(int *pargc, char ***pargv) opal_init_called = true; + /* register for */ + opal_finalize_register_cleanup_arg (mca_base_framework_close_list, opal_init_util_frameworks); + /* set the nodename right away so anyone who needs it has it. Note * that we don't bother with fqdn and prefix issues here - we let * the RTE later replace this with a modified name if the user @@ -619,11 +618,12 @@ int opal_init_util(int *pargc, char ***pargv) * versions of memcpy correctly configured. 
*/ static mca_base_framework_t *opal_init_frameworks[] = { - &opal_threads_base_framework, &opal_hwloc_base_framework, - &opal_memcpy_base_framework, &opal_memchecker_base_framework, + &opal_threads_base_framework, &opal_hwloc_base_framework, + &opal_memcpy_base_framework, &opal_memchecker_base_framework, &opal_backtrace_base_framework, &opal_timer_base_framework, - &opal_shmem_base_framework, &opal_reachable_base_framework, - &opal_pmix_base_framework, NULL, + &opal_shmem_base_framework, &opal_reachable_base_framework, + &opal_pmix_base_framework, &opal_smsc_base_framework, + NULL, }; int opal_init(int *pargc, char ***pargv) diff --git a/opal/runtime/opal_params.c b/opal/runtime/opal_params.c index 93896defc98..1704c83343c 100644 --- a/opal/runtime/opal_params.c +++ b/opal/runtime/opal_params.c @@ -61,6 +61,7 @@ bool opal_timing_overhead = true; #endif bool opal_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT); +bool opal_cuda_runtime_initialized = false; bool opal_cuda_support = false; bool opal_warn_on_missing_libcuda = true; diff --git a/opal/runtime/opal_params.h b/opal/runtime/opal_params.h index d5d63cb933c..bbdd8fa40db 100644 --- a/opal/runtime/opal_params.h +++ b/opal/runtime/opal_params.h @@ -48,6 +48,11 @@ OPAL_DECLSPEC extern bool opal_built_with_cuda_support; * */ OPAL_DECLSPEC extern bool opal_cuda_support; +/** + * Whether cuda runtime support is initialized or not. + */ +OPAL_DECLSPEC extern bool opal_cuda_runtime_initialized; + /** * * Whether we want to warn the user when libcuda is missing. 
* */ diff --git a/opal/runtime/opal_progress.c b/opal/runtime/opal_progress.c index cf50d6bb6f9..3d319128b9f 100644 --- a/opal/runtime/opal_progress.c +++ b/opal/runtime/opal_progress.c @@ -213,7 +213,7 @@ static int opal_progress_events(void) * care, as the cost of that happening is far outweighed by the cost * of the if checks (they were resulting in bad pipe stalling behavior) */ -void opal_progress(void) +int opal_progress(void) { static uint32_t num_calls = 0; size_t i; @@ -250,6 +250,8 @@ void opal_progress(void) */ opal_thread_yield(); } + + return events; } int opal_progress_set_event_flag(int flag) diff --git a/opal/runtime/opal_progress.h b/opal/runtime/opal_progress.h index 16d1404c8f1..c079d116ca9 100644 --- a/opal/runtime/opal_progress.h +++ b/opal/runtime/opal_progress.h @@ -57,18 +57,28 @@ OPAL_DECLSPEC int opal_progress_init(void); * opal_progress_event_users_delete()) or the time since the last call * into the event library is greater than the progress tick rate (by * default, 10ms). + * + * Returns 0 if no progress has been observed, non-zero otherwise. */ -OPAL_DECLSPEC void opal_progress(void); +OPAL_DECLSPEC int opal_progress(void); /** * Control how the event library is called * * Adjust the flags argument used to call opal_event_loop() from - * opal_progress(). The default argument is OPAL_EVLOOP_ONELOOP, - * meaning that the call to opal_event_loop() will block pending - * events, but may block for a period of time. + * opal_progress(). The default argument is + * + * OPAL_EVLOOP_NONBLOCK | OPAL_EVLOOP_ONCE + * + * OPAL_EVLOOP_NONBLOCK means that if there are no active events, + * opal_event_loop() should return immediately (instead of + * waiting for active events). + * + * OPAL_EVLOOP_ONCE means opal_event_loop() should only run one + * iteration, which includes polling the event queue and processing + * the active events. * - * @param flags One of the valid vlags argument to + * @param flags One of the valid flags argument to * opal_event_loop().
* @return Previous value of flags used to call * opal_event_loop(). diff --git a/opal/tools/wrappers/Makefile.am b/opal/tools/wrappers/Makefile.am index 4e63d742c98..a03ff2e6ecd 100644 --- a/opal/tools/wrappers/Makefile.am +++ b/opal/tools/wrappers/Makefile.am @@ -40,21 +40,18 @@ dist_opaldata_DATA = help-opal-wrapper.txt if WANT_INSTALL_HEADERS nodist_opaldata_DATA = \ - opalcc-wrapper-data.txt \ - opalc++-wrapper-data.txt + opalcc-wrapper-data.txt -nodist_man_MANS += opalcc.1 opalc++.1 +nodist_man_MANS += opalcc.1 pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = opal.pc install-exec-hook: (cd $(DESTDIR)$(bindir); rm -f opalcc$(EXEEXT); $(LN_S) opal_wrapper$(EXEECT) opalcc$(EXEEXT)) - (cd $(DESTDIR)$(bindir); rm -f opalc++$(EXEEXT); $(LN_S) opal_wrapper$(EXEECT) opalc++$(EXEEXT)) uninstall-local: - rm -f $(DESTDIR)$(bindir)/opalcc$(EXEEXT) \ - $(DESTDIR)$(bindir)/opalc++$(EXEEXT) + rm -f $(DESTDIR)$(bindir)/opalcc$(EXEEXT) endif # WANT_INSTALL_HEADERS @@ -73,9 +70,5 @@ opalcc.1: generic_wrapper.1 rm -f opalcc.1 sed -e 's/#COMMAND#/opalcc/g' -e 's/#PROJECT#/Open PAL/g' -e 's/#PROJECT_SHORT#/OPAL/g' -e 's/#LANGUAGE#/C/g' < $(top_builddir)/opal/tools/wrappers/generic_wrapper.1 > opalcc.1 -opalc++.1: generic_wrapper.1 - rm -f opalc++.1 - sed -e 's/#COMMAND#/opalc++/g' -e 's/#PROJECT#/Open PAL/g' -e 's/#PROJECT_SHORT#/OPAL/g' -e 's/#LANGUAGE#/C++/g' < $(top_builddir)/opal/tools/wrappers/generic_wrapper.1 > opalc++.1 - distclean-local: - rm -f $(real_man_pages) opalcc.1 opalc++.1 + rm -f $(real_man_pages) opalcc.1 diff --git a/opal/tools/wrappers/opal_wrapper.c b/opal/tools/wrappers/opal_wrapper.c index 45827dc9356..04d54dc57bc 100644 --- a/opal/tools/wrappers/opal_wrapper.c +++ b/opal/tools/wrappers/opal_wrapper.c @@ -673,7 +673,7 @@ int main(int argc, char *argv[]) == strncmp(user_argv[i], "--showme:version", strlen("--showme:version"))) { char *str; - str = opal_show_help_string("help-opal-wrapper.txt", "version", false, argv[0], + str = 
opal_show_help_string("help-opal-wrapper.txt", "version", false, base_argv0, options_data[user_data_idx].project, options_data[user_data_idx].version, options_data[user_data_idx].language, NULL); @@ -685,7 +685,7 @@ int main(int argc, char *argv[]) } else if (0 == strncmp(user_argv[i], "-showme:help", strlen("-showme:help")) || 0 == strncmp(user_argv[i], "--showme:help", strlen("--showme:help"))) { char *str; - str = opal_show_help_string("help-opal-wrapper.txt", "usage", false, argv[0], + str = opal_show_help_string("help-opal-wrapper.txt", "usage", false, base_argv0, options_data[user_data_idx].project, NULL); if (NULL != str) { printf("%s", str); @@ -696,7 +696,7 @@ int main(int argc, char *argv[]) goto cleanup; } else if (0 == strncmp(user_argv[i], "-showme:", strlen("-showme:")) || 0 == strncmp(user_argv[i], "--showme:", strlen("--showme:"))) { - fprintf(stderr, "%s: unrecognized option: %s\n", argv[0], user_argv[i]); + fprintf(stderr, "%s: unrecognized option: %s\n", base_argv0, user_argv[i]); fprintf(stderr, "Type '%s --showme:help' for usage.\n", argv[0]); exit_status = 1; goto cleanup; diff --git a/opal/util/Makefile.am b/opal/util/Makefile.am index 9018a596dd1..4a457d7e1b3 100644 --- a/opal/util/Makefile.am +++ b/opal/util/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2007-2022 Cisco Systems, Inc. All rights reserved # Copyright (c) 2013 NVIDIA Corporation. All rights reserved. # Copyright (c) 2013 Intel, Inc. All rights reserved # Copyright (c) 2016 Los Alamos National Security, LLC. 
All rights @@ -48,9 +48,9 @@ headers = \ bipartite_graph.h \ bipartite_graph_internal.h \ bit_ops.h \ + clock_gettime.h \ cmd_line.h \ crc.h \ - daemon_init.h \ ethtool.h \ error.h \ event.h \ @@ -91,7 +91,6 @@ libopalutil_la_SOURCES = \ bipartite_graph.c \ cmd_line.c \ crc.c \ - daemon_init.c \ ethtool.c \ error.c \ event.c \ diff --git a/opal/util/clock_gettime.h b/opal/util/clock_gettime.h new file mode 100644 index 00000000000..87692388cc6 --- /dev/null +++ b/opal/util/clock_gettime.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** @file clock_gettime.h + * + * Simple, portable wrappers around clock_gettime(3) and + * clock_getres(3) to always get monotonically-increasing time. + * + * If the underlying OS does not have clock_gettime(3), use + * gettimeofday(3) instead. + * + * We intentionally do not use the OPAL timer framework for + * high-precision time here; see + * https://github.com/open-mpi/ompi/issues/3003 for more details. + * + * As of Dec 2021, it turns out that CLOCK_MONOTONIC can actually go + * backwards on macOS (!). CLOCK_MONOTONIC does *not* go backwards on + * Linux (or anywhere else we can find), though, even in the presence + * of small NTP time adjustments -- e.g., adjtime(3) simply slightly + * speeds up or slows down the system clock to make it eventually get + * to the desired time. On macOS, we can use CLOCK_MONOTONIC_RAW, + * which never goes backwards. + * + * Hence, for these wrappers, use CLOCK_MONOTONIC_RAW on Darwin, and + * use CLOCK_MONOTONIC everywhere else. + * + * See + * https://github.com/open-mpi/ompi/pull/8057#discussion_r762612710 + * and + * https://github.com/open-mpi/ompi/pull/8057#discussion_r762618783 + * for more details.
+ */ + +#ifndef OPAL_UTIL_CLOCK_GETTIME_H_ +#define OPAL_UTIL_CLOCK_GETTIME_H_ + +#include "opal_config.h" + +#if HAVE_TIME_H +#include +#endif +#if HAVE_SYS_TIME_H +#include +#endif + +#if OPAL_HAVE_CLOCK_GETTIME +#if defined(__darwin__) +#define OPAL_CLOCK_TYPE CLOCK_MONOTONIC_RAW +#else +#define OPAL_CLOCK_TYPE CLOCK_MONOTONIC +#endif +#endif // OPAL_HAVE_CLOCK_GETTIME + +#if !defined(HAVE_STRUCT_TIMESPEC_TV_NSEC) +// Make sure that we have struct timespec; if not, define it. +struct timespec { + time_t tv_sec; + long tv_nsec; +}; +#endif + +/** + * Simple, portable wrapper around clock_gettime(3) for high-precision time. + * + * If the underlying system does not have clock_gettime(3), use + * gettimeofday(3) instead. + * + * @param spec (OUT) Struct to return the time + * @return Return value from underlying clock_gettime() + */ +static inline int opal_clock_gettime(struct timespec *spec) +{ +#if OPAL_HAVE_CLOCK_GETTIME + return clock_gettime(OPAL_CLOCK_TYPE, spec); +#else + // If we do not have clock_gettime(), fall back to gettimeofday() + struct timeval tv; + int ret = gettimeofday(&tv, NULL); + + spec->tv_sec = tv.tv_sec; + // Elevate the microseconds to nanoseconds + spec->tv_nsec = tv.tv_usec * 1000; + + return ret; +#endif +} + +/** + * Simple, portable wrapper around clock_getres(3) for high-precision time. + * + * If the underlying system does not have clock_gettime(3), return usec + * precision (because opal_clock_gettime() will be using gettimeofday(3)). + * + * @param spec (OUT) Struct to return the resolution + * @return Return value from underlying clock_getres() + */ +static inline int opal_clock_getres(struct timespec *spec) +{ +#if OPAL_HAVE_CLOCK_GETTIME + return clock_getres(OPAL_CLOCK_TYPE, spec); +#else + // If we don't have clock_gettime(), just return usec precision.
+ spec->tv_sec = 0; + spec->tv_nsec = 1000; + + return 0; +#endif +} + +#endif // OPAL_UTIL_CLOCK_GETTIME_H_ diff --git a/opal/util/cmd_line.c b/opal/util/cmd_line.c index 391de0de4ea..9708e3fc15b 100644 --- a/opal/util/cmd_line.c +++ b/opal/util/cmd_line.c @@ -499,7 +499,7 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) int argc; size_t j; char **argv; - char *ret, temp[MAX_WIDTH * 2], line[MAX_WIDTH * 2]; + char *ret, line[(MAX_WIDTH * 2) + 1]; char *start, *desc, *ptr; opal_list_item_t *item; ompi_cmd_line_option_t *option, **sorted; @@ -559,27 +559,27 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) } if (NULL != option->clo_single_dash_name) { line[2] = (filled) ? '|' : ' '; - strncat(line, "-", sizeof(line) - 1); - strncat(line, option->clo_single_dash_name, sizeof(line) - 1); + strncat(line, "-", sizeof(line) - strlen(line) - 1); + strncat(line, option->clo_single_dash_name, sizeof(line) - strlen(line) - 1); filled = true; } if (NULL != option->clo_long_name) { if (filled) { - strncat(line, "|", sizeof(line) - 1); + strncat(line, "|", sizeof(line) - strlen(line) - 1); } else { - strncat(line, " ", sizeof(line) - 1); + strncat(line, " ", sizeof(line) - strlen(line) - 1); } - strncat(line, "--", sizeof(line) - 1); - strncat(line, option->clo_long_name, sizeof(line) - 1); + strncat(line, "--", sizeof(line) - strlen(line) - 1); + strncat(line, option->clo_long_name, sizeof(line) - strlen(line) - 1); } - strncat(line, " ", sizeof(line) - 1); + strncat(line, " ", sizeof(line) - strlen(line) - 1); for (i = 0; (int) i < option->clo_num_params; ++i) { - len = sizeof(temp); - snprintf(temp, len, " ", (int) i); - strncat(line, temp, sizeof(line) - 1); + char temp[MAX_WIDTH * 2]; + snprintf(temp, MAX_WIDTH * 2, " ", (int) i); + strncat(line, temp, sizeof(line) - strlen(line) - 1); } if (option->clo_num_params > 0) { - strncat(line, " ", sizeof(line) - 1); + strncat(line, " ", sizeof(line) - strlen(line) - 1); } /* If we're less than param width, 
then start adding the @@ -635,7 +635,7 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) /* Last line */ if (strlen(start) < (MAX_WIDTH - PARAM_WIDTH)) { - strncat(line, start, sizeof(line) - 1); + strncat(line, start, sizeof(line) - strlen(line) - 1); opal_argv_append(&argc, &argv, line); break; } @@ -647,7 +647,7 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) for (ptr = start + (MAX_WIDTH - PARAM_WIDTH); ptr > start; --ptr) { if (isspace(*ptr)) { *ptr = '\0'; - strncat(line, start, sizeof(line) - 1); + strncat(line, start, sizeof(line) - strlen(line) - 1); opal_argv_append(&argc, &argv, line); start = ptr + 1; @@ -666,7 +666,7 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) if (isspace(*ptr)) { *ptr = '\0'; - strncat(line, start, sizeof(line) - 1); + strncat(line, start, sizeof(line) - strlen(line) - 1); opal_argv_append(&argc, &argv, line); start = ptr + 1; @@ -680,7 +680,7 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) whitespace, then just add it on and be done */ if (ptr >= start + len) { - strncat(line, start, sizeof(line) - 1); + strncat(line, start, sizeof(line) - strlen(line) - 1); opal_argv_append(&argc, &argv, line); start = desc + len + 1; } diff --git a/opal/util/daemon_init.c b/opal/util/daemon_init.c deleted file mode 100644 index 55059836131..00000000000 --- a/opal/util/daemon_init.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" -#ifdef HAVE_SYS_TYPES_H -# include -#endif -#include -#include -#ifdef HAVE_UNISTD_H -# include -#endif -#include - -#include "opal/constants.h" -#include "opal/util/daemon_init.h" - -int opal_daemon_init(char *working_dir) -{ -#if defined(HAVE_FORK) - pid_t pid; - int fd; - - if ((pid = fork()) < 0) { - return OPAL_ERROR; - } else if (pid != 0) { - exit(0); /* parent goes bye-bye */ - } - - /* child continues */ -# if defined(HAVE_SETSID) - setsid(); /* become session leader */ -# endif - - if (NULL != working_dir) { - chdir(working_dir); /* change working directory */ - } - - /* connect input to /dev/null */ - fd = open("/dev/null", O_RDONLY); - if (0 > fd) { - return OPAL_ERR_FATAL; - } - dup2(fd, STDIN_FILENO); - if (fd != STDIN_FILENO) { - close(fd); - } - - /* connect outputs to /dev/null */ - fd = open("/dev/null", O_RDWR | O_CREAT | O_TRUNC, 0666); - if (fd >= 0) { - dup2(fd, STDOUT_FILENO); - dup2(fd, STDERR_FILENO); - /* just to be safe, make sure we aren't trying - * to close stdout or stderr! since we dup'd both - * of them to the same fd, we can't just close it - * since one of the two would still be open and - * someone could attempt to use it. - */ - if (fd != STDOUT_FILENO && fd != STDERR_FILENO) { - close(fd); - } - } else { - return OPAL_ERR_FATAL; - } - - return OPAL_SUCCESS; - -#else /* HAVE_FORK */ - return OPAL_ERR_NOT_SUPPORTED; -#endif -} diff --git a/opal/util/daemon_init.h b/opal/util/daemon_init.h deleted file mode 100644 index 74b00768502..00000000000 --- a/opal/util/daemon_init.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file **/ - -#ifndef OPAL_DAEMON_INIT_H -#define OPAL_DAEMON_INIT_H - -#include "opal_config.h" - -BEGIN_C_DECLS - -/* - * Turn a process into a daemon. - * - * This function converts a process into a daemon in an orderly manner. It first forks a child - * process, then the parent exits. The child continues on to become a session leader, reset the file - * mode creation mask, and changes working directories to the one specified. - * - * @param working_dir Pointer to a character string containing the desired working directory. - * Providing a value of NULL will cause the function to leave the program in the current working - * directory. - * - * @retval OPAL_SUCCESS Indicates that the conversion was successful - * @retval OPAL_ERROR Indicates that the conversion was not successful - a fork could not be - * completed. 
- */ -OPAL_DECLSPEC int opal_daemon_init(char *working_dir); - -END_C_DECLS - -#endif /* OPAL_DAEMON_INIT_H */ diff --git a/opal/util/error.c b/opal/util/error.c index efbbacf3b9e..7f8be1e5817 100644 --- a/opal/util/error.c +++ b/opal/util/error.c @@ -35,6 +35,7 @@ #include "opal/constants.h" #include "opal/runtime/opal_params.h" #include "opal/util/error.h" +#include "opal/util/output.h" #include "opal/util/printf.h" #include "opal/util/proc.h" #include "opal/util/string_copy.h" @@ -217,14 +218,18 @@ void opal_delay_abort(void) "[%s:%05d] Looping forever " "(MCA parameter opal_abort_delay is < 0)\n", opal_process_info.nodename, (int) pid); - write(STDERR_FILENO, msg, strlen(msg)); + } else { + snprintf(msg, sizeof(msg), "[%s:%05d] Delaying for %d seconds before aborting\n", + opal_process_info.nodename, (int) pid, delay); + } + + opal_best_effort_write(STDERR_FILENO, msg, strlen(msg)); + + if (delay < 0) { while (1) { sleep(5); } } else { - snprintf(msg, sizeof(msg), "[%s:%05d] Delaying for %d seconds before aborting\n", - opal_process_info.nodename, (int) pid, delay); - write(STDERR_FILENO, msg, strlen(msg)); do { sleep(1); } while (--delay > 0); diff --git a/opal/util/info.c b/opal/util/info.c index 87ed64f7bce..1c73466f689 100644 --- a/opal/util/info.c +++ b/opal/util/info.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -102,6 +102,7 @@ static void opal_info_get_nolock(opal_info_t *info, const char *key, opal_cstrin if (NULL != value) { OBJ_RETAIN(search->ie_value); *value = search->ie_value; + search->ie_referenced++; } } } @@ -118,6 +119,7 @@ static int opal_info_set_cstring_nolock(opal_info_t *info, const char *key, opal OBJ_RELEASE(old_info->ie_value); OBJ_RETAIN(value); old_info->ie_value = value; + old_info->ie_referenced++; } else { opal_info_entry_t *new_info; new_info = OBJ_NEW(opal_info_entry_t); @@ -128,6 +130,7 @@ static int opal_info_set_cstring_nolock(opal_info_t *info, const char *key, opal new_info->ie_key = key_str; OBJ_RETAIN(value); new_info->ie_value = value; + new_info->ie_referenced++; opal_list_append(&(info->super), (opal_list_item_t *) new_info); } return OPAL_SUCCESS; @@ -143,6 +146,7 @@ static int opal_info_set_nolock(opal_info_t *info, const char *key, const char * * key already exists, check whether it is the same */ size_t value_len = strlen(value); + old_info->ie_referenced++; if (old_info->ie_value->length == value_len && 0 == strcmp(old_info->ie_value->string, value)) { return OPAL_SUCCESS; @@ -166,129 +170,12 @@ static int opal_info_set_nolock(opal_info_t *info, const char *key, const char * OBJ_RELEASE(new_info); return OPAL_ERR_OUT_OF_RESOURCE; } + new_info->ie_referenced++; opal_list_append(&(info->super), (opal_list_item_t *) new_info); } return OPAL_SUCCESS; } -/* - * An object's info can be set, but those settings can be modified by - * system callbacks. When those callbacks happen, we save a "__IN_"/"val" - * copy of changed or erased values. 
- * - * extra options for how to dup: - * include_system_extras (default 1) - * omit_ignored (default 1) - * show_modifications (default 0) - */ -static int opal_info_dup_mode(opal_info_t *info, opal_info_t **newinfo, - int include_system_extras, // (k/v with no corresponding __IN_k) - int omit_ignored, // (__IN_k with no k/v) - int show_modifications) // (pick v from k/v or __IN_k/v) -{ - int err, flag; - opal_info_entry_t *iterator; - - const char *pkey; - int is_IN_key; - int exists_IN_key, exists_reg_key; - - OPAL_THREAD_LOCK(info->i_lock); - OPAL_LIST_FOREACH (iterator, &info->super, opal_info_entry_t) { - // If we see an __IN_ key but no , decide what to do based on mode. - // If we see an __IN_ and a , skip since it'll be handled when - // we process . - is_IN_key = 0; - exists_IN_key = 0; - exists_reg_key = 0; - pkey = iterator->ie_key->string; - opal_cstring_t *savedval = NULL; - opal_cstring_t *valstr = NULL; - if (0 - == strncmp(iterator->ie_key->string, OPAL_INFO_SAVE_PREFIX, - strlen(OPAL_INFO_SAVE_PREFIX))) { - pkey += strlen(OPAL_INFO_SAVE_PREFIX); - - is_IN_key = 1; - exists_IN_key = 1; - opal_info_get_nolock(info, pkey, NULL, &flag); - if (flag) { - exists_reg_key = 1; - } - } else { - is_IN_key = 0; - exists_reg_key = 1; - - // see if there is an __IN_ for the current - if (strlen(OPAL_INFO_SAVE_PREFIX) + strlen(pkey) < OPAL_MAX_INFO_KEY) { - char savedkey[OPAL_MAX_INFO_KEY + 1]; // iterator->ie_key has this as its size - snprintf(savedkey, OPAL_MAX_INFO_KEY + 1, OPAL_INFO_SAVE_PREFIX "%s", pkey); - // (the prefix macro is a string, so the unreadable part above is a string - // concatenation) - opal_info_get_nolock(info, savedkey, &savedval, &flag); - // release savedval, it remains valid as long we're holding the lock - OBJ_RELEASE(savedval); - exists_IN_key = 1; - } else { - flag = 0; - } - } - - if (is_IN_key) { - if (exists_reg_key) { - // we're processing __IN_ and there exists a so we'll handle it then - continue; - } else { - // we're 
processing __IN_ and no exists - // this would mean was set by the user but ignored by the system - // so base our behavior on the omit_ignored - if (!omit_ignored) { - err = opal_info_set_cstring_nolock(*newinfo, pkey, iterator->ie_value); - if (OPAL_SUCCESS != err) { - OPAL_THREAD_UNLOCK(info->i_lock); - return err; - } - } - } - } else { - if (!exists_IN_key) { - // we're processing and no __IN_ exists - // this would mean it's a system setting, not something that came from the user - if (include_system_extras) { - valstr = iterator->ie_value; - } - } else { - // we're processing and __IN_ also exists - // pick which value to use - if (!show_modifications) { - valstr = savedval; - } else { - valstr = iterator->ie_value; - } - } - if (NULL != valstr) { - err = opal_info_set_cstring_nolock(*newinfo, pkey, valstr); - /* NOTE: we have not retained valstr so don't release here after using it */ - if (OPAL_SUCCESS != err) { - OPAL_THREAD_UNLOCK(info->i_lock); - return err; - } - } - } - } - OPAL_THREAD_UNLOCK(info->i_lock); - return OPAL_SUCCESS; -} - -/* - * Implement opal_info_dup_mpistandard by using whatever mode - * settings represent our interpretation of the standard - */ -int opal_info_dup_mpistandard(opal_info_t *info, opal_info_t **newinfo) -{ - return opal_info_dup_mode(info, newinfo, 1, 1, 0); -} - /* * Set a value on the info */ @@ -484,6 +371,7 @@ static void info_entry_constructor(opal_info_entry_t *entry) { entry->ie_key = NULL; entry->ie_value = NULL; + entry->ie_referenced = 0; } static void info_entry_destructor(opal_info_entry_t *entry) @@ -522,3 +410,52 @@ static opal_info_entry_t *info_find_key(opal_info_t *info, const char *key) } return NULL; } + +/** + * Mark the entry \c key as referenced. 
+ */ +int opal_info_mark_referenced(opal_info_t *info, const char *key) +{ + opal_info_entry_t *entry; + + OPAL_THREAD_LOCK(info->i_lock); + entry = info_find_key(info, key); + entry->ie_referenced++; + OPAL_THREAD_UNLOCK(info->i_lock); + + return OPAL_SUCCESS; +} + +/** + * Remove a reference from the entry \c key. + */ +int opal_info_unmark_referenced(opal_info_t *info, const char *key) +{ + opal_info_entry_t *entry; + + OPAL_THREAD_LOCK(info->i_lock); + entry = info_find_key(info, key); + entry->ie_referenced--; + OPAL_THREAD_UNLOCK(info->i_lock); + + return OPAL_SUCCESS; +} + +/** + * Remove any entries that are not marked as referenced + */ +int opal_info_remove_unreferenced(opal_info_t *info) +{ + opal_info_entry_t *iterator, *next; + /* iterate over all entries and remove the ones that are not referenced */ + OPAL_THREAD_LOCK(info->i_lock); + OPAL_LIST_FOREACH_SAFE (iterator, next, &info->super, opal_info_entry_t) { + if (!iterator->ie_referenced) { + opal_list_remove_item(&info->super, &iterator->super); + } + } + OPAL_THREAD_UNLOCK(info->i_lock); + + + return OPAL_SUCCESS; +} diff --git a/opal/util/info.h b/opal/util/info.h index 23c25984e41..5e7de286c5d 100644 --- a/opal/util/info.h +++ b/opal/util/info.h @@ -65,6 +65,8 @@ struct opal_info_entry_t { opal_list_item_t super; /**< required for opal_list_t type */ opal_cstring_t *ie_value; /**< value part of the (key, value) pair. 
*/ opal_cstring_t *ie_key; /**< "key" part of the (key, value) pair */ + uint32_t ie_referenced; /**< number of times this entry was internally + referenced */ }; /** @@ -87,8 +89,6 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_info_t); */ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_info_entry_t); -int opal_mpiinfo_init(void *); - /** * opal_info_dup - Duplicate an 'MPI_Info' object * @@ -105,40 +105,8 @@ int opal_mpiinfo_init(void *); */ int opal_info_dup(opal_info_t *info, opal_info_t **newinfo); -// Comments might still say __IN_, but the code should be using the -// below macro instead. -#define OPAL_INFO_SAVE_PREFIX "_OMPI_IN_" - /** - * opal_info_dup_mpistandard - Duplicate an 'MPI_Info' object - * - * @param info source info object (handle) - * @param newinfo pointer to the new info object (handle) - * - * @retval OPAL_SUCCESS upon success - * @retval OPAL_ERR_OUT_OF_RESOURCE if out of memory - * - * The user sets an info object with key/value pairs and once processed, - * we keep key/val pairs that might have been modified vs what the user - * provided, and some user inputs might have been ignored too. The original - * user inpust are kept as __IN_/. - * - * This routine then outputs key/value pairs as: - * - * if and __IN_ both exist: - * This means the user set a k/v pair and it was used. - * output: / value(__IN_), the original user input - * if exists but __IN_ doesn't: - * This is a system-provided setting. - * output: /value() - * if __IN_ exists but doesn't: - * The user provided a setting that was rejected (ignored) by the system - * output: nothing for this key - */ -int opal_info_dup_mpistandard(opal_info_t *info, opal_info_t **newinfo); - -/** - * Set a new key,value pair on info. + * Set a new key,value pair on info and mark it as referenced. 
* * @param info pointer to opal_info_t object * @param key pointer to the new key object @@ -197,6 +165,8 @@ int opal_info_free(opal_info_t **info); * Get a (key, value) pair from an 'MPI_Info' object and assign it * into a boolen output. * + * This call marks the entry referenced. + * * @param info Pointer to opal_info_t object * @param key null-terminated character string of the index key * @param value Boolean output value @@ -239,7 +209,8 @@ OPAL_DECLSPEC int opal_info_get_value_enum(opal_info_t *info, const char *key, i int *flag); /** - * Get a (key, value) pair from an 'MPI_Info' object + * Get a (key, value) pair from an 'MPI_Info' object and mark the entry + * as referenced. * * @param info Pointer to opal_info_t object * @param key null-terminated character string of the index key @@ -316,6 +287,43 @@ static inline int opal_info_get_nkeys(opal_info_t *info, int *nkeys) return OPAL_SUCCESS; } + +/** + * Mark the entry \c key as referenced. + * + * This function is useful for lazily initialized components + * that do not read the key immediately but want to make sure + * the key is kept by the object owning the info key. + * + * @param info Pointer to opal_info_t object. + * @param key The key which to mark as referenced. + * + * @retval OPAL_SUCCESS + */ +int opal_info_mark_referenced(opal_info_t *info, const char *key); + +/** + * Remove a reference from the entry \c key. + * + * This function should be used by components reading the key + * without wanting to retain it in the object owning the info. + * + * @param info Pointer to opal_info_t object. + * @param key The key which to unmark as referenced. + * + * @retval OPAL_SUCCESS + */ +int opal_info_unmark_referenced(opal_info_t *info, const char *key); + +/** + * Remove any entries that are not marked as referenced + * + * @param info Pointer to opal_info_t object. 
+ * + * @retval OPAL_SUCCESS + */ +int opal_info_remove_unreferenced(opal_info_t *info); + END_C_DECLS #endif /* OPAL_INFO_H */ diff --git a/opal/util/info_subscriber.c b/opal/util/info_subscriber.c index eb30ec9175c..68dc7ef0871 100644 --- a/opal/util/info_subscriber.c +++ b/opal/util/info_subscriber.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -156,6 +156,7 @@ static int ntesting_callbacks = 0; static opal_key_interest_callback_t *testing_callbacks[5]; static char *testing_keys[5]; static char *testing_initialvals[5]; + // User-level call, user adds their own callback function to be subscribed // to every object: int opal_infosubscribe_testcallback(opal_key_interest_callback_t *callback, char *key, char *val); @@ -244,47 +245,8 @@ int opal_infosubscribe_testregister(opal_infosubscriber_t *object) return OPAL_SUCCESS; } -// This routine is to be used after making a callback for a -// key/val pair. The callback would have ggiven a new value to associate -// with , and this function saves the previous value under -// __IN_. -// -// The last argument indicates whether to overwrite a previous -// __IN_ or not. -static int save_original_key_val(opal_info_t *info, const char *key, opal_cstring_t *val, - int overwrite) -{ - char modkey[OPAL_MAX_INFO_KEY]; - int flag, err; - - // Checking strlen, even though it should be unnecessary. - // This should only happen on predefined keys with short lengths. 
- if (strlen(key) + strlen(OPAL_INFO_SAVE_PREFIX) < OPAL_MAX_INFO_KEY) { - snprintf(modkey, OPAL_MAX_INFO_KEY, OPAL_INFO_SAVE_PREFIX "%s", key); - // (the prefix macro is a string, so the unreadable part above is a string concatenation) - flag = 0; - opal_info_get(info, modkey, 0, &flag); - if (!flag || overwrite) { - err = opal_info_set_cstring(info, modkey, val); - if (OPAL_SUCCESS != err) { - return err; - } - } -// FIXME: use whatever the Open MPI convention is for DEBUG options like this -// Even though I don't expect this codepath to happen, if it somehow DID happen -// in a real run with user-keys, I'd rather it be silent at that point rather -// being noisy and/or aborting. -#ifdef OMPI_DEBUG - } else { - printf("WARNING: Unexpected key length [%s]\n", key); -#endif - } - return OPAL_SUCCESS; -} - int opal_infosubscribe_change_info(opal_infosubscriber_t *object, opal_info_t *new_info) { - int err; opal_info_entry_t *iterator; const char *updated_value; @@ -297,6 +259,7 @@ int opal_infosubscribe_change_info(opal_infosubscriber_t *object, opal_info_t *n if (NULL != new_info) { OPAL_LIST_FOREACH (iterator, &new_info->super, opal_info_entry_t) { + int err = OPAL_SUCCESS; opal_cstring_t *value_str, *key_str; value_str = iterator->ie_value; OBJ_RETAIN(value_str); @@ -306,28 +269,15 @@ int opal_infosubscribe_change_info(opal_infosubscriber_t *object, opal_info_t *n updated_value = opal_infosubscribe_inform_subscribers(object, iterator->ie_key->string, iterator->ie_value->string, &found_callback); - if (updated_value) { + if (NULL != updated_value + && 0 != strncmp(updated_value, value_str->string, value_str->length)) { err = opal_info_set(object->s_info, iterator->ie_key->string, updated_value); - } else { - // This path would happen if there was no callback for this key, - // or if there was a callback and it returned null. 
One way the - // setting was unrecognized the other way it was recognized and ignored, - // either way it shouldn't be set, which we'll ensure with an unset - // in case a previous value exists. - err = opal_info_delete(object->s_info, iterator->ie_key->string); - err = OPAL_SUCCESS; // we don't care if the key was found or not } + OBJ_RELEASE(value_str); + OBJ_RELEASE(key_str); if (OPAL_SUCCESS != err) { - OBJ_RELEASE(value_str); - OBJ_RELEASE(key_str); return err; } - // Save the original at "__IN_":"original" - // And if multiple set-info calls happen, the last would be the most relevant - // to save, so overwrite a previously saved value if there is one. - save_original_key_val(object->s_info, key_str->string, value_str, 1); - OBJ_RELEASE(value_str); - OBJ_RELEASE(key_str); } } @@ -358,25 +308,6 @@ int opal_infosubscribe_subscribe(opal_infosubscriber_t *object, const char *key, opal_list_t *list = NULL; opal_hash_table_t *table = &object->s_subscriber_table; opal_callback_list_item_t *callback_list_item; - size_t max_len = OPAL_MAX_INFO_KEY - strlen(OPAL_INFO_SAVE_PREFIX); - - if (strlen(key) > max_len) { - opal_output(0, - "DEVELOPER WARNING: Unexpected MPI info key length [%s]: " - "OMPI internal callback keys are limited to %" PRIsize_t " chars.", - key, max_len); -#if OPAL_ENABLE_DEBUG - opal_output(0, - "Aborting because this is a developer / debugging build. Go fix this error."); - // Do not assert() / dump core. Just exit un-gracefully. - exit(1); -#else - opal_output(0, - "The \"%s\" MPI info key almost certainly will not work properly. 
You should " - "inform an Open MPI developer about this.", - key); -#endif - } if (table) { opal_hash_table_get_value_ptr(table, key, strlen(key), (void **) &list); @@ -422,20 +353,11 @@ int opal_infosubscribe_subscribe(opal_infosubscriber_t *object, const char *key, err = opal_info_delete(object->s_info, key); err = OPAL_SUCCESS; // we don't care if the key was found or not } + OBJ_RELEASE(val); + if (OPAL_SUCCESS != err) { - OBJ_RELEASE(val); return err; } - // - save the previous val under key __IN_* - // This function might be called separately for the same key multiple - // times (multiple modules might register an interest in the same key), - // so we only save __IN_ for the first. - // Note we're saving the first k/v regardless of whether it was the default - // or whether it came from info. This means system settings will show - // up if the user queries later with get_info. - save_original_key_val(object->s_info, key, val, 0); - - OBJ_RELEASE(val); } else { /* * TODO: This should not happen @@ -444,9 +366,3 @@ int opal_infosubscribe_subscribe(opal_infosubscriber_t *object, const char *key, return OPAL_SUCCESS; } - -/* - OBJ_DESTRUCT(&opal_comm_info_hashtable); - OBJ_DESTRUCT(&opal_win_info_hashtable); - OBJ_DESTRUCT(&opal_file_info_hashtable); -*/ diff --git a/opal/util/malloc.h b/opal/util/malloc.h index 5ff79c57a32..1c777037a1a 100644 --- a/opal/util/malloc.h +++ b/opal/util/malloc.h @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2022 Cisco Systems, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -127,7 +128,7 @@ OPAL_DECLSPEC void *opal_realloc(void *ptr, size_t size, const char *file, int l * to configure (or by default if you're building in a SVN * checkout). 
*/ -OPAL_DECLSPEC void opal_free(void *addr, const char *file, int line) __opal_attribute_nonnull__(1); +OPAL_DECLSPEC void opal_free(void *addr, const char *file, int line); /** * Used to set the debug level for malloc debug. diff --git a/opal/util/minmax.h b/opal/util/minmax.h index d483cd7adbc..5b7f3c768b8 100644 --- a/opal/util/minmax.h +++ b/opal/util/minmax.h @@ -16,6 +16,8 @@ #ifndef OPAL_MINMAX_H #define OPAL_MINMAX_H +#include "opal_stdint.h" + #define OPAL_DEFINE_MINMAX(type, suffix) \ static inline const type opal_min_##suffix(const type a, const type b) \ { \ diff --git a/opal/util/os_path.c b/opal/util/os_path.c index 6c87ca66635..6b67737dacb 100644 --- a/opal/util/os_path.c +++ b/opal/util/os_path.c @@ -88,15 +88,15 @@ char *opal_os_path(int relative, ...) va_start(ap, relative); if (NULL != (element = va_arg(ap, char *))) { if (path_sep[0] != element[0]) { - strncat(path, path_sep, total_length); + strncat(path, path_sep, total_length - strlen(path) - 1); } - strcat(path, element); + strncat(path, element, total_length - strlen(path) - 1); } while (NULL != (element = va_arg(ap, char *))) { if (path_sep[0] != element[0]) { - strncat(path, path_sep, total_length); + strncat(path, path_sep, total_length - strlen(path) - 1); } - strncat(path, element, total_length); + strncat(path, element, total_length - strlen(path) - 1); } va_end(ap); diff --git a/opal/util/output.c b/opal/util/output.c index b9ae8ae6bca..778a0dc826c 100644 --- a/opal/util/output.c +++ b/opal/util/output.c @@ -948,14 +948,20 @@ static int output(int output_id, const char *format, va_list arglist) /* stdout output */ if (ldi->ldi_stdout) { - write(fileno(stdout), out, (int) strlen(out)); + int tmp = opal_best_effort_write(fileno(stdout), out, strlen(out)); + if (OPAL_SUCCESS != tmp) { + rc = tmp; + } fflush(stdout); } /* stderr output */ if (ldi->ldi_stderr) { - write((-1 == default_stderr_fd) ? 
fileno(stderr) : default_stderr_fd, out, - (int) strlen(out)); + int tmp = opal_best_effort_write((-1 == default_stderr_fd) ? fileno(stderr) : default_stderr_fd, + out, strlen(out)); + if (OPAL_SUCCESS != tmp) { + rc = tmp; + } fflush(stderr); } @@ -970,17 +976,24 @@ static int output(int output_id, const char *format, va_list arglist) ++ldi->ldi_file_num_lines_lost; } else if (ldi->ldi_file_num_lines_lost > 0) { char buffer[BUFSIZ]; + int tmp; memset(buffer, 0, BUFSIZ); snprintf(buffer, BUFSIZ - 1, "[WARNING: %d lines lost because the Open MPI process session " "directory did\n not exist when opal_output() was invoked]\n", ldi->ldi_file_num_lines_lost); - write(ldi->ldi_fd, buffer, (int) strlen(buffer)); + tmp = opal_best_effort_write(ldi->ldi_fd, buffer, strlen(buffer)); + if (OPAL_SUCCESS != tmp) { + rc = tmp; + } ldi->ldi_file_num_lines_lost = 0; } } if (ldi->ldi_fd != -1) { - write(ldi->ldi_fd, out, (int) strlen(out)); + int tmp = opal_best_effort_write(ldi->ldi_fd, out, strlen(out)); + if (OPAL_SUCCESS != tmp) { + rc = tmp; + } } } OPAL_THREAD_UNLOCK(&mutex); diff --git a/opal/util/output.h b/opal/util/output.h index 109ee791b28..4659240d9b5 100644 --- a/opal/util/output.h +++ b/opal/util/output.h @@ -71,7 +71,10 @@ #include "opal_config.h" #include +#include +#include +#include "opal/constants.h" #include "opal/class/opal_object.h" BEGIN_C_DECLS @@ -552,6 +555,30 @@ OPAL_DECLSPEC void opal_output_set_output_file_info(const char *dir, const char */ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_output_stream_t); +/** + * Best effort write for blocking file descriptors + * + * A wrapper around write() that will spin trying to write the entire + * buffer until either an error occurs or a write is not able to write + * any data. Useful for situations where opal_output cannot be used + * for signal reasons. Do not use with non-blocking file descriptors. 
+ */ +static inline int opal_best_effort_write(int fd, const void *buf, size_t count) +{ + size_t written = 0; + + while (written != count) { + ssize_t tmp = write(fd, (char *)buf + written, count - written); + /* we explicitly do not recover from errors in this wrapper */ + if ((tmp < 0 && errno != EINTR) || (tmp == 0)) { + return OPAL_ERROR; + } + written += tmp; + } + + return OPAL_SUCCESS; +} + END_C_DECLS #endif /* OPAL_OUTPUT_H_ */ diff --git a/opal/util/path.c b/opal/util/path.c index c937eea1407..44e921d2b9b 100644 --- a/opal/util/path.c +++ b/opal/util/path.c @@ -393,10 +393,15 @@ char *opal_find_absolute_path(char *app_name) if (NULL != abs_app_name) { char *resolved_path = (char *) malloc(OPAL_PATH_MAX); - realpath(abs_app_name, resolved_path); + char *ret; + ret = realpath(abs_app_name, resolved_path); if (abs_app_name != app_name) { free(abs_app_name); } + if (NULL == ret) { + free(resolved_path); + resolved_path = NULL; + } return resolved_path; } return NULL; diff --git a/opal/util/proc.h b/opal/util/proc.h index 433735e50f3..ad3282ae654 100644 --- a/opal/util/proc.h +++ b/opal/util/proc.h @@ -115,8 +115,9 @@ typedef struct opal_process_info_t { uint32_t num_local_peers; /**< number of procs from my job that share my node with me */ uint16_t my_local_rank; /**< local rank on this node within my job */ uint16_t my_node_rank; - char *cpuset; /**< String-representation of bitmap where we are bound */ - char *locality; /**< String-representation of process locality */ + uint16_t my_numa_rank; /**< rank on this processes NUMA node. 
A value of UINT16_MAX indicates unavailable numa_rank */ + char *cpuset; /**< String-representation of bitmap where we are bound */ + char *locality; /**< String-representation of process locality */ pid_t pid; uint32_t num_procs; uint32_t app_num; diff --git a/opal/util/stacktrace.c b/opal/util/stacktrace.c index f00e54ec343..5c3a342ab7c 100644 --- a/opal/util/stacktrace.c +++ b/opal/util/stacktrace.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2022 Cisco Systems, Inc. All rights reserved * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2019 Triad National Security, LLC. All rights @@ -146,7 +146,7 @@ static void show_stackframe(int signo, siginfo_t *info, void *p) memset(print_buffer, 0, sizeof(print_buffer)); ret = snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT "*** Process received signal ***\n", stacktrace_hostname, getpid()); - write(opal_stacktrace_output_fileno, print_buffer, ret); + opal_best_effort_write(opal_stacktrace_output_fileno, print_buffer, ret); memset(print_buffer, 0, sizeof(print_buffer)); @@ -467,13 +467,14 @@ static void show_stackframe(int signo, siginfo_t *info, void *p) } /* write out the signal information generated above */ - write(opal_stacktrace_output_fileno, print_buffer, sizeof(print_buffer) - size); + opal_best_effort_write(opal_stacktrace_output_fileno, print_buffer, sizeof(print_buffer) - size); /* print out the stack trace */ snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT, stacktrace_hostname, getpid()); ret = opal_backtrace_print(NULL, print_buffer, 2); if (OPAL_SUCCESS != ret) { - write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg)); + 
opal_best_effort_write(opal_stacktrace_output_fileno, unable_to_print_msg, + strlen(unable_to_print_msg)); } /* write out the footer information */ @@ -481,9 +482,10 @@ static void show_stackframe(int signo, siginfo_t *info, void *p) ret = snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT "*** End of error message ***\n", stacktrace_hostname, getpid()); if (ret > 0) { - write(opal_stacktrace_output_fileno, print_buffer, ret); + opal_best_effort_write(opal_stacktrace_output_fileno, print_buffer, ret); } else { - write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg)); + opal_best_effort_write(opal_stacktrace_output_fileno, unable_to_print_msg, + strlen(unable_to_print_msg)); } if (fileno(stdout) != opal_stacktrace_output_fileno @@ -640,7 +642,6 @@ int opal_util_register_stackhandlers(void) set_stacktrace_filename(); opal_stacktrace_output_fileno = -1; } else if (0 == strncasecmp(opal_stacktrace_output_filename, "file:", 5)) { - char *filename_cpy = NULL; next = strchr(opal_stacktrace_output_filename, ':'); next++; // move past the ':' to the filename specified @@ -654,8 +655,6 @@ int opal_util_register_stackhandlers(void) sizeof(char) * opal_stacktrace_output_filename_max_len); set_stacktrace_filename(); opal_stacktrace_output_fileno = -1; - - free(filename_cpy); } else { opal_stacktrace_output_fileno = fileno(stderr); } diff --git a/oshmem/Makefile.am b/oshmem/Makefile.am index 69a2a747182..da59aadba39 100644 --- a/oshmem/Makefile.am +++ b/oshmem/Makefile.am @@ -6,6 +6,8 @@ # Copyright (c) 2015 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. +# Copyright (c) 2021 Amazon.com, Inc. or its affiliates. +# All Rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -13,24 +15,11 @@ # $HEADER$ # -# Do we have profiling? 
-if OSHMEM_PROFILING -c_pshmem_lib = shmem/c/profile/liboshmem_c_pshmem.la -else -c_pshmem_lib = -endif - # Do we have the Fortran bindings? if OSHMEM_BUILD_FORTRAN_BINDINGS fortran_oshmem_lib = shmem/fortran/liboshmem_fortran.la - -if OSHMEM_PROFILING -fortran_pshmem_lib = shmem/fortran/profile/liboshmem_fortran_pshmem.la -endif - else fortran_oshmem_lib = -fortran_pshmem_lib = endif SUBDIRS = \ @@ -63,9 +52,7 @@ endif liboshmem_la_SOURCES = liboshmem_la_LIBADD = \ shmem/c/liboshmem_c.la \ - $(c_pshmem_lib) \ $(fortran_oshmem_lib) \ - $(fortran_pshmem_lib) \ $(MCA_oshmem_FRAMEWORK_LIBS) \ $(OSHMEM_TOP_BUILDDIR)/ompi/lib@OMPI_LIBMPI_NAME@.la liboshmem_la_DEPENDENCIES = $(liboshmem_la_LIBADD) diff --git a/oshmem/include/oshmem/constants.h b/oshmem/include/oshmem/constants.h index a77849a6b55..cecb5c3a302 100644 --- a/oshmem/include/oshmem/constants.h +++ b/oshmem/include/oshmem/constants.h @@ -80,6 +80,7 @@ enum { SHMEM_NULL = 0, SHMEM_CHAR, SHMEM_UCHAR, + SHMEM_SCHAR, SHMEM_SHORT, SHMEM_USHORT, SHMEM_INT, @@ -87,12 +88,23 @@ enum { SHMEM_LONG, SHMEM_ULONG, SHMEM_LLONG, + SHMEM_BYTE, + SHMEM_INT8_T, + SHMEM_INT16_T, SHMEM_INT32_T, SHMEM_INT64_T, + SHMEM_UINT8_T, + SHMEM_UINT16_T, + SHMEM_UINT32_T, + SHMEM_UINT64_T, + SHMEM_SIZE_T, + SHMEM_PTRDIFF_T, SHMEM_ULLONG, SHMEM_FLOAT, SHMEM_DOUBLE, SHMEM_LDOUBLE, + SHMEM_COMPLEXD, + SHMEM_COMPLEXF, SHMEM_FINT, SHMEM_FINT4, diff --git a/oshmem/include/pshmem.h b/oshmem/include/pshmem.h index cc51b453477..08177719d70 100644 --- a/oshmem/include/pshmem.h +++ b/oshmem/include/pshmem.h @@ -57,8 +57,10 @@ OSHMEM_DECLSPEC void* pshmem_malloc(size_t size); OSHMEM_DECLSPEC void* pshmem_calloc(size_t count, size_t size); OSHMEM_DECLSPEC void* pshmem_align(size_t align, size_t size); OSHMEM_DECLSPEC void* pshmem_realloc(void *ptr, size_t size); +OSHMEM_DECLSPEC void* pshmem_malloc_with_hints(size_t size, long hints); OSHMEM_DECLSPEC void pshmem_free(void* ptr); + /* * Remote pointer operations */ @@ -70,6 +72,560 @@ OSHMEM_DECLSPEC 
void *pshmem_ptr(const void *ptr, int pe); OSHMEM_DECLSPEC int pshmem_ctx_create(long options, shmem_ctx_t *ctx); OSHMEM_DECLSPEC void pshmem_ctx_destroy(shmem_ctx_t ctx); +/* + * Team management operations + */ +OSHMEM_DECLSPEC int pshmem_team_my_pe(shmem_team_t team); +OSHMEM_DECLSPEC int pshmem_team_n_pes(shmem_team_t team); +OSHMEM_DECLSPEC int pshmem_team_get_config(shmem_team_t team, long config_mask, shmem_team_config_t *config); +OSHMEM_DECLSPEC int pshmem_team_translate_pe(shmem_team_t src_team, int src_pe, shmem_team_t dest_team); +OSHMEM_DECLSPEC int pshmem_team_split_strided(shmem_team_t parent_team, int start, int stride, int size, const shmem_team_config_t *config, long config_mask, shmem_team_t *new_team); +OSHMEM_DECLSPEC int pshmem_team_split_2d(shmem_team_t parent_team, int xrange, const shmem_team_config_t *xaxis_config, long xaxis_mask, shmem_team_t *xaxis_team, const shmem_team_config_t *yaxis_config, long yaxis_mask, shmem_team_t *yaxis_team); +OSHMEM_DECLSPEC void pshmem_team_destroy(shmem_team_t team); +OSHMEM_DECLSPEC int pshmem_ctx_get_team(shmem_ctx_t ctx, shmem_team_t *team); +OSHMEM_DECLSPEC int pshmem_team_create_ctx(shmem_team_t team, long options, shmem_ctx_t *ctx); + +/* + * Teams-based Collectives + */ + +/* Teams sync */ +OSHMEM_DECLSPEC void pshmem_team_sync(shmem_team_t team); + + +/* Teams alltoall */ +OSHMEM_DECLSPEC int pshmem_char_alltoall(shmem_team_t team, char *target, const char *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_short_alltoall(shmem_team_t team, short *target, const short *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int_alltoall(shmem_team_t team, int *target, const int *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_long_alltoall(shmem_team_t team, long *target, const long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_float_alltoall(shmem_team_t team, float *target, const float *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_double_alltoall(shmem_team_t team, double *target, 
const double *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_longlong_alltoall(shmem_team_t team, long long *target, const long long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_schar_alltoall(shmem_team_t team, signed char *target, const signed char *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uchar_alltoall(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ushort_alltoall(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint_alltoall(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ulong_alltoall(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ulonglong_alltoall(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_longdouble_alltoall(shmem_team_t team, long double *target, const long double *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int8_alltoall(shmem_team_t team, int8_t *target, const int8_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int16_alltoall(shmem_team_t team, int16_t *target, const int16_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int32_alltoall(shmem_team_t team, int32_t *target, const int32_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int64_alltoall(shmem_team_t team, int64_t *target, const int64_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint8_alltoall(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint16_alltoall(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint32_alltoall(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint64_alltoall(shmem_team_t team, uint64_t *target, const 
uint64_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_size_alltoall(shmem_team_t team, size_t *target, const size_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ptrdiff_alltoall(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nelems); +#if OSHMEM_HAVE_C11 +#define pshmem_alltoall(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_char_alltoall, \ + short*: pshmem_short_alltoall, \ + int*: pshmem_int_alltoall, \ + long*: pshmem_long_alltoall, \ + long long*: pshmem_longlong_alltoall, \ + signed char*: pshmem_schar_alltoall, \ + unsigned char*: pshmem_uchar_alltoall, \ + unsigned short*: pshmem_ushort_alltoall, \ + unsigned int*: pshmem_uint_alltoall, \ + unsigned long*: pshmem_ulong_alltoall, \ + unsigned long long*: pshmem_ulonglong_alltoall, \ + float*: pshmem_float_alltoall, \ + double*: pshmem_double_alltoall, \ + long double*: pshmem_longdouble_alltoall, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + +OSHMEM_DECLSPEC int pshmem_alltoallmem(shmem_team_t team, void *target, const void *source, size_t nelems); + +/* Teams alltoalls */ +OSHMEM_DECLSPEC int pshmem_char_alltoalls(shmem_team_t team, char *target, const char *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_short_alltoalls(shmem_team_t team, short *target, const short *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int_alltoalls(shmem_team_t team, int *target, const int *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_long_alltoalls(shmem_team_t team, long *target, const long *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_float_alltoalls(shmem_team_t team, float *target, const float *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_double_alltoalls(shmem_team_t team, double *target, const double *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); 
+OSHMEM_DECLSPEC int pshmem_longlong_alltoalls(shmem_team_t team, long long *target, const long long *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_schar_alltoalls(shmem_team_t team, signed char *target, const signed char *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uchar_alltoalls(shmem_team_t team, unsigned char *target, const unsigned char *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ushort_alltoalls(shmem_team_t team, unsigned short *target, const unsigned short *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint_alltoalls(shmem_team_t team, unsigned int *target, const unsigned int *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ulong_alltoalls(shmem_team_t team, unsigned long *target, const unsigned long *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ulonglong_alltoalls(shmem_team_t team, unsigned long long *target, const unsigned long long *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_longdouble_alltoalls(shmem_team_t team, long double *target, const long double *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int8_alltoalls(shmem_team_t team, int8_t *target, const int8_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int16_alltoalls(shmem_team_t team, int16_t *target, const int16_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int32_alltoalls(shmem_team_t team, int32_t *target, const int32_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int64_alltoalls(shmem_team_t team, int64_t *target, const int64_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint8_alltoalls(shmem_team_t team, uint8_t *target, const uint8_t *source, ptrdiff_t 
dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint16_alltoalls(shmem_team_t team, uint16_t *target, const uint16_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint32_alltoalls(shmem_team_t team, uint32_t *target, const uint32_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint64_alltoalls(shmem_team_t team, uint64_t *target, const uint64_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_size_alltoalls(shmem_team_t team, size_t *target, const size_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ptrdiff_alltoalls(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +#if OSHMEM_HAVE_C11 +#define pshmem_alltoalls(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_char_alltoalls, \ + short*: pshmem_short_alltoalls, \ + int*: pshmem_int_alltoalls, \ + long*: pshmem_long_alltoalls, \ + long long*: pshmem_longlong_alltoalls, \ + signed char*: pshmem_schar_alltoalls, \ + unsigned char*: pshmem_uchar_alltoalls, \ + unsigned short*: pshmem_ushort_alltoalls, \ + unsigned int*: pshmem_uint_alltoalls, \ + unsigned long*: pshmem_ulong_alltoalls, \ + unsigned long long*: pshmem_ulonglong_alltoalls, \ + float*: pshmem_float_alltoalls, \ + double*: pshmem_double_alltoalls, \ + long double*: pshmem_longdouble_alltoalls, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + +OSHMEM_DECLSPEC int pshmem_alltoallsmem(shmem_team_t team, void *target, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); + + +/* Teams broadcast */ +OSHMEM_DECLSPEC int pshmem_char_broadcast(shmem_team_t team, char *target, const char *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_short_broadcast(shmem_team_t team, short *target, const short *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int 
pshmem_int_broadcast(shmem_team_t team, int *target, const int *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_long_broadcast(shmem_team_t team, long *target, const long *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_float_broadcast(shmem_team_t team, float *target, const float *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_double_broadcast(shmem_team_t team, double *target, const double *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_longlong_broadcast(shmem_team_t team, long long *target, const long long *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_schar_broadcast(shmem_team_t team, signed char *target, const signed char *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_uchar_broadcast(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_ushort_broadcast(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_uint_broadcast(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_ulong_broadcast(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_ulonglong_broadcast(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_longdouble_broadcast(shmem_team_t team, long double *target, const long double *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_int8_broadcast(shmem_team_t team, int8_t *target, const int8_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_int16_broadcast(shmem_team_t team, int16_t *target, const int16_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_int32_broadcast(shmem_team_t team, int32_t *target, const int32_t 
*source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_int64_broadcast(shmem_team_t team, int64_t *target, const int64_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_uint8_broadcast(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_uint16_broadcast(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_uint32_broadcast(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_uint64_broadcast(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_size_broadcast(shmem_team_t team, size_t *target, const size_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int pshmem_ptrdiff_broadcast(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nelems, int PE_root); +#if OSHMEM_HAVE_C11 +#define pshmem_broadcast(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_char_broadcast, \ + short*: pshmem_short_broadcast, \ + int*: pshmem_int_broadcast, \ + long*: pshmem_long_broadcast, \ + long long*: pshmem_longlong_broadcast, \ + signed char*: pshmem_schar_broadcast, \ + unsigned char*: pshmem_uchar_broadcast, \ + unsigned short*: pshmem_ushort_broadcast, \ + unsigned int*: pshmem_uint_broadcast, \ + unsigned long*: pshmem_ulong_broadcast, \ + unsigned long long*: pshmem_ulonglong_broadcast, \ + float*: pshmem_float_broadcast, \ + double*: pshmem_double_broadcast, \ + long double*: pshmem_longdouble_broadcast, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + +OSHMEM_DECLSPEC int pshmem_broadcastmem(shmem_team_t team, void *target, const void *source, size_t nelems, int PE_root); + +/* Teams collect */ +OSHMEM_DECLSPEC int pshmem_char_collect(shmem_team_t team, char *target, const char *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_short_collect(shmem_team_t team, short *target, const short *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int_collect(shmem_team_t team, int *target, const int *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_long_collect(shmem_team_t team, long *target, const long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_float_collect(shmem_team_t team, float *target, const float *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_double_collect(shmem_team_t team, double *target, const double *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_longlong_collect(shmem_team_t team, long long *target, const long long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_schar_collect(shmem_team_t team, signed char *target, const signed char *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uchar_collect(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ushort_collect(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t 
nelems); +OSHMEM_DECLSPEC int pshmem_uint_collect(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ulong_collect(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ulonglong_collect(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_longdouble_collect(shmem_team_t team, long double *target, const long double *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int8_collect(shmem_team_t team, int8_t *target, const int8_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int16_collect(shmem_team_t team, int16_t *target, const int16_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int32_collect(shmem_team_t team, int32_t *target, const int32_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int64_collect(shmem_team_t team, int64_t *target, const int64_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint8_collect(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint16_collect(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint32_collect(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint64_collect(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_size_collect(shmem_team_t team, size_t *target, const size_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ptrdiff_collect(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nelems); +#if OSHMEM_HAVE_C11 +#define pshmem_collect(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_char_collect, \ + short*: pshmem_short_collect, \ + int*: pshmem_int_collect, \ + long*: pshmem_long_collect, \ + long long*: pshmem_longlong_collect, \ + signed char*: pshmem_schar_collect, \ + unsigned char*: pshmem_uchar_collect, \ + unsigned short*: pshmem_ushort_collect, \ + unsigned int*: pshmem_uint_collect, \ + unsigned long*: pshmem_ulong_collect, \ + unsigned long long*: pshmem_ulonglong_collect, \ + float*: pshmem_float_collect, \ + double*: pshmem_double_collect, \ + long double*: pshmem_longdouble_collect, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + + +OSHMEM_DECLSPEC int pshmem_collectmem(shmem_team_t team, void *target, const void *source, size_t nelems); + +/* Teams fcollect */ +OSHMEM_DECLSPEC int pshmem_char_fcollect(shmem_team_t team, char *target, const char *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_short_fcollect(shmem_team_t team, short *target, const short *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int_fcollect(shmem_team_t team, int *target, const int *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_long_fcollect(shmem_team_t team, long *target, const long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_float_fcollect(shmem_team_t team, float *target, const float *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_double_fcollect(shmem_team_t team, double *target, const double *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_longlong_fcollect(shmem_team_t team, long long *target, const long long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_schar_fcollect(shmem_team_t team, signed char *target, const signed char *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uchar_fcollect(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ushort_fcollect(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nelems); +OSHMEM_DECLSPEC int 
pshmem_uint_fcollect(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ulong_fcollect(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ulonglong_fcollect(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_longdouble_fcollect(shmem_team_t team, long double *target, const long double *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int8_fcollect(shmem_team_t team, int8_t *target, const int8_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int16_fcollect(shmem_team_t team, int16_t *target, const int16_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int32_fcollect(shmem_team_t team, int32_t *target, const int32_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_int64_fcollect(shmem_team_t team, int64_t *target, const int64_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint8_fcollect(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint16_fcollect(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint32_fcollect(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_uint64_fcollect(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_size_fcollect(shmem_team_t team, size_t *target, const size_t *source, size_t nelems); +OSHMEM_DECLSPEC int pshmem_ptrdiff_fcollect(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nelems); +#if OSHMEM_HAVE_C11 +#define pshmem_fcollect(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_char_fcollect, \ + short*: pshmem_short_fcollect, \ + int*: pshmem_int_fcollect, \ + long*: pshmem_long_fcollect, \ + long long*: pshmem_longlong_fcollect, \ + signed char*: pshmem_schar_fcollect, \ + unsigned char*: pshmem_uchar_fcollect, \ + unsigned short*: pshmem_ushort_fcollect, \ + unsigned int*: pshmem_uint_fcollect, \ + unsigned long*: pshmem_ulong_fcollect, \ + unsigned long long*: pshmem_ulonglong_fcollect, \ + float*: pshmem_float_fcollect, \ + double*: pshmem_double_fcollect, \ + long double*: pshmem_longdouble_fcollect, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + +OSHMEM_DECLSPEC int pshmem_fcollectmem(shmem_team_t team, void *target, const void *source, size_t nelems); + + + +/* Teams reduction: AND */ +OSHMEM_DECLSPEC int pshmem_uchar_and_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ushort_and_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint_and_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulong_and_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulonglong_and_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int_and_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_longlong_and_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int8_and_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int16_and_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int 
pshmem_int32_and_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int64_and_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint8_and_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint16_and_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint32_and_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint64_and_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_size_and_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +#if OSHMEM_HAVE_C11 +#define pshmem_and_reduce(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned char*: pshmem_uchar_and_reduce, \ + unsigned short*: pshmem_ushort_and_reduce, \ + unsigned int*: pshmem_uint_and_reduce, \ + unsigned long*: pshmem_ulong_and_reduce, \ + unsigned long long*: pshmem_ulonglong_and_reduce, \ + int8_t*: pshmem_int8_and_reduce, \ + int16_t*: pshmem_int16_and_reduce, \ + int32_t*: pshmem_int32_and_reduce, \ + int64_t*: pshmem_int64_and_reduce, \ + long long*: pshmem_longlong_and_reduce, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + + +/* Teams reduction: OR */ +OSHMEM_DECLSPEC int pshmem_uchar_or_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ushort_or_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint_or_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulong_or_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); 
+OSHMEM_DECLSPEC int pshmem_ulonglong_or_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int_or_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_longlong_or_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int8_or_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int16_or_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int32_or_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int64_or_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint8_or_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint16_or_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint32_or_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint64_or_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_size_or_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +#if OSHMEM_HAVE_C11 +#define pshmem_or_reduce(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned char*: pshmem_uchar_or_reduce, \ + unsigned short*: pshmem_ushort_or_reduce, \ + unsigned int*: pshmem_uint_or_reduce, \ + unsigned long*: pshmem_ulong_or_reduce, \ + unsigned long long*: pshmem_ulonglong_or_reduce, \ + int8_t*: pshmem_int8_or_reduce, \ + int16_t*: pshmem_int16_or_reduce, \ + int32_t*: pshmem_int32_or_reduce, \ + int64_t*: pshmem_int64_or_reduce, \ + long long*: pshmem_longlong_or_reduce, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + + + +/* Teams reduction: XOR */ +OSHMEM_DECLSPEC int pshmem_uchar_xor_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ushort_xor_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint_xor_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulong_xor_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulonglong_xor_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int_xor_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_longlong_xor_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int8_xor_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int16_xor_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int32_xor_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int64_xor_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int 
pshmem_uint8_xor_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint16_xor_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint32_xor_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint64_xor_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_size_xor_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +#if OSHMEM_HAVE_C11 +#define pshmem_xor_reduce(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned char*: pshmem_uchar_xor_reduce, \ + unsigned short*: pshmem_ushort_xor_reduce, \ + unsigned int*: pshmem_uint_xor_reduce, \ + unsigned long*: pshmem_ulong_xor_reduce, \ + unsigned long long*: pshmem_ulonglong_xor_reduce, \ + int8_t*: pshmem_int8_xor_reduce, \ + int16_t*: pshmem_int16_xor_reduce, \ + int32_t*: pshmem_int32_xor_reduce, \ + int64_t*: pshmem_int64_xor_reduce, \ + long long*: pshmem_longlong_xor_reduce, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + +/* Teams reduction: MAX */ +OSHMEM_DECLSPEC int pshmem_char_max_reduce(shmem_team_t team, char *target, const char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_short_max_reduce(shmem_team_t team, short *target, const short *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int_max_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_long_max_reduce(shmem_team_t team, long *target, const long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_float_max_reduce(shmem_team_t team, float *target, const float *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_double_max_reduce(shmem_team_t team, double *target, const double *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_longlong_max_reduce(shmem_team_t team, long long *target, const 
long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_schar_max_reduce(shmem_team_t team, signed char *target, const signed char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uchar_max_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ushort_max_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint_max_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulong_max_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulonglong_max_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_longdouble_max_reduce(shmem_team_t team, long double *target, const long double *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int8_max_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int16_max_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int32_max_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int64_max_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint8_max_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint16_max_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint32_max_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint64_max_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int 
pshmem_size_max_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ptrdiff_max_reduce(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nreduce); +#if OSHMEM_HAVE_C11 +#define pshmem_max_reduce(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_char_max_reduce, \ + short*: pshmem_short_max_reduce, \ + int*: pshmem_int_max_reduce, \ + long*: pshmem_long_max_reduce, \ + long long*: pshmem_longlong_max_reduce, \ + signed char*: pshmem_schar_max_reduce, \ + unsigned char*: pshmem_uchar_max_reduce, \ + unsigned short*: pshmem_ushort_max_reduce, \ + unsigned int*: pshmem_uint_max_reduce, \ + unsigned long*: pshmem_ulong_max_reduce, \ + unsigned long long*: pshmem_ulonglong_max_reduce, \ + float*: pshmem_float_max_reduce, \ + double*: pshmem_double_max_reduce, \ + long double*: pshmem_longdouble_max_reduce, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + + +/* Teams reduction: MIN */ +OSHMEM_DECLSPEC int pshmem_char_min_reduce(shmem_team_t team, char *target, const char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_short_min_reduce(shmem_team_t team, short *target, const short *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int_min_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_long_min_reduce(shmem_team_t team, long *target, const long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_float_min_reduce(shmem_team_t team, float *target, const float *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_double_min_reduce(shmem_team_t team, double *target, const double *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_longlong_min_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_schar_min_reduce(shmem_team_t team, signed char *target, const signed char *source, size_t nreduce); +OSHMEM_DECLSPEC int 
pshmem_uchar_min_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ushort_min_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint_min_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulong_min_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulonglong_min_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_longdouble_min_reduce(shmem_team_t team, long double *target, const long double *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int8_min_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int16_min_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int32_min_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int64_min_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint8_min_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint16_min_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint32_min_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint64_min_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_size_min_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ptrdiff_min_reduce(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, 
size_t nreduce); +#if OSHMEM_HAVE_C11 +#define pshmem_min_reduce(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_char_min_reduce, \ + short*: pshmem_short_min_reduce, \ + int*: pshmem_int_min_reduce, \ + long*: pshmem_long_min_reduce, \ + long long*: pshmem_longlong_min_reduce, \ + signed char*: pshmem_schar_min_reduce, \ + unsigned char*: pshmem_uchar_min_reduce, \ + unsigned short*: pshmem_ushort_min_reduce, \ + unsigned int*: pshmem_uint_min_reduce, \ + unsigned long*: pshmem_ulong_min_reduce, \ + unsigned long long*: pshmem_ulonglong_min_reduce, \ + float*: pshmem_float_min_reduce, \ + double*: pshmem_double_min_reduce, \ + long double*: pshmem_longdouble_min_reduce, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + + + +/* Teams reduction: SUM */ +OSHMEM_DECLSPEC int pshmem_char_sum_reduce(shmem_team_t team, char *target, const char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_short_sum_reduce(shmem_team_t team, short *target, const short *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int_sum_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_long_sum_reduce(shmem_team_t team, long *target, const long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_float_sum_reduce(shmem_team_t team, float *target, const float *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_double_sum_reduce(shmem_team_t team, double *target, const double *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_longlong_sum_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_schar_sum_reduce(shmem_team_t team, signed char *target, const signed char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uchar_sum_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ushort_sum_reduce(shmem_team_t team, unsigned short *target, const unsigned short 
*source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint_sum_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulong_sum_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulonglong_sum_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_longdouble_sum_reduce(shmem_team_t team, long double *target, const long double *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int8_sum_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int16_sum_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int32_sum_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int64_sum_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint8_sum_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint16_sum_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint32_sum_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint64_sum_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_size_sum_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ptrdiff_sum_reduce(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_complexd_sum_reduce(shmem_team_t team, OSHMEM_COMPLEX_TYPE(double) *target, const OSHMEM_COMPLEX_TYPE(double) *source, size_t nreduce); +OSHMEM_DECLSPEC int 
pshmem_complexf_sum_reduce(shmem_team_t team, OSHMEM_COMPLEX_TYPE(float) *target, const OSHMEM_COMPLEX_TYPE(float) *source, size_t nreduce); +#if OSHMEM_HAVE_C11 +#define pshmem_sum_reduce(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_char_sum_reduce, \ + short*: pshmem_short_sum_reduce, \ + int*: pshmem_int_sum_reduce, \ + long*: pshmem_long_sum_reduce, \ + long long*: pshmem_longlong_sum_reduce, \ + signed char*: pshmem_schar_sum_reduce, \ + unsigned char*: pshmem_uchar_sum_reduce, \ + unsigned short*: pshmem_ushort_sum_reduce, \ + unsigned int*: pshmem_uint_sum_reduce, \ + unsigned long*: pshmem_ulong_sum_reduce, \ + unsigned long long*: pshmem_ulonglong_sum_reduce, \ + float*: pshmem_float_sum_reduce, \ + double*: pshmem_double_sum_reduce, \ + long double*: pshmem_longdouble_sum_reduce, \ + OSHMEM_COMPLEX_TYPE(double)*: pshmem_complexd_sum_reduce, \ + OSHMEM_COMPLEX_TYPE(float)*: pshmem_complexf_sum_reduce, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + + +/* Teams reduction: PROD */ +OSHMEM_DECLSPEC int pshmem_char_prod_reduce(shmem_team_t team, char *target, const char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_short_prod_reduce(shmem_team_t team, short *target, const short *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int_prod_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_long_prod_reduce(shmem_team_t team, long *target, const long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_float_prod_reduce(shmem_team_t team, float *target, const float *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_double_prod_reduce(shmem_team_t team, double *target, const double *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_longlong_prod_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_schar_prod_reduce(shmem_team_t team, signed char *target, const signed char *source, size_t 
nreduce); +OSHMEM_DECLSPEC int pshmem_uchar_prod_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ushort_prod_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint_prod_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulong_prod_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ulonglong_prod_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_longdouble_prod_reduce(shmem_team_t team, long double *target, const long double *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int8_prod_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int16_prod_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int32_prod_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_int64_prod_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint8_prod_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint16_prod_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint32_prod_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_uint64_prod_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_size_prod_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_ptrdiff_prod_reduce(shmem_team_t team, 
ptrdiff_t *target, const ptrdiff_t *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_complexd_prod_reduce(shmem_team_t team, OSHMEM_COMPLEX_TYPE(double) *target, const OSHMEM_COMPLEX_TYPE(double) *source, size_t nreduce); +OSHMEM_DECLSPEC int pshmem_complexf_prod_reduce(shmem_team_t team, OSHMEM_COMPLEX_TYPE(float) *target, const OSHMEM_COMPLEX_TYPE(float) *source, size_t nreduce); +#if OSHMEM_HAVE_C11 +#define pshmem_prod_reduce(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_char_prod_reduce, \ + short*: pshmem_short_prod_reduce, \ + int*: pshmem_int_prod_reduce, \ + long*: pshmem_long_prod_reduce, \ + long long*: pshmem_longlong_prod_reduce, \ + signed char*: pshmem_schar_prod_reduce, \ + unsigned char*: pshmem_uchar_prod_reduce, \ + unsigned short*: pshmem_ushort_prod_reduce, \ + unsigned int*: pshmem_uint_prod_reduce, \ + unsigned long*: pshmem_ulong_prod_reduce, \ + unsigned long long*: pshmem_ulonglong_prod_reduce, \ + float*: pshmem_float_prod_reduce, \ + double*: pshmem_double_prod_reduce, \ + long double*: pshmem_longdouble_prod_reduce, \ + OSHMEM_COMPLEX_TYPE(double)*: pshmem_complexd_prod_reduce, \ + OSHMEM_COMPLEX_TYPE(float)*: pshmem_complexf_prod_reduce, \ + default: __opshmem_datatype_ignore)(__VA_ARGS__) +#endif + /* * Elemental put routines */ @@ -458,6 +1014,217 @@ OSHMEM_DECLSPEC void pshmem_put64_nbi(void *target, const void *source, size_t OSHMEM_DECLSPEC void pshmem_put128_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_putmem_nbi(void *target, const void *source, size_t len, int pe); + +/* + * Signaled put routines + */ +OSHMEM_DECLSPEC void pshmem_ctx_char_put_signal(shmem_ctx_t ctx, char *dest, const char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_short_put_signal(shmem_ctx_t ctx, short *dest, const short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC 
void pshmem_ctx_int_put_signal(shmem_ctx_t ctx, int *dest, const int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_put_signal(shmem_ctx_t ctx, long *dest, const long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_put_signal(shmem_ctx_t ctx, float *dest, const float *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_put_signal(shmem_ctx_t ctx, double *dest, const double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_put_signal(shmem_ctx_t ctx, long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_schar_put_signal(shmem_ctx_t ctx, signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uchar_put_signal(shmem_ctx_t ctx, unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ushort_put_signal(shmem_ctx_t ctx, unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_put_signal(shmem_ctx_t ctx, unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_put_signal(shmem_ctx_t ctx, unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_put_signal(shmem_ctx_t ctx, unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, 
int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longdouble_put_signal(shmem_ctx_t ctx, long double *dest, const long double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int8_put_signal(shmem_ctx_t ctx, int8_t *dest, const int8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int16_put_signal(shmem_ctx_t ctx, int16_t *dest, const int16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_put_signal(shmem_ctx_t ctx, int32_t *dest, const int32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_put_signal(shmem_ctx_t ctx, int64_t *dest, const int64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint8_put_signal(shmem_ctx_t ctx, uint8_t *dest, const uint8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint16_put_signal(shmem_ctx_t ctx, uint16_t *dest, const uint16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_put_signal(shmem_ctx_t ctx, uint32_t *dest, const uint32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_put_signal(shmem_ctx_t ctx, uint64_t *dest, const uint64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_put_signal(shmem_ctx_t ctx, size_t *dest, const size_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_put_signal(shmem_ctx_t ctx, ptrdiff_t *dest, const ptrdiff_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + 
+OSHMEM_DECLSPEC void pshmem_char_put_signal(char *dest, const char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_short_put_signal(short *dest, const short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_int_put_signal(int *dest, const int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_long_put_signal(long *dest, const long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_float_put_signal(float *dest, const float *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_double_put_signal(double *dest, const double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_longlong_put_signal(long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_schar_put_signal(signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_uchar_put_signal(unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_ushort_put_signal(unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_uint_put_signal(unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_ulong_put_signal(unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_ulonglong_put_signal(unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_longdouble_put_signal(long double *dest, const long double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_int8_put_signal(int8_t *dest, const int8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_int16_put_signal(int16_t *dest, const int16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_int32_put_signal(int32_t *dest, const int32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_int64_put_signal(int64_t *dest, const int64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_uint8_put_signal(uint8_t *dest, const uint8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_uint16_put_signal(uint16_t *dest, const uint16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_uint32_put_signal(uint32_t *dest, const uint32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_uint64_put_signal(uint64_t *dest, const uint64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_size_put_signal(size_t *dest, const size_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_ptrdiff_put_signal(ptrdiff_t *dest, const ptrdiff_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+/* pshmem_put_signal(...): C11 type-generic dispatcher. Arg 1 of type shmem_ctx_t selects the pshmem_ctx_<type>_put_signal routine by the pointer type of arg 2; otherwise the pointer type of arg 1 selects pshmem_<type>_put_signal. Both selections go through &* so an array argument is matched by its element-pointer type. __OSHMEM_VAR_ARG1/2 are defined elsewhere; presumably they yield the 1st/2nd macro argument -- confirm. */
+#if OSHMEM_HAVE_C11
+#define pshmem_put_signal(...)                                              \
+    _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)),                            \
+        shmem_ctx_t: _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)),           \
+            char*:               pshmem_ctx_char_put_signal,                \
+            short*:              pshmem_ctx_short_put_signal,               \
+            int*:                pshmem_ctx_int_put_signal,                 \
+            long*:               pshmem_ctx_long_put_signal,                \
+            long long*:          pshmem_ctx_longlong_put_signal,            \
+            signed char*:        pshmem_ctx_schar_put_signal,               \
+            unsigned char*:      pshmem_ctx_uchar_put_signal,               \
+            unsigned short*:     pshmem_ctx_ushort_put_signal,              \
+            unsigned int*:       pshmem_ctx_uint_put_signal,                \
+            unsigned long*:      pshmem_ctx_ulong_put_signal,               \
+            unsigned long long*: pshmem_ctx_ulonglong_put_signal,           \
+            float*:              pshmem_ctx_float_put_signal,               \
+            double*:             pshmem_ctx_double_put_signal,              \
+            long double*:        pshmem_ctx_longdouble_put_signal,          \
+            default:             __opshmem_datatype_ignore),                \
+        char*:               pshmem_char_put_signal,                        \
+        short*:              pshmem_short_put_signal,                       \
+        int*:                pshmem_int_put_signal,                         \
+        long*:               pshmem_long_put_signal,                        \
+        long long*:          pshmem_longlong_put_signal,                    \
+        signed char*:        pshmem_schar_put_signal,                       \
+        unsigned char*:      pshmem_uchar_put_signal,                       \
+        unsigned short*:     pshmem_ushort_put_signal,                      \
+        unsigned int*:       pshmem_uint_put_signal,                        \
+        unsigned long*:      pshmem_ulong_put_signal,                       \
+        unsigned long long*: pshmem_ulonglong_put_signal,                   \
+        float*:              pshmem_float_put_signal,                       \
+        double*:             pshmem_double_put_signal,                      \
+        long double*:        pshmem_longdouble_put_signal)(__VA_ARGS__)
+#endif
+/* Untyped (void *) signaled puts; the 8/16/32/64 suffix presumably names the element width in bits -- confirm against the OpenSHMEM specification. */
+OSHMEM_DECLSPEC void pshmem_put8_signal(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_put16_signal(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_put32_signal(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_put64_signal(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
+OSHMEM_DECLSPEC void pshmem_put128_signal(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +OSHMEM_DECLSPEC void pshmem_ctx_put8_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put16_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put32_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put64_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put128_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +OSHMEM_DECLSPEC void pshmem_putmem_signal(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_putmem_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +/* + * Nonblocking signaled put routines + */ +OSHMEM_DECLSPEC void pshmem_ctx_char_put_signal_nbi(shmem_ctx_t ctx, char *dest, const char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_short_put_signal_nbi(shmem_ctx_t ctx, short *dest, const short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int_put_signal_nbi(shmem_ctx_t ctx, int *dest, const int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_put_signal_nbi(shmem_ctx_t ctx, long *dest, const long *source, size_t nelems, 
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_put_signal_nbi(shmem_ctx_t ctx, float *dest, const float *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_put_signal_nbi(shmem_ctx_t ctx, double *dest, const double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_put_signal_nbi(shmem_ctx_t ctx, long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_schar_put_signal_nbi(shmem_ctx_t ctx, signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uchar_put_signal_nbi(shmem_ctx_t ctx, unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ushort_put_signal_nbi(shmem_ctx_t ctx, unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_put_signal_nbi(shmem_ctx_t ctx, unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_put_signal_nbi(shmem_ctx_t ctx, unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_put_signal_nbi(shmem_ctx_t ctx, unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longdouble_put_signal_nbi(shmem_ctx_t ctx, long double *dest, const long double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void 
pshmem_ctx_int8_put_signal_nbi(shmem_ctx_t ctx, int8_t *dest, const int8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int16_put_signal_nbi(shmem_ctx_t ctx, int16_t *dest, const int16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_put_signal_nbi(shmem_ctx_t ctx, int32_t *dest, const int32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_put_signal_nbi(shmem_ctx_t ctx, int64_t *dest, const int64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint8_put_signal_nbi(shmem_ctx_t ctx, uint8_t *dest, const uint8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint16_put_signal_nbi(shmem_ctx_t ctx, uint16_t *dest, const uint16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_put_signal_nbi(shmem_ctx_t ctx, uint32_t *dest, const uint32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_put_signal_nbi(shmem_ctx_t ctx, uint64_t *dest, const uint64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_put_signal_nbi(shmem_ctx_t ctx, size_t *dest, const size_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_put_signal_nbi(shmem_ctx_t ctx, ptrdiff_t *dest, const ptrdiff_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +OSHMEM_DECLSPEC void pshmem_char_put_signal_nbi(char *dest, const char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void 
pshmem_short_put_signal_nbi(short *dest, const short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_int_put_signal_nbi(int *dest, const int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_long_put_signal_nbi(long *dest, const long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_float_put_signal_nbi(float *dest, const float *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_double_put_signal_nbi(double *dest, const double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_put_signal_nbi(long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_schar_put_signal_nbi(signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_uchar_put_signal_nbi(unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ushort_put_signal_nbi(unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_uint_put_signal_nbi(unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_put_signal_nbi(unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_put_signal_nbi(unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void 
pshmem_longdouble_put_signal_nbi(long double *dest, const long double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_int8_put_signal_nbi(int8_t *dest, const int8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_int16_put_signal_nbi(int16_t *dest, const int16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_int32_put_signal_nbi(int32_t *dest, const int32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_int64_put_signal_nbi(int64_t *dest, const int64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_uint8_put_signal_nbi(uint8_t *dest, const uint8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_uint16_put_signal_nbi(uint16_t *dest, const uint16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_put_signal_nbi(uint32_t *dest, const uint32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_put_signal_nbi(uint64_t *dest, const uint64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_size_put_signal_nbi(size_t *dest, const size_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_put_signal_nbi(ptrdiff_t *dest, const ptrdiff_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +#if OSHMEM_HAVE_C11 +#define pshmem_put_signal_nbi(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_ctx_char_put_signal_nbi, \ + short*: pshmem_ctx_short_put_signal_nbi, \ + int*: pshmem_ctx_int_put_signal_nbi, \ + long*: pshmem_ctx_long_put_signal_nbi, \ + long long*: pshmem_ctx_longlong_put_signal_nbi, \ + signed char*: pshmem_ctx_schar_put_signal_nbi, \ + unsigned char*: pshmem_ctx_uchar_put_signal_nbi, \ + unsigned short*: pshmem_ctx_ushort_put_signal_nbi, \ + unsigned int*: pshmem_ctx_uint_put_signal_nbi, \ + unsigned long*: pshmem_ctx_ulong_put_signal_nbi, \ + unsigned long long*: pshmem_ctx_ulonglong_put_signal_nbi, \ + float*: pshmem_ctx_float_put_signal_nbi, \ + double*: pshmem_ctx_double_put_signal_nbi, \ + long double*: pshmem_ctx_longdouble_put_signal_nbi, \ + default: __opshmem_datatype_ignore), \ + char*: pshmem_char_put_signal_nbi, \ + short*: pshmem_short_put_signal_nbi, \ + int*: pshmem_int_put_signal_nbi, \ + long*: pshmem_long_put_signal_nbi, \ + long long*: pshmem_longlong_put_signal_nbi, \ + signed char*: pshmem_schar_put_signal_nbi, \ + unsigned char*: pshmem_uchar_put_signal_nbi, \ + unsigned short*: pshmem_ushort_put_signal_nbi, \ + unsigned int*: pshmem_uint_put_signal_nbi, \ + unsigned long*: pshmem_ulong_put_signal_nbi, \ + unsigned long long*: pshmem_ulonglong_put_signal_nbi, \ + float*: pshmem_float_put_signal_nbi, \ + double*: pshmem_double_put_signal_nbi, \ + long double*: pshmem_longdouble_put_signal_nbi)(__VA_ARGS__) +#endif + +OSHMEM_DECLSPEC void pshmem_put8_signal_nbi(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_put16_signal_nbi(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_put32_signal_nbi(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void 
pshmem_put64_signal_nbi(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_put128_signal_nbi(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +OSHMEM_DECLSPEC void pshmem_ctx_put8_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put16_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put32_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put64_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put128_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +OSHMEM_DECLSPEC void pshmem_putmem_signal_nbi(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_putmem_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + + +OSHMEM_DECLSPEC uint64_t pshmem_signal_fetch(const uint64_t *sig_addr); + + /* * Elemental get routines */ @@ -858,6 +1625,12 @@ OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_swap(shmem_ctx_t ctx, unsi OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_swap(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC float pshmem_ctx_float_atomic_swap(shmem_ctx_t ctx, float *target, float value, int pe); OSHMEM_DECLSPEC double 
pshmem_ctx_double_atomic_swap(shmem_ctx_t ctx, double *target, double value, int pe); +OSHMEM_DECLSPEC int32_t pshmem_ctx_int32_atomic_swap(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmem_ctx_int64_atomic_swap(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_ctx_uint32_atomic_swap(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_ctx_uint64_atomic_swap(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t pshmem_ctx_size_atomic_swap(shmem_ctx_t ctx, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ctx_ptrdiff_atomic_swap(shmem_ctx_t ctx, ptrdiff_t *target, ptrdiff_t value, int pe); OSHMEM_DECLSPEC int pshmem_int_atomic_swap(int *target, int value, int pe); OSHMEM_DECLSPEC long pshmem_long_atomic_swap(long *target, long value, int pe); @@ -867,6 +1640,13 @@ OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_swap(unsigned long *target, un OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_swap(unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC float pshmem_float_atomic_swap(float *target, float value, int pe); OSHMEM_DECLSPEC double pshmem_double_atomic_swap(double *target, double value, int pe); +OSHMEM_DECLSPEC int32_t pshmem_int32_atomic_swap( int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmem_int64_atomic_swap( int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_uint32_atomic_swap( uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_uint64_atomic_swap( uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t pshmem_size_atomic_swap( size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ptrdiff_atomic_swap( ptrdiff_t *target, ptrdiff_t value, int pe); + #if OSHMEM_HAVE_C11 #define pshmem_atomic_swap(...) 
\ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -914,6 +1694,12 @@ OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_set(shmem_ctx_t ctx, unsigned long OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_set(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_float_atomic_set(shmem_ctx_t ctx, float *target, float value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_double_atomic_set(shmem_ctx_t ctx, double *target, double value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_set(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_set(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_set(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_set(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_atomic_set(shmem_ctx_t ctx, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_atomic_set(shmem_ctx_t ctx, ptrdiff_t *target, ptrdiff_t value, int pe); OSHMEM_DECLSPEC void pshmem_int_atomic_set(int *target, int value, int pe); OSHMEM_DECLSPEC void pshmem_long_atomic_set(long *target, long value, int pe); @@ -923,6 +1709,13 @@ OSHMEM_DECLSPEC void pshmem_ulong_atomic_set(unsigned long *target, unsigned lon OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_set(unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC void pshmem_float_atomic_set(float *target, float value, int pe); OSHMEM_DECLSPEC void pshmem_double_atomic_set(double *target, double value, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_set(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_set(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_set(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void 
pshmem_uint64_atomic_set(uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_size_atomic_set(size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_atomic_set(ptrdiff_t *target, ptrdiff_t value, int pe); + #if OSHMEM_HAVE_C11 #define pshmem_atomic_set(...) \ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -968,6 +1761,12 @@ OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_compare_swap(shmem_ctx_t ct OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_compare_swap(shmem_ctx_t ctx, unsigned int *target, unsigned int cond, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_compare_swap(shmem_ctx_t ctx, unsigned long *target, unsigned long cond, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_compare_swap(shmem_ctx_t ctx, unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t pshmem_ctx_int32_atomic_compare_swap(shmem_ctx_t ctx, int32_t *target, int32_t cond, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmem_ctx_int64_atomic_compare_swap(shmem_ctx_t ctx, int64_t *target, int64_t cond, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_ctx_uint32_atomic_compare_swap(shmem_ctx_t ctx, uint32_t *target, uint32_t cond, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_ctx_uint64_atomic_compare_swap(shmem_ctx_t ctx, uint64_t *target, uint64_t cond, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t pshmem_ctx_size_atomic_compare_swap(shmem_ctx_t ctx, size_t *target, size_t cond, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ctx_ptrdiff_atomic_compare_swap(shmem_ctx_t ctx, ptrdiff_t *target, ptrdiff_t cond, ptrdiff_t value, int pe); OSHMEM_DECLSPEC int pshmem_int_atomic_compare_swap(int *target, int cond, int value, int pe); OSHMEM_DECLSPEC long pshmem_long_atomic_compare_swap(long *target, long cond, long value, int pe); @@ -975,6 +1774,12 @@ OSHMEM_DECLSPEC long long 
pshmem_longlong_atomic_compare_swap(long long *target, OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_compare_swap(unsigned int *target, unsigned int cond, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_compare_swap(unsigned long *target, unsigned long cond, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_compare_swap(unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t pshmem_int32_atomic_compare_swap( int32_t *target, int32_t cond, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmem_int64_atomic_compare_swap( int64_t *target, int64_t cond, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_uint32_atomic_compare_swap( uint32_t *target, uint32_t cond, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_uint64_atomic_compare_swap( uint64_t *target, uint64_t cond, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t pshmem_size_atomic_compare_swap( size_t *target, size_t cond, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ptrdiff_atomic_compare_swap( ptrdiff_t *target, ptrdiff_t cond, ptrdiff_t value, int pe); #if OSHMEM_HAVE_C11 #define pshmem_atomic_compare_swap(...) 
\ @@ -1013,6 +1818,12 @@ OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_fetch_add(shmem_ctx_t ctx, OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_fetch_add(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch_add(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_fetch_add(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t pshmem_ctx_int32_atomic_fetch_add(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmem_ctx_int64_atomic_fetch_add(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_ctx_uint32_atomic_fetch_add(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_ctx_uint64_atomic_fetch_add(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t pshmem_ctx_size_atomic_fetch_add(shmem_ctx_t ctx, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ctx_ptrdiff_atomic_fetch_add(shmem_ctx_t ctx, ptrdiff_t *target, ptrdiff_t value, int pe); OSHMEM_DECLSPEC int pshmem_int_atomic_fetch_add(int *target, int value, int pe); OSHMEM_DECLSPEC long pshmem_long_atomic_fetch_add(long *target, long value, int pe); @@ -1020,6 +1831,13 @@ OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fetch_add(long long *target, lo OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_fetch_add(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch_add(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_add(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t pshmem_int32_atomic_fetch_add(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t 
pshmem_int64_atomic_fetch_add(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_uint32_atomic_fetch_add(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_uint64_atomic_fetch_add(uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t pshmem_size_atomic_fetch_add(size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ptrdiff_atomic_fetch_add(ptrdiff_t *target, ptrdiff_t value, int pe); + #if OSHMEM_HAVE_C11 #define pshmem_atomic_fetch_add(...) \ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1206,6 +2024,12 @@ OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch(shmem_ctx_t ctx, con OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_fetch(shmem_ctx_t ctx, const unsigned long long *target, int pe); OSHMEM_DECLSPEC float pshmem_ctx_float_atomic_fetch(shmem_ctx_t ctx, const float *target, int pe); OSHMEM_DECLSPEC double pshmem_ctx_double_atomic_fetch(shmem_ctx_t ctx, const double *target, int pe); +OSHMEM_DECLSPEC int32_t pshmem_ctx_int32_atomic_fetch(shmem_ctx_t ctx, const int32_t *target, int pe); +OSHMEM_DECLSPEC int64_t pshmem_ctx_int64_atomic_fetch(shmem_ctx_t ctx, const int64_t *target, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_ctx_uint32_atomic_fetch(shmem_ctx_t ctx, const uint32_t *target, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_ctx_uint64_atomic_fetch(shmem_ctx_t ctx, const uint64_t *target, int pe); +OSHMEM_DECLSPEC size_t pshmem_ctx_size_atomic_fetch(shmem_ctx_t ctx, const size_t *target, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ctx_ptrdiff_atomic_fetch(shmem_ctx_t ctx, const ptrdiff_t *target, int pe); OSHMEM_DECLSPEC int pshmem_int_atomic_fetch(const int *target, int pe); OSHMEM_DECLSPEC long pshmem_long_atomic_fetch(const long *target, int pe); @@ -1215,6 +2039,12 @@ OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch(const unsigned long *tar OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch(const unsigned long long *target, int 
pe); OSHMEM_DECLSPEC float pshmem_float_atomic_fetch(const float *target, int pe); OSHMEM_DECLSPEC double pshmem_double_atomic_fetch(const double *target, int pe); +OSHMEM_DECLSPEC int32_t pshmem_int32_atomic_fetch(const int32_t *target, int pe); +OSHMEM_DECLSPEC int64_t pshmem_int64_atomic_fetch(const int64_t *target, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_uint32_atomic_fetch(const uint32_t *target, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_uint64_atomic_fetch(const uint64_t *target, int pe); +OSHMEM_DECLSPEC size_t pshmem_size_atomic_fetch(const size_t *target, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ptrdiff_atomic_fetch(const ptrdiff_t *target, int pe); #if OSHMEM_HAVE_C11 #define pshmem_atomic_fetch(...) \ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1260,6 +2090,12 @@ OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_fetch_inc(shmem_ctx_t ctx, OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_fetch_inc(shmem_ctx_t ctx, unsigned int *target, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch_inc(shmem_ctx_t ctx, unsigned long *target, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_fetch_inc(shmem_ctx_t ctx, unsigned long long *target, int pe); +OSHMEM_DECLSPEC int32_t pshmem_ctx_int32_atomic_fetch_inc(shmem_ctx_t ctx, int32_t *target, int pe); +OSHMEM_DECLSPEC int64_t pshmem_ctx_int64_atomic_fetch_inc(shmem_ctx_t ctx, int64_t *target, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_ctx_uint32_atomic_fetch_inc(shmem_ctx_t ctx, uint32_t *target, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_ctx_uint64_atomic_fetch_inc(shmem_ctx_t ctx, uint64_t *target, int pe); +OSHMEM_DECLSPEC size_t pshmem_ctx_size_atomic_fetch_inc(shmem_ctx_t ctx, size_t *target, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ctx_ptrdiff_atomic_fetch_inc(shmem_ctx_t ctx, ptrdiff_t *target, int pe); OSHMEM_DECLSPEC int pshmem_int_atomic_fetch_inc(int *target, int pe); OSHMEM_DECLSPEC long pshmem_long_atomic_fetch_inc(long *target, int pe); @@ 
-1267,6 +2103,13 @@ OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fetch_inc(long long *target, in OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_fetch_inc(unsigned int *target, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch_inc(unsigned long *target, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_inc(unsigned long long *target, int pe); +OSHMEM_DECLSPEC int32_t pshmem_int32_atomic_fetch_inc(int32_t *target, int pe); +OSHMEM_DECLSPEC int64_t pshmem_int64_atomic_fetch_inc(int64_t *target, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_uint32_atomic_fetch_inc(uint32_t *target, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_uint64_atomic_fetch_inc(uint64_t *target, int pe); +OSHMEM_DECLSPEC size_t pshmem_size_atomic_fetch_inc(size_t *target, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ptrdiff_atomic_fetch_inc(ptrdiff_t *target, int pe); + #if OSHMEM_HAVE_C11 #define pshmem_atomic_fetch_inc(...) \ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1304,6 +2147,12 @@ OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_add(shmem_ctx_t ctx, long long * OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_add(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_add(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_add(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_add(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_add(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_add(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_add(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_atomic_add(shmem_ctx_t ctx, size_t *target, size_t value, 
int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_atomic_add(shmem_ctx_t ctx, ptrdiff_t *target, ptrdiff_t value, int pe); OSHMEM_DECLSPEC void pshmem_int_atomic_add(int *target, int value, int pe); OSHMEM_DECLSPEC void pshmem_long_atomic_add(long *target, long value, int pe); @@ -1311,6 +2160,13 @@ OSHMEM_DECLSPEC void pshmem_longlong_atomic_add(long long *target, long long val OSHMEM_DECLSPEC void pshmem_uint_atomic_add(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void pshmem_ulong_atomic_add(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_add(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_add(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_add(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_add(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_add(uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_size_atomic_add(size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_atomic_add(ptrdiff_t *target, ptrdiff_t value, int pe); + #if OSHMEM_HAVE_C11 #define pshmem_atomic_add(...) 
\ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1471,6 +2327,12 @@ OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_inc(shmem_ctx_t ctx, long long * OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_inc(shmem_ctx_t ctx, unsigned int *target, int pe); OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_inc(shmem_ctx_t ctx, unsigned long *target, int pe); OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_inc(shmem_ctx_t ctx, unsigned long long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_inc(shmem_ctx_t ctx, int32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_inc(shmem_ctx_t ctx, int64_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_inc(shmem_ctx_t ctx, uint32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_inc(shmem_ctx_t ctx, uint64_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_atomic_inc(shmem_ctx_t ctx, size_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_atomic_inc(shmem_ctx_t ctx, ptrdiff_t *target, int pe); OSHMEM_DECLSPEC void pshmem_int_atomic_inc(int *target, int pe); OSHMEM_DECLSPEC void pshmem_long_atomic_inc(long *target, int pe); @@ -1478,6 +2340,13 @@ OSHMEM_DECLSPEC void pshmem_longlong_atomic_inc(long long *target, int pe); OSHMEM_DECLSPEC void pshmem_uint_atomic_inc(unsigned int *target, int pe); OSHMEM_DECLSPEC void pshmem_ulong_atomic_inc(unsigned long *target, int pe); OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_inc(unsigned long long *target, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_inc(int32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_inc(int64_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_inc(uint32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_inc(uint64_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_size_atomic_inc(size_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_atomic_inc(ptrdiff_t *target, int pe); + #if OSHMEM_HAVE_C11 #define pshmem_atomic_inc(...) 
\ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1508,6 +2377,377 @@ OSHMEM_DECLSPEC void pshmem_longlong_inc(long long *target, int pe); long long*: pshmem_longlong_inc)(dst, pe) #endif +/* + * Nonblocking atomic memory operations + */ + + +/* Atomic Nonblocking Fetch */ +OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_fetch_nbi(shmem_ctx_t ctx, int *fetch, const int *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_fetch_nbi(shmem_ctx_t ctx, long *fetch, const long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_fetch_nbi(shmem_ctx_t ctx, long long *fetch, const long long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_fetch_nbi(shmem_ctx_t ctx, unsigned int *fetch, const unsigned int *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_fetch_nbi(shmem_ctx_t ctx, unsigned long *fetch, const unsigned long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_fetch_nbi(shmem_ctx_t ctx, unsigned long long *fetch, const unsigned long long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_atomic_fetch_nbi(shmem_ctx_t ctx, float *fetch, const float *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_atomic_fetch_nbi(shmem_ctx_t ctx, double *fetch, const double *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_fetch_nbi(shmem_ctx_t ctx, int32_t *fetch, const int32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_fetch_nbi(shmem_ctx_t ctx, int64_t *fetch, const int64_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_fetch_nbi(shmem_ctx_t ctx, uint32_t *fetch, const uint32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_fetch_nbi(shmem_ctx_t ctx, uint64_t *fetch, const uint64_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_atomic_fetch_nbi(shmem_ctx_t ctx, size_t *fetch, const size_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_atomic_fetch_nbi(shmem_ctx_t ctx, ptrdiff_t *fetch, const ptrdiff_t 
*target, int pe); + +OSHMEM_DECLSPEC void pshmem_int_atomic_fetch_nbi(int *fetch, const int *target, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_fetch_nbi(long *fetch, const long *target, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_fetch_nbi(long long *fetch, const long long *target, int pe); +OSHMEM_DECLSPEC void pshmem_uint_atomic_fetch_nbi(unsigned int *fetch, const unsigned int *target, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_atomic_fetch_nbi(unsigned long *fetch, const unsigned long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_fetch_nbi(unsigned long long *fetch, const unsigned long long *target, int pe); +OSHMEM_DECLSPEC void pshmem_float_atomic_fetch_nbi(float *fetch, const float *target, int pe); +OSHMEM_DECLSPEC void pshmem_double_atomic_fetch_nbi(double *fetch, const double *target, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_fetch_nbi(int32_t *fetch, const int32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_fetch_nbi(int64_t *fetch, const int64_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_fetch_nbi(uint32_t *fetch, const uint32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_fetch_nbi(uint64_t *fetch, const uint64_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_size_atomic_fetch_nbi(size_t *fetch, const size_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_atomic_fetch_nbi(ptrdiff_t *fetch, const ptrdiff_t *target, int pe); + +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_fetch_nbi(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_fetch_nbi, \ + long*: pshmem_ctx_long_atomic_fetch_nbi, \ + long long*: pshmem_ctx_longlong_atomic_fetch_nbi, \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_nbi, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_nbi, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_nbi, \ + float*: pshmem_ctx_float_atomic_fetch_nbi, \ + double*: pshmem_ctx_double_atomic_fetch_nbi, \ + size_t*: pshmem_ctx_size_atomic_fetch_nbi, \ + ptrdiff_t*: pshmem_ctx_ptrdiff_atomic_fetch_nbi, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_fetch_nbi, \ + long*: pshmem_long_atomic_fetch_nbi, \ + long long*: pshmem_longlong_atomic_fetch_nbi, \ + unsigned int*: pshmem_uint_atomic_fetch_nbi, \ + unsigned long*: pshmem_ulong_atomic_fetch_nbi, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch_nbi, \ + float*: pshmem_float_atomic_fetch_nbi, \ + double*: pshmem_double_atomic_fetch_nbi, \ + size_t*: pshmem_size_atomic_fetch_nbi, \ + ptrdiff_t*: pshmem_ptrdiff_atomic_fetch_nbi)(__VA_ARGS__) +#endif + + +/* Atomic Nonblocking Compare and Swap */ +OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_compare_swap_nbi(shmem_ctx_t ctx, int *fetch, int *target, int cond, int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_compare_swap_nbi(shmem_ctx_t ctx, long *fetch, long *target, long cond, long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_compare_swap_nbi(shmem_ctx_t ctx, long long *fetch, long long *target, long long cond, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_compare_swap_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int cond, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_compare_swap_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long cond, unsigned long value, int pe); +OSHMEM_DECLSPEC void 
pshmem_ctx_ulonglong_atomic_compare_swap_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_compare_swap_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int32_t cond, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_compare_swap_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t cond, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_compare_swap_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t cond, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_compare_swap_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t cond, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_atomic_compare_swap_nbi(shmem_ctx_t ctx, size_t *fetch, size_t *target, size_t cond, size_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_atomic_compare_swap_nbi(shmem_ctx_t ctx, ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t cond, ptrdiff_t value, int pe); + +OSHMEM_DECLSPEC void pshmem_int_atomic_compare_swap_nbi(int *fetch, int *target, int cond, int value, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_compare_swap_nbi(long *fetch, long *target, long cond, long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_compare_swap_nbi(long long *fetch, long long *target, long long cond, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_uint_atomic_compare_swap_nbi(unsigned int *fetch, unsigned int *target, unsigned int cond, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_atomic_compare_swap_nbi(unsigned long *fetch, unsigned long *target, unsigned long cond, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_compare_swap_nbi(unsigned long long *fetch, unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); +OSHMEM_DECLSPEC void 
pshmem_int32_atomic_compare_swap_nbi(int32_t *fetch, int32_t *target, int32_t cond, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_compare_swap_nbi(int64_t *fetch, int64_t *target, int64_t cond, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_compare_swap_nbi(uint32_t *fetch, uint32_t *target, uint32_t cond, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_compare_swap_nbi(uint64_t *fetch, uint64_t *target, uint64_t cond, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_size_atomic_compare_swap_nbi(size_t *fetch, size_t *target, size_t cond, size_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_atomic_compare_swap_nbi(ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t cond, ptrdiff_t value, int pe); + +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_compare_swap_nbi(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_compare_swap_nbi, \ + long*: pshmem_ctx_long_atomic_compare_swap_nbi, \ + long long*: pshmem_ctx_longlong_atomic_compare_swap_nbi, \ + unsigned int*: pshmem_ctx_uint_atomic_compare_swap_nbi, \ + unsigned long*: pshmem_ctx_ulong_atomic_compare_swap_nbi, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_compare_swap_nbi, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_compare_swap_nbi, \ + long*: pshmem_long_atomic_compare_swap_nbi, \ + long long*: pshmem_longlong_atomic_compare_swap_nbi, \ + unsigned int*: pshmem_uint_atomic_compare_swap_nbi, \ + unsigned long*: pshmem_ulong_atomic_compare_swap_nbi, \ + unsigned long long*: pshmem_ulonglong_atomic_compare_swap_nbi)(__VA_ARGS__) +#endif + +/* Atomic Nonblocking Swap */ +OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_swap_nbi(shmem_ctx_t ctx, int *fetch, int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_swap_nbi(shmem_ctx_t ctx, long *fetch, long *target, long value, int pe); +OSHMEM_DECLSPEC void 
pshmem_ctx_longlong_atomic_swap_nbi(shmem_ctx_t ctx, long long *fetch, long long *target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_swap_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_swap_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_swap_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_atomic_swap_nbi(shmem_ctx_t ctx, float *fetch, float *target, float value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_atomic_swap_nbi(shmem_ctx_t ctx, double *fetch, double *target, double value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_swap_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_swap_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_swap_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_swap_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_atomic_swap_nbi(shmem_ctx_t ctx, size_t *fetch, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_atomic_swap_nbi(shmem_ctx_t ctx, ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t value, int pe); + +OSHMEM_DECLSPEC void pshmem_int_atomic_swap_nbi(int *fetch, int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_swap_nbi(long *fetch, long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_swap_nbi(long long *fetch, long long *target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_uint_atomic_swap_nbi(unsigned int *fetch, unsigned 
int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_atomic_swap_nbi(unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_swap_nbi(unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_float_atomic_swap_nbi(float *fetch, float *target, float value, int pe); +OSHMEM_DECLSPEC void pshmem_double_atomic_swap_nbi(double *fetch, double *target, double value, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_swap_nbi(int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_swap_nbi(int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_swap_nbi(uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_swap_nbi(uint64_t *fetch, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_size_atomic_swap_nbi(size_t *fetch, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_atomic_swap_nbi(ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t value, int pe); + +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_swap_nbi(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_swap_nbi, \ + long*: pshmem_ctx_long_atomic_swap_nbi, \ + long long*: pshmem_ctx_longlong_atomic_swap_nbi, \ + unsigned int*: pshmem_ctx_uint_atomic_swap_nbi, \ + unsigned long*: pshmem_ctx_ulong_atomic_swap_nbi, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_swap_nbi, \ + float*: pshmem_ctx_float_atomic_swap_nbi, \ + double*: pshmem_ctx_double_atomic_swap_nbi, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_swap_nbi, \ + long*: pshmem_long_atomic_swap_nbi, \ + long long*: pshmem_longlong_atomic_swap_nbi, \ + unsigned int*: pshmem_uint_atomic_swap_nbi, \ + unsigned long*: pshmem_ulong_atomic_swap_nbi, \ + unsigned long long*: pshmem_ulonglong_atomic_swap_nbi, \ + float*: pshmem_float_atomic_swap_nbi, \ + double*: pshmem_double_atomic_swap_nbi)(__VA_ARGS__) +#endif + + + +/* Atomic Nonblocking Fetch and Increment */ +OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_fetch_inc_nbi(shmem_ctx_t ctx, int *fetch, int *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_fetch_inc_nbi(shmem_ctx_t ctx, long *fetch, long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_fetch_inc_nbi(shmem_ctx_t ctx, long long *fetch, long long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_fetch_inc_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_fetch_inc_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_fetch_inc_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_fetch_inc_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_fetch_inc_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int pe); 
+OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_fetch_inc_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_fetch_inc_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_atomic_fetch_inc_nbi(shmem_ctx_t ctx, size_t *fetch, size_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_atomic_fetch_inc_nbi(shmem_ctx_t ctx, ptrdiff_t *fetch, ptrdiff_t *target, int pe); + +OSHMEM_DECLSPEC void pshmem_int_atomic_fetch_inc_nbi(int *fetch, int *target, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_fetch_inc_nbi(long *fetch, long *target, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_fetch_inc_nbi(long long *fetch, long long *target, int pe); +OSHMEM_DECLSPEC void pshmem_uint_atomic_fetch_inc_nbi(unsigned int *fetch, unsigned int *target, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_atomic_fetch_inc_nbi(unsigned long *fetch, unsigned long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_fetch_inc_nbi(unsigned long long *fetch, unsigned long long *target, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_fetch_inc_nbi(int32_t *fetch, int32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_fetch_inc_nbi(int64_t *fetch, int64_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_fetch_inc_nbi(uint32_t *fetch, uint32_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_fetch_inc_nbi(uint64_t *fetch, uint64_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_size_atomic_fetch_inc_nbi(size_t *fetch, size_t *target, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_atomic_fetch_inc_nbi(ptrdiff_t *fetch, ptrdiff_t *target, int pe); + +#if OSHMEM_HAVE_C11 /* type-generic front end: built on _Generic, so only defined when OSHMEM_HAVE_C11 is set; a shmem_ctx_t from __OSHMEM_VAR_ARG1(...) routes to the ctx variants keyed on __OSHMEM_VAR_ARG2(...), a listed pointer type routes to the non-ctx variant */ +#define pshmem_atomic_fetch_inc_nbi(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_fetch_inc_nbi, \ + long*: pshmem_ctx_long_atomic_fetch_inc_nbi, \ + long long*: pshmem_ctx_longlong_atomic_fetch_inc_nbi, \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_inc_nbi, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_inc_nbi, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_inc_nbi, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_fetch_inc_nbi, \ + long*: pshmem_long_atomic_fetch_inc_nbi, \ + long long*: pshmem_longlong_atomic_fetch_inc_nbi, \ + unsigned int*: pshmem_uint_atomic_fetch_inc_nbi, \ + unsigned long*: pshmem_ulong_atomic_fetch_inc_nbi, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch_inc_nbi)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + + +/* Atomic Nonblocking Fetch and Add: same prototype shape as the fetch_inc family plus a by-value operand named value of the element type; every function returns void and has a ctx twin taking a leading shmem_ctx_t */ +OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_fetch_add_nbi(shmem_ctx_t ctx, int *fetch, int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_fetch_add_nbi(shmem_ctx_t ctx, long *fetch, long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_fetch_add_nbi(shmem_ctx_t ctx, long long *fetch, long long *target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_fetch_add_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_fetch_add_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_fetch_add_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_fetch_add_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_fetch_add_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t value, int pe); 
+OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_fetch_add_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_fetch_add_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_atomic_fetch_add_nbi(shmem_ctx_t ctx, size_t *fetch, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_atomic_fetch_add_nbi(shmem_ctx_t ctx, ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t value, int pe); + +OSHMEM_DECLSPEC void pshmem_int_atomic_fetch_add_nbi(int *fetch, int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_fetch_add_nbi(long *fetch, long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_fetch_add_nbi(long long *fetch, long long *target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_uint_atomic_fetch_add_nbi(unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_atomic_fetch_add_nbi(unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_fetch_add_nbi(unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_fetch_add_nbi(int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_fetch_add_nbi(int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_fetch_add_nbi(uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_fetch_add_nbi(uint64_t *fetch, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_size_atomic_fetch_add_nbi(size_t *fetch, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_atomic_fetch_add_nbi(ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t value, int pe); + +#if OSHMEM_HAVE_C11 /* type-generic front end: _Generic on __OSHMEM_VAR_ARG1(...); shmem_ctx_t routes to the ctx variants keyed on __OSHMEM_VAR_ARG2(...), listed pointer types route to the non-ctx variants */
+#define pshmem_atomic_fetch_add_nbi(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_fetch_add_nbi, \ + long*: pshmem_ctx_long_atomic_fetch_add_nbi, \ + long long*: pshmem_ctx_longlong_atomic_fetch_add_nbi, \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_add_nbi, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_add_nbi, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_add_nbi, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_fetch_add_nbi, \ + long*: pshmem_long_atomic_fetch_add_nbi, \ + long long*: pshmem_longlong_atomic_fetch_add_nbi, \ + unsigned int*: pshmem_uint_atomic_fetch_add_nbi, \ + unsigned long*: pshmem_ulong_atomic_fetch_add_nbi, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch_add_nbi)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + + +/* Atomic Nonblocking Fetch and AND: prototypes exist only for unsigned int/long/long long and the int32/int64/uint32/uint64 fixed-width types; all take (fetch, target, value, pe), return void, and have a ctx twin with a leading shmem_ctx_t */ +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_fetch_and_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_fetch_and_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_fetch_and_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_fetch_and_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_fetch_and_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_fetch_and_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_fetch_and_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void pshmem_uint_atomic_fetch_and_nbi(unsigned int 
*fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_atomic_fetch_and_nbi(unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_fetch_and_nbi(unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_fetch_and_nbi(int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_fetch_and_nbi(int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_fetch_and_nbi(uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_fetch_and_nbi(uint64_t *fetch, uint64_t *target, uint64_t value, int pe); + +#if OSHMEM_HAVE_C11 /* type-generic front end; uint32_t* and uint64_t* get no association of their own - NOTE(review): presumably because they alias the unsigned standard types listed, confirm per platform */ +#define pshmem_atomic_fetch_and_nbi(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_and_nbi, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_and_nbi, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_and_nbi, \ + int32_t*: pshmem_ctx_int32_atomic_fetch_and_nbi, \ + int64_t*: pshmem_ctx_int64_atomic_fetch_and_nbi, \ + default: __opshmem_datatype_ignore), \ + unsigned int*: pshmem_uint_atomic_fetch_and_nbi, \ + unsigned long*: pshmem_ulong_atomic_fetch_and_nbi, \ + int32_t*: pshmem_int32_atomic_fetch_and_nbi, \ + int64_t*: pshmem_int64_atomic_fetch_and_nbi, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch_and_nbi)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + +/* Atomic Nonblocking Fetch and OR: same type set and prototype shape as the fetch_and family */ +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_fetch_or_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_fetch_or_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void 
pshmem_ctx_ulonglong_atomic_fetch_or_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_fetch_or_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_fetch_or_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_fetch_or_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_fetch_or_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void pshmem_uint_atomic_fetch_or_nbi(unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_atomic_fetch_or_nbi(unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_fetch_or_nbi(unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_fetch_or_nbi(int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_fetch_or_nbi(int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_fetch_or_nbi(uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_fetch_or_nbi(uint64_t *fetch, uint64_t *target, uint64_t value, int pe); + +#if OSHMEM_HAVE_C11 /* type-generic front end, same dispatch layout as pshmem_atomic_fetch_and_nbi */ +#define pshmem_atomic_fetch_or_nbi(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_or_nbi, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_or_nbi, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_or_nbi, \ + int32_t*: pshmem_ctx_int32_atomic_fetch_or_nbi, \ + int64_t*: pshmem_ctx_int64_atomic_fetch_or_nbi, \ + default: __opshmem_datatype_ignore), \ + unsigned int*: pshmem_uint_atomic_fetch_or_nbi, \ + unsigned long*: pshmem_ulong_atomic_fetch_or_nbi, \ + int32_t*: pshmem_int32_atomic_fetch_or_nbi, \ + int64_t*: pshmem_int64_atomic_fetch_or_nbi, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch_or_nbi)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + + +/* Atomic Nonblocking Fetch and XOR: same type set and prototype shape as the fetch_and and fetch_or families */ +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_fetch_xor_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_fetch_xor_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_fetch_xor_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_fetch_xor_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_fetch_xor_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_fetch_xor_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_fetch_xor_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void pshmem_uint_atomic_fetch_xor_nbi(unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_atomic_fetch_xor_nbi(unsigned long *fetch, unsigned 
long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_fetch_xor_nbi(unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_fetch_xor_nbi(int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_fetch_xor_nbi(int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_fetch_xor_nbi(uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_fetch_xor_nbi(uint64_t *fetch, uint64_t *target, uint64_t value, int pe); + +#if OSHMEM_HAVE_C11 /* type-generic front end, same dispatch layout as pshmem_atomic_fetch_and_nbi */ +#define pshmem_atomic_fetch_xor_nbi(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_xor_nbi, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_xor_nbi, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_xor_nbi, \ + int32_t*: pshmem_ctx_int32_atomic_fetch_xor_nbi, \ + int64_t*: pshmem_ctx_int64_atomic_fetch_xor_nbi, \ + default: __opshmem_datatype_ignore), \ + unsigned int*: pshmem_uint_atomic_fetch_xor_nbi, \ + unsigned long*: pshmem_ulong_atomic_fetch_xor_nbi, \ + int32_t*: pshmem_int32_atomic_fetch_xor_nbi, \ + int64_t*: pshmem_int64_atomic_fetch_xor_nbi, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch_xor_nbi)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + +/* + * + * Control of profiling (variadic: only the leading int level parameter is typed) + * + */ +OSHMEM_DECLSPEC void pshmem_pcontrol(int level, ...); + + /* * Lock functions */ @@ -1545,10 +2785,174 @@ OSHMEM_DECLSPEC void pshmem_ptrdiff_wait_until(volatile ptrdiff_t *addr, int cm int*: pshmem_int_wait_until, \ long*: pshmem_long_wait_until, \ long long*: pshmem_longlong_wait_until, \ - unsigned short*: pshmem_short_wait_until, \ - unsigned int*: pshmem_int_wait_until, \ - unsigned long*: pshmem_long_wait_until, \ - unsigned long long*: pshmem_longlong_wait_until)(addr, cmp, value) + unsigned 
short*: pshmem_ushort_wait_until, \ + unsigned int*: pshmem_uint_wait_until, \ + unsigned long*: pshmem_ulong_wait_until, \ + unsigned long long*: pshmem_ulonglong_wait_until)(addr, cmp, value) +#endif + +OSHMEM_DECLSPEC void pshmem_short_wait_until_all(volatile short *ivars, size_t nelems, const int *status, int cmp, short value); +OSHMEM_DECLSPEC void pshmem_ushort_wait_until_all(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC void pshmem_int_wait_until_all(volatile int *ivars, size_t nelems, const int *status, int cmp, int value); +OSHMEM_DECLSPEC void pshmem_long_wait_until_all(volatile long *ivars, size_t nelems, const int *status, int cmp, long value); +OSHMEM_DECLSPEC void pshmem_longlong_wait_until_all(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC void pshmem_uint_wait_until_all(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC void pshmem_ulong_wait_until_all(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC void pshmem_ulonglong_wait_until_all(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC void pshmem_int32_wait_until_all(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC void pshmem_int64_wait_until_all(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC void pshmem_uint32_wait_until_all(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t value); +OSHMEM_DECLSPEC void pshmem_uint64_wait_until_all(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC void pshmem_size_wait_until_all(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t value); 
+OSHMEM_DECLSPEC void pshmem_ptrdiff_wait_until_all(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define pshmem_wait_until_all(ivars, nelems, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: pshmem_short_wait_until_all, \ + unsigned short*: pshmem_ushort_wait_until_all, \ + int*: pshmem_int_wait_until_all, \ + long*: pshmem_long_wait_until_all, \ + long long*: pshmem_longlong_wait_until_all, \ + unsigned int*: pshmem_uint_wait_until_all, \ + unsigned long*: pshmem_ulong_wait_until_all, \ + unsigned long long*: pshmem_ulonglong_wait_until_all)(ivars, nelems, status, cmp, value) +#endif + +OSHMEM_DECLSPEC size_t pshmem_short_wait_until_any(volatile short *ivars, size_t nelems, const int *status, int cmp, short value); +OSHMEM_DECLSPEC size_t pshmem_ushort_wait_until_any(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC size_t pshmem_int_wait_until_any(volatile int *ivars, size_t nelems, const int *status, int cmp, int value); +OSHMEM_DECLSPEC size_t pshmem_long_wait_until_any(volatile long *ivars, size_t nelems, const int *status, int cmp, long value); +OSHMEM_DECLSPEC size_t pshmem_longlong_wait_until_any(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC size_t pshmem_uint_wait_until_any(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC size_t pshmem_ulong_wait_until_any(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC size_t pshmem_ulonglong_wait_until_any(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC size_t pshmem_int32_wait_until_any(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC size_t pshmem_int64_wait_until_any(volatile 
int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC size_t pshmem_uint32_wait_until_any(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t value); +OSHMEM_DECLSPEC size_t pshmem_uint64_wait_until_any(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC size_t pshmem_size_wait_until_any(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t value); +OSHMEM_DECLSPEC size_t pshmem_ptrdiff_wait_until_any(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define pshmem_wait_until_any(ivars, nelems, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: pshmem_short_wait_until_any, \ + unsigned short*: pshmem_ushort_wait_until_any, \ + int*: pshmem_int_wait_until_any, \ + long*: pshmem_long_wait_until_any, \ + long long*: pshmem_longlong_wait_until_any, \ + unsigned int*: pshmem_uint_wait_until_any, \ + unsigned long*: pshmem_ulong_wait_until_any, \ + unsigned long long*: pshmem_ulonglong_wait_until_any)(ivars, nelems, status, cmp, value) +#endif + +OSHMEM_DECLSPEC size_t pshmem_short_wait_until_some(volatile short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, short value); +OSHMEM_DECLSPEC size_t pshmem_ushort_wait_until_some(volatile unsigned short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC size_t pshmem_int_wait_until_some(volatile int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int value); +OSHMEM_DECLSPEC size_t pshmem_long_wait_until_some(volatile long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long value); +OSHMEM_DECLSPEC size_t pshmem_longlong_wait_until_some(volatile long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC size_t pshmem_uint_wait_until_some(volatile unsigned int 
*ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC size_t pshmem_ulong_wait_until_some(volatile unsigned long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC size_t pshmem_ulonglong_wait_until_some(volatile unsigned long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC size_t pshmem_int32_wait_until_some(volatile int32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC size_t pshmem_int64_wait_until_some(volatile int64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC size_t pshmem_uint32_wait_until_some(volatile uint32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint32_t value); +OSHMEM_DECLSPEC size_t pshmem_uint64_wait_until_some(volatile uint64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC size_t pshmem_size_wait_until_some(volatile size_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, size_t value); +OSHMEM_DECLSPEC size_t pshmem_ptrdiff_wait_until_some(volatile ptrdiff_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define pshmem_wait_until_some(ivars, nelems, indices, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: pshmem_short_wait_until_some, \ + unsigned short*: pshmem_ushort_wait_until_some, \ + int*: pshmem_int_wait_until_some, \ + long*: pshmem_long_wait_until_some, \ + long long*: pshmem_longlong_wait_until_some, \ + unsigned int*: pshmem_uint_wait_until_some, \ + unsigned long*: pshmem_ulong_wait_until_some, \ + unsigned long long*: pshmem_ulonglong_wait_until_some)(ivars, nelems, indices, status, cmp, value) +#endif + +OSHMEM_DECLSPEC void pshmem_short_wait_until_all_vector(volatile short 
*ivars, size_t nelems, const int *status, int cmp, short *values); +OSHMEM_DECLSPEC void pshmem_ushort_wait_until_all_vector(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short *values); +OSHMEM_DECLSPEC void pshmem_int_wait_until_all_vector(volatile int *ivars, size_t nelems, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC void pshmem_long_wait_until_all_vector(volatile long *ivars, size_t nelems, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC void pshmem_longlong_wait_until_all_vector(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long *values); +OSHMEM_DECLSPEC void pshmem_uint_wait_until_all_vector(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC void pshmem_ulong_wait_until_all_vector(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC void pshmem_ulonglong_wait_until_all_vector(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC void pshmem_int32_wait_until_all_vector(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t *values); +OSHMEM_DECLSPEC void pshmem_int64_wait_until_all_vector(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t *values); +OSHMEM_DECLSPEC void pshmem_uint32_wait_until_all_vector(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC void pshmem_uint64_wait_until_all_vector(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC void pshmem_size_wait_until_all_vector(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC void pshmem_ptrdiff_wait_until_all_vector(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t *values); +#if 
OSHMEM_HAVE_C11 +#define pshmem_wait_until_all_vector(ivars, nelems, status, cmp, values) \ + _Generic(&*(ivars), \ + short*: pshmem_short_wait_until_all_vector, \ + unsigned short*: pshmem_ushort_wait_until_all_vector, \ + int*: pshmem_int_wait_until_all_vector, \ + long*: pshmem_long_wait_until_all_vector, \ + long long*: pshmem_longlong_wait_until_all_vector, \ + unsigned int*: pshmem_uint_wait_until_all_vector, \ + unsigned long*: pshmem_ulong_wait_until_all_vector, \ + unsigned long long*: pshmem_ulonglong_wait_until_all_vector)(ivars, nelems, status, cmp, values) +#endif + + +OSHMEM_DECLSPEC size_t pshmem_short_wait_until_any_vector(volatile short *ivars, size_t nelems, const int *status, int cmp, short *values); +OSHMEM_DECLSPEC size_t pshmem_ushort_wait_until_any_vector(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short *values); +OSHMEM_DECLSPEC size_t pshmem_int_wait_until_any_vector(volatile int *ivars, size_t nelems, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC size_t pshmem_long_wait_until_any_vector(volatile long *ivars, size_t nelems, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC size_t pshmem_longlong_wait_until_any_vector(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long *values); +OSHMEM_DECLSPEC size_t pshmem_uint_wait_until_any_vector(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC size_t pshmem_ulong_wait_until_any_vector(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC size_t pshmem_ulonglong_wait_until_any_vector(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC size_t pshmem_int32_wait_until_any_vector(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t *values); +OSHMEM_DECLSPEC size_t 
pshmem_int64_wait_until_any_vector(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t *values); +OSHMEM_DECLSPEC size_t pshmem_uint32_wait_until_any_vector(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC size_t pshmem_uint64_wait_until_any_vector(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC size_t pshmem_size_wait_until_any_vector(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC size_t pshmem_ptrdiff_wait_until_any_vector(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t *values); +#if OSHMEM_HAVE_C11 +#define pshmem_wait_until_any_vector(ivars, nelems, status, cmp, values) \ + _Generic(&*(ivars), \ + short*: pshmem_short_wait_until_any_vector, \ + unsigned short*: pshmem_ushort_wait_until_any_vector, \ + int*: pshmem_int_wait_until_any_vector, \ + long*: pshmem_long_wait_until_any_vector, \ + long long*: pshmem_longlong_wait_until_any_vector, \ + unsigned int*: pshmem_uint_wait_until_any_vector, \ + unsigned long*: pshmem_ulong_wait_until_any_vector, \ + unsigned long long*: pshmem_ulonglong_wait_until_any_vector)(ivars, nelems, status, cmp, values) +#endif + + +OSHMEM_DECLSPEC size_t pshmem_short_wait_until_some_vector(volatile short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, short *values); +OSHMEM_DECLSPEC size_t pshmem_ushort_wait_until_some_vector(volatile unsigned short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned short *values); +OSHMEM_DECLSPEC size_t pshmem_int_wait_until_some_vector(volatile int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC size_t pshmem_long_wait_until_some_vector(volatile long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC size_t 
pshmem_longlong_wait_until_some_vector(volatile long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long long *values); +OSHMEM_DECLSPEC size_t pshmem_uint_wait_until_some_vector(volatile unsigned int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC size_t pshmem_ulong_wait_until_some_vector(volatile unsigned long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC size_t pshmem_ulonglong_wait_until_some_vector(volatile unsigned long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC size_t pshmem_int32_wait_until_some_vector(volatile int32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int32_t *values); +OSHMEM_DECLSPEC size_t pshmem_int64_wait_until_some_vector(volatile int64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int64_t *values); +OSHMEM_DECLSPEC size_t pshmem_uint32_wait_until_some_vector(volatile uint32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC size_t pshmem_uint64_wait_until_some_vector(volatile uint64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC size_t pshmem_size_wait_until_some_vector(volatile size_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC size_t pshmem_ptrdiff_wait_until_some_vector(volatile ptrdiff_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, ptrdiff_t *values); +#if OSHMEM_HAVE_C11 +#define pshmem_wait_until_some_vector(ivars, nelems, indices, status, cmp, values)\ + _Generic(&*(ivars), \ + short*: pshmem_short_wait_until_some_vector, \ + unsigned short*: pshmem_ushort_wait_until_some_vector, \ + int*: pshmem_int_wait_until_some_vector, \ + long*: pshmem_long_wait_until_some_vector, \ 
+ long long*: pshmem_longlong_wait_until_some_vector, \ + unsigned int*: pshmem_uint_wait_until_some_vector, \ + unsigned long*: pshmem_ulong_wait_until_some_vector, \ + unsigned long long*: pshmem_ulonglong_wait_until_some_vector)(ivars, nelems, indices, status, cmp, values) #endif OSHMEM_DECLSPEC int pshmem_short_test(volatile short *addr, int cmp, short value); @@ -1578,18 +2982,189 @@ OSHMEM_DECLSPEC int pshmem_ptrdiff_test(volatile ptrdiff_t *addr, int cmp, ptrd unsigned long long*: pshmem_longlong_test)(addr, cmp, value) #endif +OSHMEM_DECLSPEC int pshmem_short_test_all(volatile short *ivars, size_t nelems, const int *status, int cmp, short value); +OSHMEM_DECLSPEC int pshmem_ushort_test_all(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC int pshmem_int_test_all(volatile int *ivars, size_t nelems, const int *status, int cmp, int value); +OSHMEM_DECLSPEC int pshmem_long_test_all(volatile long *ivars, size_t nelems, const int *status, int cmp, long value); +OSHMEM_DECLSPEC int pshmem_longlong_test_all(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC int pshmem_uint_test_all(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC int pshmem_ulong_test_all(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC int pshmem_ulonglong_test_all(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC int pshmem_int32_test_all(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC int pshmem_int64_test_all(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC int pshmem_uint32_test_all(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t value); 
+OSHMEM_DECLSPEC int pshmem_uint64_test_all(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC int pshmem_size_test_all(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t value); +OSHMEM_DECLSPEC int pshmem_ptrdiff_test_all(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define pshmem_test_all(ivars, nelems, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: pshmem_short_test_all, \ + unsigned short*: pshmem_ushort_test_all, \ + int*: pshmem_int_test_all, \ + long*: pshmem_long_test_all, \ + long long*: pshmem_longlong_test_all, \ + unsigned int*: pshmem_uint_test_all, \ + unsigned long*: pshmem_ulong_test_all, \ + unsigned long long*: pshmem_ulonglong_test_all)(ivars, nelems, status, cmp, value) +#endif + +OSHMEM_DECLSPEC size_t pshmem_short_test_any(volatile short *ivars, size_t nelems, const int *status, int cmp, short value); +OSHMEM_DECLSPEC size_t pshmem_ushort_test_any(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC size_t pshmem_int_test_any(volatile int *ivars, size_t nelems, const int *status, int cmp, int value); +OSHMEM_DECLSPEC size_t pshmem_long_test_any(volatile long *ivars, size_t nelems, const int *status, int cmp, long value); +OSHMEM_DECLSPEC size_t pshmem_longlong_test_any(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC size_t pshmem_uint_test_any(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC size_t pshmem_ulong_test_any(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC size_t pshmem_ulonglong_test_any(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC size_t 
pshmem_int32_test_any(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC size_t pshmem_int64_test_any(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC size_t pshmem_uint32_test_any(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t value); +OSHMEM_DECLSPEC size_t pshmem_uint64_test_any(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC size_t pshmem_size_test_any(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t value); +OSHMEM_DECLSPEC size_t pshmem_ptrdiff_test_any(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define pshmem_test_any(ivars, nelems, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: pshmem_short_test_any, \ + unsigned short*: pshmem_ushort_test_any, \ + int*: pshmem_int_test_any, \ + long*: pshmem_long_test_any, \ + long long*: pshmem_longlong_test_any, \ + unsigned int*: pshmem_uint_test_any, \ + unsigned long*: pshmem_ulong_test_any, \ + unsigned long long*: pshmem_ulonglong_test_any)(ivars, nelems, status, cmp, value) +#endif + +OSHMEM_DECLSPEC size_t pshmem_short_test_some(volatile short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, short value); +OSHMEM_DECLSPEC size_t pshmem_ushort_test_some(volatile unsigned short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC size_t pshmem_int_test_some(volatile int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int value); +OSHMEM_DECLSPEC size_t pshmem_long_test_some(volatile long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long value); +OSHMEM_DECLSPEC size_t pshmem_longlong_test_some(volatile long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC 
size_t pshmem_uint_test_some(volatile unsigned int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC size_t pshmem_ulong_test_some(volatile unsigned long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC size_t pshmem_ulonglong_test_some(volatile unsigned long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC size_t pshmem_int32_test_some(volatile int32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC size_t pshmem_int64_test_some(volatile int64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC size_t pshmem_uint32_test_some(volatile uint32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint32_t value); +OSHMEM_DECLSPEC size_t pshmem_uint64_test_some(volatile uint64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC size_t pshmem_size_test_some(volatile size_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, size_t value); +OSHMEM_DECLSPEC size_t pshmem_ptrdiff_test_some(volatile ptrdiff_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define pshmem_test_some(ivars, nelems, indices, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: pshmem_short_test_some, \ + unsigned short*: pshmem_ushort_test_some, \ + int*: pshmem_int_test_some, \ + long*: pshmem_long_test_some, \ + long long*: pshmem_longlong_test_some, \ + unsigned int*: pshmem_uint_test_some, \ + unsigned long*: pshmem_ulong_test_some, \ + unsigned long long*: pshmem_ulonglong_test_some)(ivars, nelems, indices, status, cmp, value) +#endif + +OSHMEM_DECLSPEC int pshmem_short_test_all_vector(volatile short *ivars, size_t nelems, const int *status, int cmp, short 
*values); +OSHMEM_DECLSPEC int pshmem_ushort_test_all_vector(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short *values); +OSHMEM_DECLSPEC int pshmem_int_test_all_vector(volatile int *ivars, size_t nelems, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC int pshmem_long_test_all_vector(volatile long *ivars, size_t nelems, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC int pshmem_longlong_test_all_vector(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long *values); +OSHMEM_DECLSPEC int pshmem_uint_test_all_vector(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC int pshmem_ulong_test_all_vector(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC int pshmem_ulonglong_test_all_vector(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC int pshmem_int32_test_all_vector(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t *values); +OSHMEM_DECLSPEC int pshmem_int64_test_all_vector(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t *values); +OSHMEM_DECLSPEC int pshmem_uint32_test_all_vector(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC int pshmem_uint64_test_all_vector(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC int pshmem_size_test_all_vector(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC int pshmem_ptrdiff_test_all_vector(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t *values); +#if OSHMEM_HAVE_C11 +#define pshmem_test_all_vector(ivars, nelems, status, cmp, values) \ + _Generic(&*(ivars), \ + short*: pshmem_short_test_all_vector, \ + 
unsigned short*: pshmem_ushort_test_all_vector, \ + int*: pshmem_int_test_all_vector, \ + long*: pshmem_long_test_all_vector, \ + long long*: pshmem_longlong_test_all_vector, \ + unsigned int*: pshmem_uint_test_all_vector, \ + unsigned long*: pshmem_ulong_test_all_vector, \ + unsigned long long*: pshmem_ulonglong_test_all_vector)(ivars, nelems, status, cmp, values) +#endif + + +OSHMEM_DECLSPEC size_t pshmem_short_test_any_vector(volatile short *ivars, size_t nelems, const int *status, int cmp, short *values); +OSHMEM_DECLSPEC size_t pshmem_ushort_test_any_vector(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short *values); +OSHMEM_DECLSPEC size_t pshmem_int_test_any_vector(volatile int *ivars, size_t nelems, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC size_t pshmem_long_test_any_vector(volatile long *ivars, size_t nelems, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC size_t pshmem_longlong_test_any_vector(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long *values); +OSHMEM_DECLSPEC size_t pshmem_uint_test_any_vector(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC size_t pshmem_ulong_test_any_vector(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC size_t pshmem_ulonglong_test_any_vector(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC size_t pshmem_int32_test_any_vector(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t *values); +OSHMEM_DECLSPEC size_t pshmem_int64_test_any_vector(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t *values); +OSHMEM_DECLSPEC size_t pshmem_uint32_test_any_vector(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC size_t 
pshmem_uint64_test_any_vector(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC size_t pshmem_size_test_any_vector(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC size_t pshmem_ptrdiff_test_any_vector(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t *values); +#if OSHMEM_HAVE_C11 +#define pshmem_test_any_vector(ivars, nelems, status, cmp, values) \ + _Generic(&*(ivars), \ + short*: pshmem_short_test_any_vector, \ + unsigned short*: pshmem_ushort_test_any_vector, \ + int*: pshmem_int_test_any_vector, \ + long*: pshmem_long_test_any_vector, \ + long long*: pshmem_longlong_test_any_vector, \ + unsigned int*: pshmem_uint_test_any_vector, \ + unsigned long*: pshmem_ulong_test_any_vector, \ + unsigned long long*: pshmem_ulonglong_test_any_vector)(ivars, nelems, status, cmp, values) +#endif + + +OSHMEM_DECLSPEC size_t pshmem_short_test_some_vector(volatile short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, short *values); +OSHMEM_DECLSPEC size_t pshmem_ushort_test_some_vector(volatile unsigned short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned short *values); +OSHMEM_DECLSPEC size_t pshmem_int_test_some_vector(volatile int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC size_t pshmem_long_test_some_vector(volatile long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC size_t pshmem_longlong_test_some_vector(volatile long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long long *values); +OSHMEM_DECLSPEC size_t pshmem_uint_test_some_vector(volatile unsigned int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC size_t pshmem_ulong_test_some_vector(volatile unsigned long *ivars, size_t nelems, size_t 
*indices, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC size_t pshmem_ulonglong_test_some_vector(volatile unsigned long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC size_t pshmem_int32_test_some_vector(volatile int32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int32_t *values); +OSHMEM_DECLSPEC size_t pshmem_int64_test_some_vector(volatile int64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int64_t *values); +OSHMEM_DECLSPEC size_t pshmem_uint32_test_some_vector(volatile uint32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC size_t pshmem_uint64_test_some_vector(volatile uint64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC size_t pshmem_size_test_some_vector(volatile size_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC size_t pshmem_ptrdiff_test_some_vector(volatile ptrdiff_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, ptrdiff_t *values); +#if OSHMEM_HAVE_C11 +#define pshmem_test_some_vector(ivars, nelems, indices, status, cmp, values)\ + _Generic(&*(ivars), \ + short*: pshmem_short_test_some_vector, \ + unsigned short*: pshmem_ushort_test_some_vector, \ + int*: pshmem_int_test_some_vector, \ + long*: pshmem_long_test_some_vector, \ + long long*: pshmem_longlong_test_some_vector, \ + unsigned int*: pshmem_uint_test_some_vector, \ + unsigned long*: pshmem_ulong_test_some_vector, \ + unsigned long long*: pshmem_ulonglong_test_some_vector)(ivars, nelems, indices, status, cmp, values) +#endif + /* * Barrier sync routines */ OSHMEM_DECLSPEC void pshmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync); OSHMEM_DECLSPEC void pshmem_barrier_all(void); -OSHMEM_DECLSPEC void pshmem_sync(int PE_start, int logPE_stride, 
int PE_size, long *pSync); +OSHMEM_DECLSPEC void pshmem_sync_deprecated(int PE_start, int logPE_stride, int PE_size, long *pSync); OSHMEM_DECLSPEC void pshmem_sync_all(void); OSHMEM_DECLSPEC void pshmem_fence(void); OSHMEM_DECLSPEC void pshmem_ctx_fence(shmem_ctx_t ctx); OSHMEM_DECLSPEC void pshmem_quiet(void); OSHMEM_DECLSPEC void pshmem_ctx_quiet(shmem_ctx_t ctx); +#if OSHMEM_HAVE_C11 +#define pshmem_sync(...) \ + _Generic((__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_team_t: pshmem_team_sync, \ + int: pshmem_sync_deprecated)(__VA_ARGS__) +#endif + /* * Collective routines */ diff --git a/oshmem/include/pshmemx.h b/oshmem/include/pshmemx.h index 0b4ffcbd202..5a0f7f5a95c 100644 --- a/oshmem/include/pshmemx.h +++ b/oshmem/include/pshmemx.h @@ -16,11 +16,6 @@ extern "C" { #endif -/* - * Symmetric heap routines - */ -OSHMEM_DECLSPEC void* pshmemx_malloc_with_hint(size_t size, long hint); - /* * Legacy API diff --git a/oshmem/include/shmem.h.in b/oshmem/include/shmem.h.in index 118e6e90b27..cf0b539cff6 100644 --- a/oshmem/include/shmem.h.in +++ b/oshmem/include/shmem.h.in @@ -102,6 +102,16 @@ enum { #define SHMEM_CTX_PRIVATE (1<<0) #define SHMEM_CTX_SERIALIZED (1<<1) #define SHMEM_CTX_NOSTORE (1<<2) +#define SHMEM_CTX_INVALID NULL + +#define SHMEM_SIGNAL_SET (1<<0) +#define SHMEM_SIGNAL_ADD (1<<1) + +#define SHMEM_MALLOC_ATOMICS_REMOTE (1<<0) +#define SHMEM_MALLOC_SIGNAL_REMOTE (1<<1) + +#define SHMEM_TEAM_INVALID NULL +#define SHMEM_TEAM_NUM_CONTEXTS (1<<0) /* * Deprecated (but still valid) names @@ -187,6 +197,7 @@ OSHMEM_DECLSPEC void* shmem_malloc(size_t size); OSHMEM_DECLSPEC void* shmem_calloc(size_t count, size_t size); OSHMEM_DECLSPEC void* shmem_align(size_t align, size_t size); OSHMEM_DECLSPEC void* shmem_realloc(void *ptr, size_t size); +OSHMEM_DECLSPEC void* shmem_malloc_with_hints(size_t size, long hints); OSHMEM_DECLSPEC void shmem_free(void* ptr); /* @@ -207,6 +218,569 @@ extern shmem_ctx_t oshmem_ctx_default; OSHMEM_DECLSPEC int shmem_ctx_create(long 
options, shmem_ctx_t *ctx); OSHMEM_DECLSPEC void shmem_ctx_destroy(shmem_ctx_t ctx); +/* + * Team management operations + */ +typedef struct { int dummy; } * shmem_team_t; +typedef struct { int num_contexts; } shmem_team_config_t; + +extern shmem_team_t oshmem_team_shared; +extern shmem_team_t oshmem_team_world; + +#define SHMEM_TEAM_SHARED oshmem_team_shared +#define SHMEM_TEAM_WORLD oshmem_team_world + +OSHMEM_DECLSPEC int shmem_team_my_pe(shmem_team_t team); +OSHMEM_DECLSPEC int shmem_team_n_pes(shmem_team_t team); +OSHMEM_DECLSPEC int shmem_team_get_config(shmem_team_t team, long config_mask, shmem_team_config_t *config); +OSHMEM_DECLSPEC int shmem_team_translate_pe(shmem_team_t src_team, int src_pe, shmem_team_t dest_team); +OSHMEM_DECLSPEC int shmem_team_split_strided(shmem_team_t parent_team, int start, int stride, int size, const shmem_team_config_t *config, long config_mask, shmem_team_t *new_team); +OSHMEM_DECLSPEC int shmem_team_split_2d(shmem_team_t parent_team, int xrange, const shmem_team_config_t *xaxis_config, long xaxis_mask, shmem_team_t *xaxis_team, const shmem_team_config_t *yaxis_config, long yaxis_mask, shmem_team_t *yaxis_team); +OSHMEM_DECLSPEC void shmem_team_destroy(shmem_team_t team); +OSHMEM_DECLSPEC int shmem_ctx_get_team(shmem_ctx_t ctx, shmem_team_t *team); +OSHMEM_DECLSPEC int shmem_team_create_ctx(shmem_team_t team, long options, shmem_ctx_t *ctx); + +/* + * Teams-based Collectives + */ + +/* Teams sync */ +OSHMEM_DECLSPEC void shmem_team_sync(shmem_team_t team); + +/* Teams alltoall */ +OSHMEM_DECLSPEC int shmem_char_alltoall(shmem_team_t team, char *target, const char *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_short_alltoall(shmem_team_t team, short *target, const short *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int_alltoall(shmem_team_t team, int *target, const int *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_long_alltoall(shmem_team_t team, long *target, const long *source, size_t nelems); 
+OSHMEM_DECLSPEC int shmem_float_alltoall(shmem_team_t team, float *target, const float *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_double_alltoall(shmem_team_t team, double *target, const double *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_longlong_alltoall(shmem_team_t team, long long *target, const long long *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_schar_alltoall(shmem_team_t team, signed char *target, const signed char *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uchar_alltoall(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ushort_alltoall(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint_alltoall(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ulong_alltoall(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ulonglong_alltoall(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_longdouble_alltoall(shmem_team_t team, long double *target, const long double *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int8_alltoall(shmem_team_t team, int8_t *target, const int8_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int16_alltoall(shmem_team_t team, int16_t *target, const int16_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int32_alltoall(shmem_team_t team, int32_t *target, const int32_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int64_alltoall(shmem_team_t team, int64_t *target, const int64_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint8_alltoall(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint16_alltoall(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nelems); +OSHMEM_DECLSPEC int 
shmem_uint32_alltoall(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint64_alltoall(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_size_alltoall(shmem_team_t team, size_t *target, const size_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ptrdiff_alltoall(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nelems); +#if OSHMEM_HAVE_C11 +#define shmem_alltoall(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_char_alltoall, \ + short*: shmem_short_alltoall, \ + int*: shmem_int_alltoall, \ + long*: shmem_long_alltoall, \ + long long*: shmem_longlong_alltoall, \ + signed char*: shmem_schar_alltoall, \ + unsigned char*: shmem_uchar_alltoall, \ + unsigned short*: shmem_ushort_alltoall, \ + unsigned int*: shmem_uint_alltoall, \ + unsigned long*: shmem_ulong_alltoall, \ + unsigned long long*: shmem_ulonglong_alltoall, \ + float*: shmem_float_alltoall, \ + double*: shmem_double_alltoall, \ + long double*: shmem_longdouble_alltoall, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif + +OSHMEM_DECLSPEC int shmem_alltoallmem(shmem_team_t team, void *target, const void *source, size_t nelems); + +/* Teams alltoalls */ +OSHMEM_DECLSPEC int shmem_char_alltoalls(shmem_team_t team, char *target, const char *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_short_alltoalls(shmem_team_t team, short *target, const short *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_int_alltoalls(shmem_team_t team, int *target, const int *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_long_alltoalls(shmem_team_t team, long *target, const long *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_float_alltoalls(shmem_team_t team, float *target, const float *source, ptrdiff_t dst, ptrdiff_t sst, size_t 
nelems); +OSHMEM_DECLSPEC int shmem_double_alltoalls(shmem_team_t team, double *target, const double *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_longlong_alltoalls(shmem_team_t team, long long *target, const long long *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_schar_alltoalls(shmem_team_t team, signed char *target, const signed char *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_uchar_alltoalls(shmem_team_t team, unsigned char *target, const unsigned char *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_ushort_alltoalls(shmem_team_t team, unsigned short *target, const unsigned short *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint_alltoalls(shmem_team_t team, unsigned int *target, const unsigned int *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_ulong_alltoalls(shmem_team_t team, unsigned long *target, const unsigned long *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_ulonglong_alltoalls(shmem_team_t team, unsigned long long *target, const unsigned long long *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_longdouble_alltoalls(shmem_team_t team, long double *target, const long double *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_int8_alltoalls(shmem_team_t team, int8_t *target, const int8_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_int16_alltoalls(shmem_team_t team, int16_t *target, const int16_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_int32_alltoalls(shmem_team_t team, int32_t *target, const int32_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_int64_alltoalls(shmem_team_t team, int64_t *target, const int64_t *source, ptrdiff_t dst, 
ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint8_alltoalls(shmem_team_t team, uint8_t *target, const uint8_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint16_alltoalls(shmem_team_t team, uint16_t *target, const uint16_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint32_alltoalls(shmem_team_t team, uint32_t *target, const uint32_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint64_alltoalls(shmem_team_t team, uint64_t *target, const uint64_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_size_alltoalls(shmem_team_t team, size_t *target, const size_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +OSHMEM_DECLSPEC int shmem_ptrdiff_alltoalls(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +#if OSHMEM_HAVE_C11 +#define shmem_alltoalls(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_char_alltoalls, \ + short*: shmem_short_alltoalls, \ + int*: shmem_int_alltoalls, \ + long*: shmem_long_alltoalls, \ + long long*: shmem_longlong_alltoalls, \ + signed char*: shmem_schar_alltoalls, \ + unsigned char*: shmem_uchar_alltoalls, \ + unsigned short*: shmem_ushort_alltoalls, \ + unsigned int*: shmem_uint_alltoalls, \ + unsigned long*: shmem_ulong_alltoalls, \ + unsigned long long*: shmem_ulonglong_alltoalls, \ + float*: shmem_float_alltoalls, \ + double*: shmem_double_alltoalls, \ + long double*: shmem_longdouble_alltoalls, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif + +OSHMEM_DECLSPEC int shmem_alltoallsmem(shmem_team_t team, void *target, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); + + +/* Teams broadcast */ +OSHMEM_DECLSPEC int shmem_char_broadcast(shmem_team_t team, char *target, const char *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_short_broadcast(shmem_team_t 
team, short *target, const short *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_int_broadcast(shmem_team_t team, int *target, const int *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_long_broadcast(shmem_team_t team, long *target, const long *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_float_broadcast(shmem_team_t team, float *target, const float *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_double_broadcast(shmem_team_t team, double *target, const double *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_longlong_broadcast(shmem_team_t team, long long *target, const long long *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_schar_broadcast(shmem_team_t team, signed char *target, const signed char *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_uchar_broadcast(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_ushort_broadcast(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_uint_broadcast(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_ulong_broadcast(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_ulonglong_broadcast(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_longdouble_broadcast(shmem_team_t team, long double *target, const long double *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_int8_broadcast(shmem_team_t team, int8_t *target, const int8_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_int16_broadcast(shmem_team_t team, int16_t *target, const int16_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC
int shmem_int32_broadcast(shmem_team_t team, int32_t *target, const int32_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_int64_broadcast(shmem_team_t team, int64_t *target, const int64_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_uint8_broadcast(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_uint16_broadcast(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_uint32_broadcast(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_uint64_broadcast(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_size_broadcast(shmem_team_t team, size_t *target, const size_t *source, size_t nelems, int PE_root); +OSHMEM_DECLSPEC int shmem_ptrdiff_broadcast(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nelems, int PE_root); +#if OSHMEM_HAVE_C11 /* C11 only: type-generic shmem_broadcast(); _Generic selects the typed variant from the pointer type of __OSHMEM_VAR_ARG2(...) (presumably the target argument -- TODO confirm); unlisted types select __oshmem_datatype_ignore */ +#define shmem_broadcast(...)
\ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_char_broadcast, \ + short*: shmem_short_broadcast, \ + int*: shmem_int_broadcast, \ + long*: shmem_long_broadcast, \ + long long*: shmem_longlong_broadcast, \ + signed char*: shmem_schar_broadcast, \ + unsigned char*: shmem_uchar_broadcast, \ + unsigned short*: shmem_ushort_broadcast, \ + unsigned int*: shmem_uint_broadcast, \ + unsigned long*: shmem_ulong_broadcast, \ + unsigned long long*: shmem_ulonglong_broadcast, \ + float*: shmem_float_broadcast, \ + double*: shmem_double_broadcast, \ + long double*: shmem_longdouble_broadcast, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + +OSHMEM_DECLSPEC int shmem_broadcastmem(shmem_team_t team, void *target, const void *source, size_t nelems, int PE_root); + +/* Teams collect: typed shmem_<type>_collect(team, target, source, nelems) variants plus the untyped shmem_collectmem; all return int */ +OSHMEM_DECLSPEC int shmem_char_collect(shmem_team_t team, char *target, const char *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_short_collect(shmem_team_t team, short *target, const short *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int_collect(shmem_team_t team, int *target, const int *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_long_collect(shmem_team_t team, long *target, const long *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_float_collect(shmem_team_t team, float *target, const float *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_double_collect(shmem_team_t team, double *target, const double *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_longlong_collect(shmem_team_t team, long long *target, const long long *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_schar_collect(shmem_team_t team, signed char *target, const signed char *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uchar_collect(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ushort_collect(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nelems); +OSHMEM_DECLSPEC
int shmem_uint_collect(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ulong_collect(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ulonglong_collect(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_longdouble_collect(shmem_team_t team, long double *target, const long double *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int8_collect(shmem_team_t team, int8_t *target, const int8_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int16_collect(shmem_team_t team, int16_t *target, const int16_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int32_collect(shmem_team_t team, int32_t *target, const int32_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int64_collect(shmem_team_t team, int64_t *target, const int64_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint8_collect(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint16_collect(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint32_collect(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint64_collect(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_size_collect(shmem_team_t team, size_t *target, const size_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ptrdiff_collect(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nelems); +#if OSHMEM_HAVE_C11 /* C11 only: type-generic shmem_collect(); _Generic selects the typed variant from the pointer type of __OSHMEM_VAR_ARG2(...) (presumably the target argument -- TODO confirm); unlisted types select __oshmem_datatype_ignore */ +#define shmem_collect(...)
\ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_char_collect, \ + short*: shmem_short_collect, \ + int*: shmem_int_collect, \ + long*: shmem_long_collect, \ + long long*: shmem_longlong_collect, \ + signed char*: shmem_schar_collect, \ + unsigned char*: shmem_uchar_collect, \ + unsigned short*: shmem_ushort_collect, \ + unsigned int*: shmem_uint_collect, \ + unsigned long*: shmem_ulong_collect, \ + unsigned long long*: shmem_ulonglong_collect, \ + float*: shmem_float_collect, \ + double*: shmem_double_collect, \ + long double*: shmem_longdouble_collect, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + + +OSHMEM_DECLSPEC int shmem_collectmem(shmem_team_t team, void *target, const void *source, size_t nelems); + +/* Teams fcollect: typed shmem_<type>_fcollect(team, target, source, nelems) variants plus the untyped shmem_fcollectmem; all return int */ +OSHMEM_DECLSPEC int shmem_char_fcollect(shmem_team_t team, char *target, const char *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_short_fcollect(shmem_team_t team, short *target, const short *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int_fcollect(shmem_team_t team, int *target, const int *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_long_fcollect(shmem_team_t team, long *target, const long *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_float_fcollect(shmem_team_t team, float *target, const float *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_double_fcollect(shmem_team_t team, double *target, const double *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_longlong_fcollect(shmem_team_t team, long long *target, const long long *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_schar_fcollect(shmem_team_t team, signed char *target, const signed char *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uchar_fcollect(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ushort_fcollect(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nelems); +OSHMEM_DECLSPEC int
shmem_uint_fcollect(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ulong_fcollect(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ulonglong_fcollect(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_longdouble_fcollect(shmem_team_t team, long double *target, const long double *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int8_fcollect(shmem_team_t team, int8_t *target, const int8_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int16_fcollect(shmem_team_t team, int16_t *target, const int16_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int32_fcollect(shmem_team_t team, int32_t *target, const int32_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_int64_fcollect(shmem_team_t team, int64_t *target, const int64_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint8_fcollect(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint16_fcollect(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint32_fcollect(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_uint64_fcollect(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_size_fcollect(shmem_team_t team, size_t *target, const size_t *source, size_t nelems); +OSHMEM_DECLSPEC int shmem_ptrdiff_fcollect(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nelems); +#if OSHMEM_HAVE_C11 /* C11 only: type-generic shmem_fcollect(); _Generic selects the typed variant from the pointer type of __OSHMEM_VAR_ARG2(...) (presumably the target argument -- TODO confirm); unlisted types select __oshmem_datatype_ignore */ +#define shmem_fcollect(...)
\ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_char_fcollect, \ + short*: shmem_short_fcollect, \ + int*: shmem_int_fcollect, \ + long*: shmem_long_fcollect, \ + long long*: shmem_longlong_fcollect, \ + signed char*: shmem_schar_fcollect, \ + unsigned char*: shmem_uchar_fcollect, \ + unsigned short*: shmem_ushort_fcollect, \ + unsigned int*: shmem_uint_fcollect, \ + unsigned long*: shmem_ulong_fcollect, \ + unsigned long long*: shmem_ulonglong_fcollect, \ + float*: shmem_float_fcollect, \ + double*: shmem_double_fcollect, \ + long double*: shmem_longdouble_fcollect, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + +OSHMEM_DECLSPEC int shmem_fcollectmem(shmem_team_t team, void *target, const void *source, size_t nelems); + + + +/* Teams reduction: AND -- shmem_<type>_and_reduce(team, target, source, nreduce), declared for integer types only, all returning int */ +OSHMEM_DECLSPEC int shmem_uchar_and_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ushort_and_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint_and_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulong_and_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulonglong_and_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int_and_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_longlong_and_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int8_and_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int16_and_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int
shmem_int32_and_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int64_and_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint8_and_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint16_and_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint32_and_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint64_and_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_size_and_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +#if OSHMEM_HAVE_C11 /* C11 only: type-generic shmem_and_reduce(); _Generic selects the typed variant from the pointer type of __OSHMEM_VAR_ARG2(...) (presumably the target argument -- TODO confirm); unlisted types select __oshmem_datatype_ignore. NOTE(review): int64_t* and long long* are both listed; C11 forbids two associations with compatible types, so confirm no supported ABI defines int64_t as long long */ +#define shmem_and_reduce(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned char*: shmem_uchar_and_reduce, \ + unsigned short*: shmem_ushort_and_reduce, \ + unsigned int*: shmem_uint_and_reduce, \ + unsigned long*: shmem_ulong_and_reduce, \ + unsigned long long*: shmem_ulonglong_and_reduce, \ + int8_t*: shmem_int8_and_reduce, \ + int16_t*: shmem_int16_and_reduce, \ + int32_t*: shmem_int32_and_reduce, \ + int64_t*: shmem_int64_and_reduce, \ + long long*: shmem_longlong_and_reduce, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + + +/* Teams reduction: OR -- shmem_<type>_or_reduce(team, target, source, nreduce), declared for integer types only, all returning int */ +OSHMEM_DECLSPEC int shmem_uchar_or_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ushort_or_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint_or_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulong_or_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int
shmem_ulonglong_or_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int_or_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_longlong_or_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int8_or_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int16_or_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int32_or_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int64_or_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint8_or_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint16_or_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint32_or_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint64_or_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_size_or_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +#if OSHMEM_HAVE_C11 /* C11 only: type-generic shmem_or_reduce(); _Generic selects the typed variant from the pointer type of __OSHMEM_VAR_ARG2(...) (presumably the target argument -- TODO confirm); unlisted types select __oshmem_datatype_ignore. NOTE(review): int64_t* and long long* are both listed; C11 forbids two associations with compatible types, so confirm no supported ABI defines int64_t as long long */ +#define shmem_or_reduce(...)
\ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned char*: shmem_uchar_or_reduce, \ + unsigned short*: shmem_ushort_or_reduce, \ + unsigned int*: shmem_uint_or_reduce, \ + unsigned long*: shmem_ulong_or_reduce, \ + unsigned long long*: shmem_ulonglong_or_reduce, \ + int8_t*: shmem_int8_or_reduce, \ + int16_t*: shmem_int16_or_reduce, \ + int32_t*: shmem_int32_or_reduce, \ + int64_t*: shmem_int64_or_reduce, \ + long long*: shmem_longlong_or_reduce, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + + + +/* Teams reduction: XOR -- shmem_<type>_xor_reduce(team, target, source, nreduce), declared for integer types only, all returning int */ +OSHMEM_DECLSPEC int shmem_uchar_xor_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ushort_xor_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint_xor_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulong_xor_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulonglong_xor_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int_xor_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_longlong_xor_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int8_xor_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int16_xor_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int32_xor_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int64_xor_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint8_xor_reduce(shmem_team_t
team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint16_xor_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint32_xor_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint64_xor_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_size_xor_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +#if OSHMEM_HAVE_C11 /* C11 only: type-generic shmem_xor_reduce(); _Generic selects the typed variant from the pointer type of __OSHMEM_VAR_ARG2(...) (presumably the target argument -- TODO confirm); unlisted types select __oshmem_datatype_ignore. NOTE(review): int64_t* and long long* are both listed; C11 forbids two associations with compatible types, so confirm no supported ABI defines int64_t as long long */ +#define shmem_xor_reduce(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned char*: shmem_uchar_xor_reduce, \ + unsigned short*: shmem_ushort_xor_reduce, \ + unsigned int*: shmem_uint_xor_reduce, \ + unsigned long*: shmem_ulong_xor_reduce, \ + unsigned long long*: shmem_ulonglong_xor_reduce, \ + int8_t*: shmem_int8_xor_reduce, \ + int16_t*: shmem_int16_xor_reduce, \ + int32_t*: shmem_int32_xor_reduce, \ + int64_t*: shmem_int64_xor_reduce, \ + long long*: shmem_longlong_xor_reduce, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + +/* Teams reduction: MAX -- shmem_<type>_max_reduce(team, target, source, nreduce), declared for integer and real floating-point types, all returning int */ +OSHMEM_DECLSPEC int shmem_char_max_reduce(shmem_team_t team, char *target, const char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_short_max_reduce(shmem_team_t team, short *target, const short *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int_max_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_long_max_reduce(shmem_team_t team, long *target, const long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_float_max_reduce(shmem_team_t team, float *target, const float *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_double_max_reduce(shmem_team_t team, double *target, const double *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_longlong_max_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int
shmem_schar_max_reduce(shmem_team_t team, signed char *target, const signed char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uchar_max_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ushort_max_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint_max_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulong_max_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulonglong_max_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_longdouble_max_reduce(shmem_team_t team, long double *target, const long double *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int8_max_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int16_max_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int32_max_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int64_max_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint8_max_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint16_max_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint32_max_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint64_max_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_size_max_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce);
+OSHMEM_DECLSPEC int shmem_ptrdiff_max_reduce(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nreduce); +#if OSHMEM_HAVE_C11 /* C11 only: type-generic shmem_max_reduce(); _Generic selects the typed variant from the pointer type of __OSHMEM_VAR_ARG2(...) (presumably the target argument -- TODO confirm); unlisted types select __oshmem_datatype_ignore */ +#define shmem_max_reduce(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_char_max_reduce, \ + short*: shmem_short_max_reduce, \ + int*: shmem_int_max_reduce, \ + long*: shmem_long_max_reduce, \ + long long*: shmem_longlong_max_reduce, \ + signed char*: shmem_schar_max_reduce, \ + unsigned char*: shmem_uchar_max_reduce, \ + unsigned short*: shmem_ushort_max_reduce, \ + unsigned int*: shmem_uint_max_reduce, \ + unsigned long*: shmem_ulong_max_reduce, \ + unsigned long long*: shmem_ulonglong_max_reduce, \ + float*: shmem_float_max_reduce, \ + double*: shmem_double_max_reduce, \ + long double*: shmem_longdouble_max_reduce, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + + +/* Teams reduction: MIN -- shmem_<type>_min_reduce(team, target, source, nreduce), declared for integer and real floating-point types, all returning int */ +OSHMEM_DECLSPEC int shmem_char_min_reduce(shmem_team_t team, char *target, const char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_short_min_reduce(shmem_team_t team, short *target, const short *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int_min_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_long_min_reduce(shmem_team_t team, long *target, const long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_float_min_reduce(shmem_team_t team, float *target, const float *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_double_min_reduce(shmem_team_t team, double *target, const double *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_longlong_min_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_schar_min_reduce(shmem_team_t team, signed char *target, const signed char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uchar_min_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int
shmem_ushort_min_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint_min_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulong_min_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulonglong_min_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_longdouble_min_reduce(shmem_team_t team, long double *target, const long double *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int8_min_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int16_min_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int32_min_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int64_min_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint8_min_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint16_min_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint32_min_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint64_min_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_size_min_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ptrdiff_min_reduce(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nreduce); +#if OSHMEM_HAVE_C11 /* C11 only: type-generic shmem_min_reduce(); _Generic selects the typed variant from the pointer type of __OSHMEM_VAR_ARG2(...) (presumably the target argument -- TODO confirm); unlisted types select __oshmem_datatype_ignore */ +#define shmem_min_reduce(...)
\ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_char_min_reduce, \ + short*: shmem_short_min_reduce, \ + int*: shmem_int_min_reduce, \ + long*: shmem_long_min_reduce, \ + long long*: shmem_longlong_min_reduce, \ + signed char*: shmem_schar_min_reduce, \ + unsigned char*: shmem_uchar_min_reduce, \ + unsigned short*: shmem_ushort_min_reduce, \ + unsigned int*: shmem_uint_min_reduce, \ + unsigned long*: shmem_ulong_min_reduce, \ + unsigned long long*: shmem_ulonglong_min_reduce, \ + float*: shmem_float_min_reduce, \ + double*: shmem_double_min_reduce, \ + long double*: shmem_longdouble_min_reduce, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + + + +/* Teams reduction: SUM -- shmem_<type>_sum_reduce(team, target, source, nreduce); besides integer and real floating-point types, complexd/complexf variants are declared via OSHMEM_COMPLEX_TYPE; all return int */ +OSHMEM_DECLSPEC int shmem_char_sum_reduce(shmem_team_t team, char *target, const char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_short_sum_reduce(shmem_team_t team, short *target, const short *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int_sum_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_long_sum_reduce(shmem_team_t team, long *target, const long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_float_sum_reduce(shmem_team_t team, float *target, const float *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_double_sum_reduce(shmem_team_t team, double *target, const double *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_longlong_sum_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_schar_sum_reduce(shmem_team_t team, signed char *target, const signed char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uchar_sum_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ushort_sum_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint_sum_reduce(shmem_team_t team, unsigned int
*target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulong_sum_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulonglong_sum_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_longdouble_sum_reduce(shmem_team_t team, long double *target, const long double *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int8_sum_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int16_sum_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int32_sum_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int64_sum_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint8_sum_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint16_sum_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint32_sum_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint64_sum_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_size_sum_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ptrdiff_sum_reduce(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_complexd_sum_reduce(shmem_team_t team, OSHMEM_COMPLEX_TYPE(double) *target, const OSHMEM_COMPLEX_TYPE(double) *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_complexf_sum_reduce(shmem_team_t team, OSHMEM_COMPLEX_TYPE(float) *target, const OSHMEM_COMPLEX_TYPE(float) *source, size_t nreduce);
+#if OSHMEM_HAVE_C11 /* C11 only: type-generic shmem_sum_reduce(); _Generic selects the typed variant from the pointer type of __OSHMEM_VAR_ARG2(...) (presumably the target argument -- TODO confirm); unlisted types select __oshmem_datatype_ignore */ +#define shmem_sum_reduce(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_char_sum_reduce, \ + short*: shmem_short_sum_reduce, \ + int*: shmem_int_sum_reduce, \ + long*: shmem_long_sum_reduce, \ + long long*: shmem_longlong_sum_reduce, \ + signed char*: shmem_schar_sum_reduce, \ + unsigned char*: shmem_uchar_sum_reduce, \ + unsigned short*: shmem_ushort_sum_reduce, \ + unsigned int*: shmem_uint_sum_reduce, \ + unsigned long*: shmem_ulong_sum_reduce, \ + unsigned long long*: shmem_ulonglong_sum_reduce, \ + float*: shmem_float_sum_reduce, \ + double*: shmem_double_sum_reduce, \ + long double*: shmem_longdouble_sum_reduce, \ + OSHMEM_COMPLEX_TYPE(double)*: shmem_complexd_sum_reduce, \ + OSHMEM_COMPLEX_TYPE(float)*: shmem_complexf_sum_reduce, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + + +/* Teams reduction: PROD -- shmem_<type>_prod_reduce(team, target, source, nreduce); besides integer and real floating-point types, complexd/complexf variants are declared via OSHMEM_COMPLEX_TYPE; all return int */ +OSHMEM_DECLSPEC int shmem_char_prod_reduce(shmem_team_t team, char *target, const char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_short_prod_reduce(shmem_team_t team, short *target, const short *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int_prod_reduce(shmem_team_t team, int *target, const int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_long_prod_reduce(shmem_team_t team, long *target, const long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_float_prod_reduce(shmem_team_t team, float *target, const float *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_double_prod_reduce(shmem_team_t team, double *target, const double *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_longlong_prod_reduce(shmem_team_t team, long long *target, const long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_schar_prod_reduce(shmem_team_t team, signed char *target, const signed char *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uchar_prod_reduce(shmem_team_t team, unsigned char *target, const unsigned char *source, size_t nreduce); +OSHMEM_DECLSPEC int
shmem_ushort_prod_reduce(shmem_team_t team, unsigned short *target, const unsigned short *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint_prod_reduce(shmem_team_t team, unsigned int *target, const unsigned int *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulong_prod_reduce(shmem_team_t team, unsigned long *target, const unsigned long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ulonglong_prod_reduce(shmem_team_t team, unsigned long long *target, const unsigned long long *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_longdouble_prod_reduce(shmem_team_t team, long double *target, const long double *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int8_prod_reduce(shmem_team_t team, int8_t *target, const int8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int16_prod_reduce(shmem_team_t team, int16_t *target, const int16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int32_prod_reduce(shmem_team_t team, int32_t *target, const int32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_int64_prod_reduce(shmem_team_t team, int64_t *target, const int64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint8_prod_reduce(shmem_team_t team, uint8_t *target, const uint8_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint16_prod_reduce(shmem_team_t team, uint16_t *target, const uint16_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint32_prod_reduce(shmem_team_t team, uint32_t *target, const uint32_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_uint64_prod_reduce(shmem_team_t team, uint64_t *target, const uint64_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_size_prod_reduce(shmem_team_t team, size_t *target, const size_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_ptrdiff_prod_reduce(shmem_team_t team, ptrdiff_t *target, const ptrdiff_t *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_complexd_prod_reduce(shmem_team_t team, OSHMEM_COMPLEX_TYPE(double) *target, const
OSHMEM_COMPLEX_TYPE(double) *source, size_t nreduce); +OSHMEM_DECLSPEC int shmem_complexf_prod_reduce(shmem_team_t team, OSHMEM_COMPLEX_TYPE(float) *target, const OSHMEM_COMPLEX_TYPE(float) *source, size_t nreduce); +#if OSHMEM_HAVE_C11 /* C11 only: type-generic shmem_prod_reduce(); _Generic selects the typed variant from the pointer type of __OSHMEM_VAR_ARG2(...) (presumably the target argument -- TODO confirm); unlisted types select __oshmem_datatype_ignore */ +#define shmem_prod_reduce(...) \ + _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_char_prod_reduce, \ + short*: shmem_short_prod_reduce, \ + int*: shmem_int_prod_reduce, \ + long*: shmem_long_prod_reduce, \ + long long*: shmem_longlong_prod_reduce, \ + signed char*: shmem_schar_prod_reduce, \ + unsigned char*: shmem_uchar_prod_reduce, \ + unsigned short*: shmem_ushort_prod_reduce, \ + unsigned int*: shmem_uint_prod_reduce, \ + unsigned long*: shmem_ulong_prod_reduce, \ + unsigned long long*: shmem_ulonglong_prod_reduce, \ + float*: shmem_float_prod_reduce, \ + double*: shmem_double_prod_reduce, \ + long double*: shmem_longdouble_prod_reduce, \ + OSHMEM_COMPLEX_TYPE(double)*: shmem_complexd_prod_reduce, \ + OSHMEM_COMPLEX_TYPE(float)*: shmem_complexf_prod_reduce, \ + default: __oshmem_datatype_ignore)(__VA_ARGS__) +#endif /* OSHMEM_HAVE_C11 */ + + /* * Elemental put routines */ @@ -596,6 +1170,216 @@ OSHMEM_DECLSPEC void shmem_put64_nbi(void *target, const void *source, size_t l OSHMEM_DECLSPEC void shmem_put128_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_putmem_nbi(void *target, const void *source, size_t len, int pe); +/* + * Signaled put routines: the shmem_ctx_<type>_put_signal variants take a leading shmem_ctx_t, followed by (dest, source, nelems, sig_addr, signal, sig_op, pe); all return void + */ +OSHMEM_DECLSPEC void shmem_ctx_char_put_signal(shmem_ctx_t ctx, char *dest, const char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_short_put_signal(shmem_ctx_t ctx, short *dest, const short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_put_signal(shmem_ctx_t ctx, int *dest, const int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void
shmem_ctx_long_put_signal(shmem_ctx_t ctx, long *dest, const long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_float_put_signal(shmem_ctx_t ctx, float *dest, const float *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_put_signal(shmem_ctx_t ctx, double *dest, const double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_put_signal(shmem_ctx_t ctx, long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_schar_put_signal(shmem_ctx_t ctx, signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uchar_put_signal(shmem_ctx_t ctx, unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ushort_put_signal(shmem_ctx_t ctx, unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_put_signal(shmem_ctx_t ctx, unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_put_signal(shmem_ctx_t ctx, unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_put_signal(shmem_ctx_t ctx, unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longdouble_put_signal(shmem_ctx_t ctx, long double *dest, const long double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int 
sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int8_put_signal(shmem_ctx_t ctx, int8_t *dest, const int8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int16_put_signal(shmem_ctx_t ctx, int16_t *dest, const int16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_put_signal(shmem_ctx_t ctx, int32_t *dest, const int32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_put_signal(shmem_ctx_t ctx, int64_t *dest, const int64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint8_put_signal(shmem_ctx_t ctx, uint8_t *dest, const uint8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint16_put_signal(shmem_ctx_t ctx, uint16_t *dest, const uint16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_put_signal(shmem_ctx_t ctx, uint32_t *dest, const uint32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_put_signal(shmem_ctx_t ctx, uint64_t *dest, const uint64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_put_signal(shmem_ctx_t ctx, size_t *dest, const size_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_put_signal(shmem_ctx_t ctx, ptrdiff_t *dest, const ptrdiff_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +OSHMEM_DECLSPEC void shmem_char_put_signal(char *dest, const char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_short_put_signal(short 
*dest, const short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_int_put_signal(int *dest, const int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_long_put_signal(long *dest, const long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_float_put_signal(float *dest, const float *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_double_put_signal(double *dest, const double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_longlong_put_signal(long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_schar_put_signal(signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uchar_put_signal(unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ushort_put_signal(unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uint_put_signal(unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ulong_put_signal(unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_put_signal(unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_longdouble_put_signal(long double *dest, const long double *source, size_t nelems, 
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_int8_put_signal(int8_t *dest, const int8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_int16_put_signal(int16_t *dest, const int16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_int32_put_signal(int32_t *dest, const int32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_int64_put_signal(int64_t *dest, const int64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uint8_put_signal(uint8_t *dest, const uint8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uint16_put_signal(uint16_t *dest, const uint16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uint32_put_signal(uint32_t *dest, const uint32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uint64_put_signal(uint64_t *dest, const uint64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_size_put_signal(size_t *dest, const size_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_put_signal(ptrdiff_t *dest, const ptrdiff_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +#if OSHMEM_HAVE_C11 +#define shmem_put_signal(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_ctx_char_put_signal, \ + short*: shmem_ctx_short_put_signal, \ + int*: shmem_ctx_int_put_signal, \ + long*: shmem_ctx_long_put_signal, \ + long long*: shmem_ctx_longlong_put_signal, \ + signed char*: shmem_ctx_schar_put_signal, \ + unsigned char*: shmem_ctx_uchar_put_signal, \ + unsigned short*: shmem_ctx_ushort_put_signal, \ + unsigned int*: shmem_ctx_uint_put_signal, \ + unsigned long*: shmem_ctx_ulong_put_signal, \ + unsigned long long*: shmem_ctx_ulonglong_put_signal, \ + float*: shmem_ctx_float_put_signal, \ + double*: shmem_ctx_double_put_signal, \ + long double*: shmem_ctx_longdouble_put_signal, \ + default: __oshmem_datatype_ignore), \ + char*: shmem_char_put_signal, \ + short*: shmem_short_put_signal, \ + int*: shmem_int_put_signal, \ + long*: shmem_long_put_signal, \ + long long*: shmem_longlong_put_signal, \ + signed char*: shmem_schar_put_signal, \ + unsigned char*: shmem_uchar_put_signal, \ + unsigned short*: shmem_ushort_put_signal, \ + unsigned int*: shmem_uint_put_signal, \ + unsigned long*: shmem_ulong_put_signal, \ + unsigned long long*: shmem_ulonglong_put_signal, \ + float*: shmem_float_put_signal, \ + double*: shmem_double_put_signal, \ + long double*: shmem_longdouble_put_signal)(__VA_ARGS__) +#endif + +OSHMEM_DECLSPEC void shmem_put8_signal(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_put16_signal(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_put32_signal(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_put64_signal(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_put128_signal(void 
*dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +OSHMEM_DECLSPEC void shmem_ctx_put8_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put16_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put32_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put64_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put128_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +OSHMEM_DECLSPEC void shmem_putmem_signal(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_putmem_signal(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +/* + * Nonblocking signaled put routines + */ +OSHMEM_DECLSPEC void shmem_ctx_char_put_signal_nbi(shmem_ctx_t ctx, char *dest, const char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_short_put_signal_nbi(shmem_ctx_t ctx, short *dest, const short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_put_signal_nbi(shmem_ctx_t ctx, int *dest, const int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_put_signal_nbi(shmem_ctx_t ctx, long *dest, const long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); 
+OSHMEM_DECLSPEC void shmem_ctx_float_put_signal_nbi(shmem_ctx_t ctx, float *dest, const float *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_put_signal_nbi(shmem_ctx_t ctx, double *dest, const double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_put_signal_nbi(shmem_ctx_t ctx, long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_schar_put_signal_nbi(shmem_ctx_t ctx, signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uchar_put_signal_nbi(shmem_ctx_t ctx, unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ushort_put_signal_nbi(shmem_ctx_t ctx, unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_put_signal_nbi(shmem_ctx_t ctx, unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_put_signal_nbi(shmem_ctx_t ctx, unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_put_signal_nbi(shmem_ctx_t ctx, unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longdouble_put_signal_nbi(shmem_ctx_t ctx, long double *dest, const long double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int8_put_signal_nbi(shmem_ctx_t ctx, int8_t *dest, const 
int8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int16_put_signal_nbi(shmem_ctx_t ctx, int16_t *dest, const int16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_put_signal_nbi(shmem_ctx_t ctx, int32_t *dest, const int32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_put_signal_nbi(shmem_ctx_t ctx, int64_t *dest, const int64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint8_put_signal_nbi(shmem_ctx_t ctx, uint8_t *dest, const uint8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint16_put_signal_nbi(shmem_ctx_t ctx, uint16_t *dest, const uint16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_put_signal_nbi(shmem_ctx_t ctx, uint32_t *dest, const uint32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_put_signal_nbi(shmem_ctx_t ctx, uint64_t *dest, const uint64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_put_signal_nbi(shmem_ctx_t ctx, size_t *dest, const size_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_put_signal_nbi(shmem_ctx_t ctx, ptrdiff_t *dest, const ptrdiff_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +OSHMEM_DECLSPEC void shmem_char_put_signal_nbi(char *dest, const char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_short_put_signal_nbi(short *dest, const short *source, size_t nelems, uint64_t 
*sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_int_put_signal_nbi(int *dest, const int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_long_put_signal_nbi(long *dest, const long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_float_put_signal_nbi(float *dest, const float *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_double_put_signal_nbi(double *dest, const double *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_longlong_put_signal_nbi(long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_schar_put_signal_nbi(signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uchar_put_signal_nbi(unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ushort_put_signal_nbi(unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uint_put_signal_nbi(unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ulong_put_signal_nbi(unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_put_signal_nbi(unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_longdouble_put_signal_nbi(long double *dest, const long double *source, size_t nelems, uint64_t 
*sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_int8_put_signal_nbi(int8_t *dest, const int8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_int16_put_signal_nbi(int16_t *dest, const int16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_int32_put_signal_nbi(int32_t *dest, const int32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_int64_put_signal_nbi(int64_t *dest, const int64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uint8_put_signal_nbi(uint8_t *dest, const uint8_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uint16_put_signal_nbi(uint16_t *dest, const uint16_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uint32_put_signal_nbi(uint32_t *dest, const uint32_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_uint64_put_signal_nbi(uint64_t *dest, const uint64_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_size_put_signal_nbi(size_t *dest, const size_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_put_signal_nbi(ptrdiff_t *dest, const ptrdiff_t *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +#if OSHMEM_HAVE_C11 +#define shmem_put_signal_nbi(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_ctx_char_put_signal_nbi, \ + short*: shmem_ctx_short_put_signal_nbi, \ + int*: shmem_ctx_int_put_signal_nbi, \ + long*: shmem_ctx_long_put_signal_nbi, \ + long long*: shmem_ctx_longlong_put_signal_nbi, \ + signed char*: shmem_ctx_schar_put_signal_nbi, \ + unsigned char*: shmem_ctx_uchar_put_signal_nbi, \ + unsigned short*: shmem_ctx_ushort_put_signal_nbi, \ + unsigned int*: shmem_ctx_uint_put_signal_nbi, \ + unsigned long*: shmem_ctx_ulong_put_signal_nbi, \ + unsigned long long*: shmem_ctx_ulonglong_put_signal_nbi, \ + float*: shmem_ctx_float_put_signal_nbi, \ + double*: shmem_ctx_double_put_signal_nbi, \ + long double*: shmem_ctx_longdouble_put_signal_nbi, \ + default: __oshmem_datatype_ignore), \ + char*: shmem_char_put_signal_nbi, \ + short*: shmem_short_put_signal_nbi, \ + int*: shmem_int_put_signal_nbi, \ + long*: shmem_long_put_signal_nbi, \ + long long*: shmem_longlong_put_signal_nbi, \ + signed char*: shmem_schar_put_signal_nbi, \ + unsigned char*: shmem_uchar_put_signal_nbi, \ + unsigned short*: shmem_ushort_put_signal_nbi, \ + unsigned int*: shmem_uint_put_signal_nbi, \ + unsigned long*: shmem_ulong_put_signal_nbi, \ + unsigned long long*: shmem_ulonglong_put_signal_nbi, \ + float*: shmem_float_put_signal_nbi, \ + double*: shmem_double_put_signal_nbi, \ + long double*: shmem_longdouble_put_signal_nbi)(__VA_ARGS__) +#endif + +OSHMEM_DECLSPEC void shmem_put8_signal_nbi(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_put16_signal_nbi(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_put32_signal_nbi(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_put64_signal_nbi(void *dest, const void 
*source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_put128_signal_nbi(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +OSHMEM_DECLSPEC void shmem_ctx_put8_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put16_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put32_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put64_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put128_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + +OSHMEM_DECLSPEC void shmem_putmem_signal_nbi(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +OSHMEM_DECLSPEC void shmem_ctx_putmem_signal_nbi(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + + +OSHMEM_DECLSPEC uint64_t shmem_signal_fetch(const uint64_t *sig_addr); + + /* * Elemental get routines */ @@ -996,6 +1780,12 @@ OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_swap(shmem_ctx_t ctx, unsig OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_swap(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC float shmem_ctx_float_atomic_swap(shmem_ctx_t ctx, float *target, float value, int pe); OSHMEM_DECLSPEC double shmem_ctx_double_atomic_swap(shmem_ctx_t ctx, double *target, double value, int pe); 
+OSHMEM_DECLSPEC int32_t shmem_ctx_int32_atomic_swap(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmem_ctx_int64_atomic_swap(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_ctx_uint32_atomic_swap(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_ctx_uint64_atomic_swap(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t shmem_ctx_size_atomic_swap(shmem_ctx_t ctx, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ctx_ptrdiff_atomic_swap(shmem_ctx_t ctx, ptrdiff_t *target, ptrdiff_t value, int pe); OSHMEM_DECLSPEC int shmem_int_atomic_swap(int *target, int value, int pe); OSHMEM_DECLSPEC long shmem_long_atomic_swap(long *target, long value, int pe); @@ -1005,6 +1795,12 @@ OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_swap(unsigned long *target, uns OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_swap(unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC float shmem_float_atomic_swap(float *target, float value, int pe); OSHMEM_DECLSPEC double shmem_double_atomic_swap(double *target, double value, int pe); +OSHMEM_DECLSPEC int32_t shmem_int32_atomic_swap(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmem_int64_atomic_swap(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_uint32_atomic_swap(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_uint64_atomic_swap(uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t shmem_size_atomic_swap(size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ptrdiff_atomic_swap(ptrdiff_t *target, ptrdiff_t value, int pe); #if OSHMEM_HAVE_C11 #define shmem_atomic_swap(...) 
\ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1052,6 +1848,12 @@ OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_set(shmem_ctx_t ctx, unsigned long * OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_set(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC void shmem_ctx_float_atomic_set(shmem_ctx_t ctx, float *target, float value, int pe); OSHMEM_DECLSPEC void shmem_ctx_double_atomic_set(shmem_ctx_t ctx, double *target, double value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_set(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_set(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_set(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_set(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_atomic_set(shmem_ctx_t ctx, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_atomic_set(shmem_ctx_t ctx, ptrdiff_t *target, ptrdiff_t value, int pe); OSHMEM_DECLSPEC void shmem_int_atomic_set(int *target, int value, int pe); OSHMEM_DECLSPEC void shmem_long_atomic_set(long *target, long value, int pe); @@ -1061,6 +1863,13 @@ OSHMEM_DECLSPEC void shmem_ulong_atomic_set(unsigned long *target, unsigned long OSHMEM_DECLSPEC void shmem_ulonglong_atomic_set(unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC void shmem_float_atomic_set(float *target, float value, int pe); OSHMEM_DECLSPEC void shmem_double_atomic_set(double *target, double value, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_set(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_set(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_set(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_set(uint64_t 
*target, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_size_atomic_set(size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_atomic_set(ptrdiff_t *target, ptrdiff_t value, int pe); + #if OSHMEM_HAVE_C11 #define shmem_atomic_set(...) \ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1106,6 +1915,12 @@ OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_compare_swap(shmem_ctx_t ctx OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_compare_swap(shmem_ctx_t ctx, unsigned int *target, unsigned int cond, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_compare_swap(shmem_ctx_t ctx, unsigned long *target, unsigned long cond, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_compare_swap(shmem_ctx_t ctx, unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t shmem_ctx_int32_atomic_compare_swap(shmem_ctx_t ctx, int32_t *target, int32_t cond, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmem_ctx_int64_atomic_compare_swap(shmem_ctx_t ctx, int64_t *target, int64_t cond, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_ctx_uint32_atomic_compare_swap(shmem_ctx_t ctx, uint32_t *target, uint32_t cond, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_ctx_uint64_atomic_compare_swap(shmem_ctx_t ctx, uint64_t *target, uint64_t cond, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t shmem_ctx_size_atomic_compare_swap(shmem_ctx_t ctx, size_t *target, size_t cond, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ctx_ptrdiff_atomic_compare_swap(shmem_ctx_t ctx, ptrdiff_t *target, ptrdiff_t cond, ptrdiff_t value, int pe); OSHMEM_DECLSPEC int shmem_int_atomic_compare_swap(int *target, int cond, int value, int pe); OSHMEM_DECLSPEC long shmem_long_atomic_compare_swap(long *target, long cond, long value, int pe); @@ -1113,6 +1928,12 @@ OSHMEM_DECLSPEC long long shmem_longlong_atomic_compare_swap(long long 
*target, OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_compare_swap(unsigned int *target, unsigned int cond, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_compare_swap(unsigned long *target, unsigned long cond, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_compare_swap(unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t shmem_int32_atomic_compare_swap(int32_t *target, int32_t cond, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmem_int64_atomic_compare_swap(int64_t *target, int64_t cond, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_uint32_atomic_compare_swap(uint32_t *target, uint32_t cond, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_uint64_atomic_compare_swap(uint64_t *target, uint64_t cond, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t shmem_size_atomic_compare_swap(size_t *target, size_t cond, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ptrdiff_atomic_compare_swap(ptrdiff_t *target, ptrdiff_t cond, ptrdiff_t value, int pe); #if OSHMEM_HAVE_C11 #define shmem_atomic_compare_swap(...) 
\ @@ -1152,6 +1973,12 @@ OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_fetch_add(shmem_ctx_t ctx, l OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_fetch_add(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_fetch_add(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch_add(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t shmem_ctx_int32_atomic_fetch_add(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmem_ctx_int64_atomic_fetch_add(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_ctx_uint32_atomic_fetch_add(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_ctx_uint64_atomic_fetch_add(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t shmem_ctx_size_atomic_fetch_add(shmem_ctx_t ctx, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ctx_ptrdiff_atomic_fetch_add(shmem_ctx_t ctx, ptrdiff_t *target, ptrdiff_t value, int pe); OSHMEM_DECLSPEC int shmem_int_atomic_fetch_add(int *target, int value, int pe); OSHMEM_DECLSPEC long shmem_long_atomic_fetch_add(long *target, long value, int pe); @@ -1159,6 +1986,12 @@ OSHMEM_DECLSPEC long long shmem_longlong_atomic_fetch_add(long long *target, lon OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_fetch_add(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch_add(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_add(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t shmem_int32_atomic_fetch_add(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t 
shmem_int64_atomic_fetch_add(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_uint32_atomic_fetch_add(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_uint64_atomic_fetch_add(uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC size_t shmem_size_atomic_fetch_add(size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ptrdiff_atomic_fetch_add(ptrdiff_t *target, ptrdiff_t value, int pe); #if OSHMEM_HAVE_C11 #define shmem_atomic_fetch_add(...) \ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1321,6 +2154,12 @@ OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_fetch(shmem_ctx_t ctx, cons OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch(shmem_ctx_t ctx, const unsigned long long *target, int pe); OSHMEM_DECLSPEC float shmem_ctx_float_atomic_fetch(shmem_ctx_t ctx, const float *target, int pe); OSHMEM_DECLSPEC double shmem_ctx_double_atomic_fetch(shmem_ctx_t ctx, const double *target, int pe); +OSHMEM_DECLSPEC int32_t shmem_ctx_int32_atomic_fetch(shmem_ctx_t ctx, const int32_t *target, int pe); +OSHMEM_DECLSPEC int64_t shmem_ctx_int64_atomic_fetch(shmem_ctx_t ctx, const int64_t *target, int pe); +OSHMEM_DECLSPEC uint32_t shmem_ctx_uint32_atomic_fetch(shmem_ctx_t ctx, const uint32_t *target, int pe); +OSHMEM_DECLSPEC uint64_t shmem_ctx_uint64_atomic_fetch(shmem_ctx_t ctx, const uint64_t *target, int pe); +OSHMEM_DECLSPEC size_t shmem_ctx_size_atomic_fetch(shmem_ctx_t ctx, const size_t *target, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ctx_ptrdiff_atomic_fetch(shmem_ctx_t ctx, const ptrdiff_t *target, int pe); OSHMEM_DECLSPEC int shmem_int_atomic_fetch(const int *target, int pe); OSHMEM_DECLSPEC long shmem_long_atomic_fetch(const long *target, int pe); @@ -1330,6 +2169,12 @@ OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch(const unsigned long *targ OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch(const unsigned long long *target, int pe); OSHMEM_DECLSPEC 
float shmem_float_atomic_fetch(const float *target, int pe); OSHMEM_DECLSPEC double shmem_double_atomic_fetch(const double *target, int pe); +OSHMEM_DECLSPEC int32_t shmem_int32_atomic_fetch(const int32_t *target, int pe); +OSHMEM_DECLSPEC int64_t shmem_int64_atomic_fetch(const int64_t *target, int pe); +OSHMEM_DECLSPEC uint32_t shmem_uint32_atomic_fetch(const uint32_t *target, int pe); +OSHMEM_DECLSPEC uint64_t shmem_uint64_atomic_fetch(const uint64_t *target, int pe); +OSHMEM_DECLSPEC size_t shmem_size_atomic_fetch(const size_t *target, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ptrdiff_atomic_fetch(const ptrdiff_t *target, int pe); #if OSHMEM_HAVE_C11 #define shmem_atomic_fetch(...) \ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1375,6 +2220,12 @@ OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_fetch_inc(shmem_ctx_t ctx, l OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_fetch_inc(shmem_ctx_t ctx, unsigned int *target, int pe); OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_fetch_inc(shmem_ctx_t ctx, unsigned long *target, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch_inc(shmem_ctx_t ctx, unsigned long long *target, int pe); +OSHMEM_DECLSPEC int32_t shmem_ctx_int32_atomic_fetch_inc(shmem_ctx_t ctx, int32_t *target, int pe); +OSHMEM_DECLSPEC int64_t shmem_ctx_int64_atomic_fetch_inc(shmem_ctx_t ctx, int64_t *target, int pe); +OSHMEM_DECLSPEC uint32_t shmem_ctx_uint32_atomic_fetch_inc(shmem_ctx_t ctx, uint32_t *target, int pe); +OSHMEM_DECLSPEC uint64_t shmem_ctx_uint64_atomic_fetch_inc(shmem_ctx_t ctx, uint64_t *target, int pe); +OSHMEM_DECLSPEC size_t shmem_ctx_size_atomic_fetch_inc(shmem_ctx_t ctx, size_t *target, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ctx_ptrdiff_atomic_fetch_inc(shmem_ctx_t ctx, ptrdiff_t *target, int pe); OSHMEM_DECLSPEC int shmem_int_atomic_fetch_inc(int *target, int pe); OSHMEM_DECLSPEC long shmem_long_atomic_fetch_inc(long *target, int pe); @@ -1382,6 +2233,13 @@ OSHMEM_DECLSPEC long long 
shmem_longlong_atomic_fetch_inc(long long *target, int OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_fetch_inc(unsigned int *target, int pe); OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch_inc(unsigned long *target, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_inc(unsigned long long *target, int pe); +OSHMEM_DECLSPEC int32_t shmem_int32_atomic_fetch_inc(int32_t *target, int pe); +OSHMEM_DECLSPEC int64_t shmem_int64_atomic_fetch_inc(int64_t *target, int pe); +OSHMEM_DECLSPEC uint32_t shmem_uint32_atomic_fetch_inc(uint32_t *target, int pe); +OSHMEM_DECLSPEC uint64_t shmem_uint64_atomic_fetch_inc(uint64_t *target, int pe); +OSHMEM_DECLSPEC size_t shmem_size_atomic_fetch_inc(size_t *target, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ptrdiff_atomic_fetch_inc(ptrdiff_t *target, int pe); + #if OSHMEM_HAVE_C11 #define shmem_atomic_fetch_inc(...) \ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1419,6 +2277,12 @@ OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_add(shmem_ctx_t ctx, long long *t OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_add(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_add(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_add(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_add(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_add(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_add(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_add(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_atomic_add(shmem_ctx_t ctx, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void 
shmem_ctx_ptrdiff_atomic_add(shmem_ctx_t ctx, ptrdiff_t *target, ptrdiff_t value, int pe); OSHMEM_DECLSPEC void shmem_int_atomic_add(int *target, int value, int pe); OSHMEM_DECLSPEC void shmem_long_atomic_add(long *target, long value, int pe); @@ -1426,6 +2290,12 @@ OSHMEM_DECLSPEC void shmem_longlong_atomic_add(long long *target, long long valu OSHMEM_DECLSPEC void shmem_uint_atomic_add(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void shmem_ulong_atomic_add(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void shmem_ulonglong_atomic_add(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_add(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_add(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_add(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_add(uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_size_atomic_add(size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_atomic_add(ptrdiff_t *target, ptrdiff_t value, int pe); #if OSHMEM_HAVE_C11 #define shmem_atomic_add(...) 
\ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1589,6 +2459,12 @@ OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_inc(shmem_ctx_t ctx, long long *t OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_inc(shmem_ctx_t ctx, unsigned int *target, int pe); OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_inc(shmem_ctx_t ctx, unsigned long *target, int pe); OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_inc(shmem_ctx_t ctx, unsigned long long *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_inc(shmem_ctx_t ctx, int32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_inc(shmem_ctx_t ctx, int64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_inc(shmem_ctx_t ctx, uint32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_inc(shmem_ctx_t ctx, uint64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_atomic_inc(shmem_ctx_t ctx, size_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_atomic_inc(shmem_ctx_t ctx, ptrdiff_t *target, int pe); OSHMEM_DECLSPEC void shmem_int_atomic_inc(int *target, int pe); OSHMEM_DECLSPEC void shmem_long_atomic_inc(long *target, int pe); @@ -1596,6 +2472,12 @@ OSHMEM_DECLSPEC void shmem_longlong_atomic_inc(long long *target, int pe); OSHMEM_DECLSPEC void shmem_uint_atomic_inc(unsigned int *target, int pe); OSHMEM_DECLSPEC void shmem_ulong_atomic_inc(unsigned long *target, int pe); OSHMEM_DECLSPEC void shmem_ulonglong_atomic_inc(unsigned long long *target, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_inc(int32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_inc(int64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_inc(uint32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_inc(uint64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_size_atomic_inc(size_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_atomic_inc(ptrdiff_t *target, int pe); #if OSHMEM_HAVE_C11 #define shmem_atomic_inc(...) 
\ _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ @@ -1626,6 +2508,372 @@ OSHMEM_DECLSPEC void shmem_longlong_inc(long long *target, int pe); long long*: shmem_longlong_inc)(dst, pe) #endif + +/* + * Nonblocking Atomic Memory Operations + */ + +/* Atomic Nonblocking Fetch */ +OSHMEM_DECLSPEC void shmem_ctx_int_atomic_fetch_nbi(shmem_ctx_t ctx, int *fetch, const int *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_atomic_fetch_nbi(shmem_ctx_t ctx, long *fetch, const long *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_fetch_nbi(shmem_ctx_t ctx, long long *fetch, const long long *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_fetch_nbi(shmem_ctx_t ctx, unsigned int *fetch, const unsigned int *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_fetch_nbi(shmem_ctx_t ctx, unsigned long *fetch, const unsigned long *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_fetch_nbi(shmem_ctx_t ctx, unsigned long long *fetch, const unsigned long long *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_float_atomic_fetch_nbi(shmem_ctx_t ctx, float *fetch, const float *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_atomic_fetch_nbi(shmem_ctx_t ctx, double *fetch, const double *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_fetch_nbi(shmem_ctx_t ctx, int32_t *fetch, const int32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_fetch_nbi(shmem_ctx_t ctx, int64_t *fetch, const int64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_fetch_nbi(shmem_ctx_t ctx, uint32_t *fetch, const uint32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_fetch_nbi(shmem_ctx_t ctx, uint64_t *fetch, const uint64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_atomic_fetch_nbi(shmem_ctx_t ctx, size_t *fetch, const size_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_atomic_fetch_nbi(shmem_ctx_t ctx, ptrdiff_t *fetch, const ptrdiff_t *target, int pe); + 
+OSHMEM_DECLSPEC void shmem_int_atomic_fetch_nbi(int *fetch, const int *target, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_fetch_nbi(long *fetch, const long *target, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_fetch_nbi(long long *fetch, const long long *target, int pe); +OSHMEM_DECLSPEC void shmem_uint_atomic_fetch_nbi(unsigned int *fetch, const unsigned int *target, int pe); +OSHMEM_DECLSPEC void shmem_ulong_atomic_fetch_nbi(unsigned long *fetch, const unsigned long *target, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_atomic_fetch_nbi(unsigned long long *fetch, const unsigned long long *target, int pe); +OSHMEM_DECLSPEC void shmem_float_atomic_fetch_nbi(float *fetch, const float *target, int pe); +OSHMEM_DECLSPEC void shmem_double_atomic_fetch_nbi(double *fetch, const double *target, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_fetch_nbi(int32_t *fetch, const int32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_fetch_nbi(int64_t *fetch, const int64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_fetch_nbi(uint32_t *fetch, const uint32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_fetch_nbi(uint64_t *fetch, const uint64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_size_atomic_fetch_nbi(size_t *fetch, const size_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_atomic_fetch_nbi(ptrdiff_t *fetch, const ptrdiff_t *target, int pe); + +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_nbi(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_fetch_nbi, \ + long*: shmem_ctx_long_atomic_fetch_nbi, \ + long long*: shmem_ctx_longlong_atomic_fetch_nbi, \ + unsigned int*: shmem_ctx_uint_atomic_fetch_nbi, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_nbi, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_nbi, \ + float*: shmem_ctx_float_atomic_fetch_nbi, \ + double*: shmem_ctx_double_atomic_fetch_nbi, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_fetch_nbi, \ + long*: shmem_long_atomic_fetch_nbi, \ + long long*: shmem_longlong_atomic_fetch_nbi, \ + unsigned int*: shmem_uint_atomic_fetch_nbi, \ + unsigned long*: shmem_ulong_atomic_fetch_nbi, \ + unsigned long long*: shmem_ulonglong_atomic_fetch_nbi, \ + float*: shmem_float_atomic_fetch_nbi, \ + double*: shmem_double_atomic_fetch_nbi)(__VA_ARGS__) +#endif + + +/* Atomic Nonblocking Compare and Swap */ +OSHMEM_DECLSPEC void shmem_ctx_int_atomic_compare_swap_nbi(shmem_ctx_t ctx, int *fetch, int *target, int cond, int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_atomic_compare_swap_nbi(shmem_ctx_t ctx, long *fetch, long *target, long cond, long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_compare_swap_nbi(shmem_ctx_t ctx, long long *fetch, long long *target, long long cond, long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_compare_swap_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int cond, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_compare_swap_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long cond, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_compare_swap_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); +OSHMEM_DECLSPEC void 
shmem_ctx_int32_atomic_compare_swap_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int32_t cond, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_compare_swap_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t cond, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_compare_swap_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t cond, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_compare_swap_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t cond, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_atomic_compare_swap_nbi(shmem_ctx_t ctx, size_t *fetch, size_t *target, size_t cond, size_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_atomic_compare_swap_nbi(shmem_ctx_t ctx, ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t cond, ptrdiff_t value, int pe); + +OSHMEM_DECLSPEC void shmem_int_atomic_compare_swap_nbi(int *fetch, int *target, int cond, int value, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_compare_swap_nbi(long *fetch, long *target, long cond, long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_compare_swap_nbi(long long *fetch, long long *target, long long cond, long long value, int pe); +OSHMEM_DECLSPEC void shmem_uint_atomic_compare_swap_nbi(unsigned int *fetch, unsigned int *target, unsigned int cond, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ulong_atomic_compare_swap_nbi(unsigned long *fetch, unsigned long *target, unsigned long cond, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_atomic_compare_swap_nbi(unsigned long long *fetch, unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_compare_swap_nbi(int32_t *fetch, int32_t *target, int32_t cond, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_compare_swap_nbi(int64_t *fetch, int64_t *target, int64_t cond, int64_t value, int pe); 
+OSHMEM_DECLSPEC void shmem_uint32_atomic_compare_swap_nbi(uint32_t *fetch, uint32_t *target, uint32_t cond, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_compare_swap_nbi(uint64_t *fetch, uint64_t *target, uint64_t cond, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_size_atomic_compare_swap_nbi(size_t *fetch, size_t *target, size_t cond, size_t value, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_atomic_compare_swap_nbi(ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t cond, ptrdiff_t value, int pe); + +#if OSHMEM_HAVE_C11 +#define shmem_atomic_compare_swap_nbi(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_compare_swap_nbi, \ + long*: shmem_ctx_long_atomic_compare_swap_nbi, \ + long long*: shmem_ctx_longlong_atomic_compare_swap_nbi, \ + unsigned int*: shmem_ctx_uint_atomic_compare_swap_nbi, \ + unsigned long*: shmem_ctx_ulong_atomic_compare_swap_nbi, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_compare_swap_nbi, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_compare_swap_nbi, \ + long*: shmem_long_atomic_compare_swap_nbi, \ + long long*: shmem_longlong_atomic_compare_swap_nbi, \ + unsigned int*: shmem_uint_atomic_compare_swap_nbi, \ + unsigned long*: shmem_ulong_atomic_compare_swap_nbi, \ + unsigned long long*: shmem_ulonglong_atomic_compare_swap_nbi)(__VA_ARGS__) +#endif + +/* Atomic Nonblocking Swap */ +OSHMEM_DECLSPEC void shmem_ctx_int_atomic_swap_nbi(shmem_ctx_t ctx, int *fetch, int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_atomic_swap_nbi(shmem_ctx_t ctx, long *fetch, long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_swap_nbi(shmem_ctx_t ctx, long long *fetch, long long *target, long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_swap_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int value, int pe); 
+OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_swap_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_swap_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_float_atomic_swap_nbi(shmem_ctx_t ctx, float *fetch, float *target, float value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_atomic_swap_nbi(shmem_ctx_t ctx, double *fetch, double *target, double value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_swap_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_swap_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_swap_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_swap_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_atomic_swap_nbi(shmem_ctx_t ctx, size_t *fetch, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_atomic_swap_nbi(shmem_ctx_t ctx, ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t value, int pe); + +OSHMEM_DECLSPEC void shmem_int_atomic_swap_nbi(int *fetch, int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_swap_nbi(long *fetch, long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_swap_nbi(long long *fetch, long long *target, long long value, int pe); +OSHMEM_DECLSPEC void shmem_uint_atomic_swap_nbi(unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ulong_atomic_swap_nbi(unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_atomic_swap_nbi(unsigned long long *fetch, unsigned long long 
*target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_float_atomic_swap_nbi(float *fetch, float *target, float value, int pe); +OSHMEM_DECLSPEC void shmem_double_atomic_swap_nbi(double *fetch, double *target, double value, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_swap_nbi(int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_swap_nbi(int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_swap_nbi(uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_swap_nbi(uint64_t *fetch, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_size_atomic_swap_nbi(size_t *fetch, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_atomic_swap_nbi(ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t value, int pe); + +#if OSHMEM_HAVE_C11 +#define shmem_atomic_swap_nbi(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_swap_nbi, \ + long*: shmem_ctx_long_atomic_swap_nbi, \ + long long*: shmem_ctx_longlong_atomic_swap_nbi, \ + unsigned int*: shmem_ctx_uint_atomic_swap_nbi, \ + unsigned long*: shmem_ctx_ulong_atomic_swap_nbi, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_swap_nbi, \ + float*: shmem_ctx_float_atomic_swap_nbi, \ + double*: shmem_ctx_double_atomic_swap_nbi, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_swap_nbi, \ + long*: shmem_long_atomic_swap_nbi, \ + long long*: shmem_longlong_atomic_swap_nbi, \ + unsigned int*: shmem_uint_atomic_swap_nbi, \ + unsigned long*: shmem_ulong_atomic_swap_nbi, \ + unsigned long long*: shmem_ulonglong_atomic_swap_nbi, \ + float*: shmem_float_atomic_swap_nbi, \ + double*: shmem_double_atomic_swap_nbi)(__VA_ARGS__) +#endif + + + +/* Atomic Nonblocking Fetch and Increment */ +OSHMEM_DECLSPEC void 
shmem_ctx_int_atomic_fetch_inc_nbi(shmem_ctx_t ctx, int *fetch, int *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_atomic_fetch_inc_nbi(shmem_ctx_t ctx, long *fetch, long *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_fetch_inc_nbi(shmem_ctx_t ctx, long long *fetch, long long *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_fetch_inc_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_fetch_inc_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_fetch_inc_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_fetch_inc_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_fetch_inc_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_fetch_inc_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_fetch_inc_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_atomic_fetch_inc_nbi(shmem_ctx_t ctx, size_t *fetch, size_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_atomic_fetch_inc_nbi(shmem_ctx_t ctx, ptrdiff_t *fetch, ptrdiff_t *target, int pe); + +OSHMEM_DECLSPEC void shmem_int_atomic_fetch_inc_nbi(int *fetch, int *target, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_fetch_inc_nbi(long *fetch, long *target, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_fetch_inc_nbi(long long *fetch, long long *target, int pe); +OSHMEM_DECLSPEC void shmem_uint_atomic_fetch_inc_nbi(unsigned int *fetch, unsigned int *target, int pe); +OSHMEM_DECLSPEC void shmem_ulong_atomic_fetch_inc_nbi(unsigned long *fetch, unsigned long *target, int pe); +OSHMEM_DECLSPEC void 
shmem_ulonglong_atomic_fetch_inc_nbi(unsigned long long *fetch, unsigned long long *target, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_fetch_inc_nbi(int32_t *fetch, int32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_fetch_inc_nbi(int64_t *fetch, int64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_fetch_inc_nbi(uint32_t *fetch, uint32_t *target, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_fetch_inc_nbi(uint64_t *fetch, uint64_t *target, int pe); +OSHMEM_DECLSPEC void shmem_size_atomic_fetch_inc_nbi(size_t *fetch, size_t *target, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_atomic_fetch_inc_nbi(ptrdiff_t *fetch, ptrdiff_t *target, int pe); + +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_inc_nbi(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_fetch_inc_nbi, \ + long*: shmem_ctx_long_atomic_fetch_inc_nbi, \ + long long*: shmem_ctx_longlong_atomic_fetch_inc_nbi, \ + unsigned int*: shmem_ctx_uint_atomic_fetch_inc_nbi, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_inc_nbi, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_inc_nbi, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_fetch_inc_nbi, \ + long*: shmem_long_atomic_fetch_inc_nbi, \ + long long*: shmem_longlong_atomic_fetch_inc_nbi, \ + unsigned int*: shmem_uint_atomic_fetch_inc_nbi, \ + unsigned long*: shmem_ulong_atomic_fetch_inc_nbi, \ + unsigned long long*: shmem_ulonglong_atomic_fetch_inc_nbi)(__VA_ARGS__) +#endif + + +/* Atomic Nonblocking Fetch and Add */ +OSHMEM_DECLSPEC void shmem_ctx_int_atomic_fetch_add_nbi(shmem_ctx_t ctx, int *fetch, int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_atomic_fetch_add_nbi(shmem_ctx_t ctx, long *fetch, long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_fetch_add_nbi(shmem_ctx_t ctx, long long *fetch, long long *target, long long value, int pe); 
+OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_fetch_add_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_fetch_add_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_fetch_add_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_fetch_add_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_fetch_add_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_fetch_add_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_fetch_add_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_atomic_fetch_add_nbi(shmem_ctx_t ctx, size_t *fetch, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_atomic_fetch_add_nbi(shmem_ctx_t ctx, ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t value, int pe); +/* Context-free fetch-and-add prototypes: the same parameter lists as the shmem_ctx_* forms without the leading shmem_ctx_t. */ +OSHMEM_DECLSPEC void shmem_int_atomic_fetch_add_nbi(int *fetch, int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_fetch_add_nbi(long *fetch, long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_fetch_add_nbi(long long *fetch, long long *target, long long value, int pe); +OSHMEM_DECLSPEC void shmem_uint_atomic_fetch_add_nbi(unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ulong_atomic_fetch_add_nbi(unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_atomic_fetch_add_nbi(unsigned long long *fetch, unsigned long long *target, unsigned long long value,
int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_fetch_add_nbi(int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_fetch_add_nbi(int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_fetch_add_nbi(uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_fetch_add_nbi(uint64_t *fetch, uint64_t *target, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_size_atomic_fetch_add_nbi(size_t *fetch, size_t *target, size_t value, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_atomic_fetch_add_nbi(ptrdiff_t *fetch, ptrdiff_t *target, ptrdiff_t value, int pe); +/* C11 generic selection for fetch-and-add: a shmem_ctx_t first argument routes to the shmem_ctx_* variants keyed on the second argument's pointer type, otherwise the first argument's pointer type is used. Associations cover int, long, long long and their unsigned counterparts only. */ +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_add_nbi(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_fetch_add_nbi, \ + long*: shmem_ctx_long_atomic_fetch_add_nbi, \ + long long*: shmem_ctx_longlong_atomic_fetch_add_nbi, \ + unsigned int*: shmem_ctx_uint_atomic_fetch_add_nbi, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_add_nbi, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_add_nbi, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_fetch_add_nbi, \ + long*: shmem_long_atomic_fetch_add_nbi, \ + long long*: shmem_longlong_atomic_fetch_add_nbi, \ + unsigned int*: shmem_uint_atomic_fetch_add_nbi, \ + unsigned long*: shmem_ulong_atomic_fetch_add_nbi, \ + unsigned long long*: shmem_ulonglong_atomic_fetch_add_nbi)(__VA_ARGS__) +#endif + + +/* Atomic Nonblocking Fetch and AND: prototypes are declared for unsigned int, unsigned long, unsigned long long, int32_t, int64_t, uint32_t and uint64_t; no plain signed int/long/long long variants are declared in this group. */ +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_fetch_and_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_fetch_and_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_fetch_and_nbi(shmem_ctx_t ctx, unsigned
long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_fetch_and_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_fetch_and_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_fetch_and_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_fetch_and_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void shmem_uint_atomic_fetch_and_nbi(unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ulong_atomic_fetch_and_nbi(unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_atomic_fetch_and_nbi(unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_fetch_and_nbi(int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_fetch_and_nbi(int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_fetch_and_nbi(uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_fetch_and_nbi(uint64_t *fetch, uint64_t *target, uint64_t value, int pe); +/* The generic form below lists unsigned int/long/long long plus int32_t/int64_t; uint32_t and uint64_t have prototypes but no association of their own. */ +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_and_nbi(...)
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned int*: shmem_ctx_uint_atomic_fetch_and_nbi, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_and_nbi, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_and_nbi, \ + int32_t*: shmem_ctx_int32_atomic_fetch_and_nbi, \ + int64_t*: shmem_ctx_int64_atomic_fetch_and_nbi, \ + default: __oshmem_datatype_ignore), \ + unsigned int*: shmem_uint_atomic_fetch_and_nbi, \ + unsigned long*: shmem_ulong_atomic_fetch_and_nbi, \ + int32_t*: shmem_int32_atomic_fetch_and_nbi, \ + int64_t*: shmem_int64_atomic_fetch_and_nbi, \ + unsigned long long*: shmem_ulonglong_atomic_fetch_and_nbi)(__VA_ARGS__) +#endif + +/* Atomic Nonblocking Fetch and OR: declared for the same seven types and with the same (fetch, target, value, pe) parameter lists as the fetch-and-AND group. */ +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_fetch_or_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_fetch_or_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_fetch_or_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_fetch_or_nbi(shmem_ctx_t ctx, int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_fetch_or_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_fetch_or_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_fetch_or_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void shmem_uint_atomic_fetch_or_nbi(unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ulong_atomic_fetch_or_nbi(unsigned long *fetch, unsigned long *target, unsigned
long value, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_atomic_fetch_or_nbi(unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_fetch_or_nbi(int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_fetch_or_nbi(int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_fetch_or_nbi(uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_fetch_or_nbi(uint64_t *fetch, uint64_t *target, uint64_t value, int pe); + +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_or_nbi(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned int*: shmem_ctx_uint_atomic_fetch_or_nbi, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_or_nbi, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_or_nbi, \ + int32_t*: shmem_ctx_int32_atomic_fetch_or_nbi, \ + int64_t*: shmem_ctx_int64_atomic_fetch_or_nbi, \ + default: __oshmem_datatype_ignore), \ + unsigned int*: shmem_uint_atomic_fetch_or_nbi, \ + unsigned long*: shmem_ulong_atomic_fetch_or_nbi, \ + int32_t*: shmem_int32_atomic_fetch_or_nbi, \ + int64_t*: shmem_int64_atomic_fetch_or_nbi, \ + unsigned long long*: shmem_ulonglong_atomic_fetch_or_nbi)(__VA_ARGS__) +#endif + + +/* Atomic Nonblocking Fetch and XOR: declared for the same seven types and with the same parameter lists as the fetch-and-AND and fetch-and-OR groups. */ +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_fetch_xor_nbi(shmem_ctx_t ctx, unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_fetch_xor_nbi(shmem_ctx_t ctx, unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_fetch_xor_nbi(shmem_ctx_t ctx, unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_fetch_xor_nbi(shmem_ctx_t ctx, int32_t *fetch,
int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_fetch_xor_nbi(shmem_ctx_t ctx, int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_fetch_xor_nbi(shmem_ctx_t ctx, uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_fetch_xor_nbi(shmem_ctx_t ctx, uint64_t *fetch, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void shmem_uint_atomic_fetch_xor_nbi(unsigned int *fetch, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ulong_atomic_fetch_xor_nbi(unsigned long *fetch, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_atomic_fetch_xor_nbi(unsigned long long *fetch, unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_fetch_xor_nbi(int32_t *fetch, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_fetch_xor_nbi(int64_t *fetch, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_fetch_xor_nbi(uint32_t *fetch, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_fetch_xor_nbi(uint64_t *fetch, uint64_t *target, uint64_t value, int pe); + +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_xor_nbi(...)
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + unsigned int*: shmem_ctx_uint_atomic_fetch_xor_nbi, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_xor_nbi, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_xor_nbi, \ + int32_t*: shmem_ctx_int32_atomic_fetch_xor_nbi, \ + int64_t*: shmem_ctx_int64_atomic_fetch_xor_nbi, \ + default: __oshmem_datatype_ignore), \ + unsigned int*: shmem_uint_atomic_fetch_xor_nbi, \ + unsigned long*: shmem_ulong_atomic_fetch_xor_nbi, \ + int32_t*: shmem_int32_atomic_fetch_xor_nbi, \ + int64_t*: shmem_int64_atomic_fetch_xor_nbi, \ + unsigned long long*: shmem_ulonglong_atomic_fetch_xor_nbi)(__VA_ARGS__) +#endif + +/* + * + * Control of profiling + * + */ +OSHMEM_DECLSPEC void shmem_pcontrol(int level, ...); + /* * Lock functions */ @@ -1669,6 +2917,170 @@ OSHMEM_DECLSPEC void shmem_ptrdiff_wait_until(volatile ptrdiff_t *addr, int cmp unsigned long long*: shmem_ulonglong_wait_until)(addr, cmp, value) #endif +OSHMEM_DECLSPEC void shmem_short_wait_until_all(volatile short *ivars, size_t nelems, const int *status, int cmp, short value); +OSHMEM_DECLSPEC void shmem_ushort_wait_until_all(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC void shmem_int_wait_until_all(volatile int *ivars, size_t nelems, const int *status, int cmp, int value); +OSHMEM_DECLSPEC void shmem_long_wait_until_all(volatile long *ivars, size_t nelems, const int *status, int cmp, long value); +OSHMEM_DECLSPEC void shmem_longlong_wait_until_all(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC void shmem_uint_wait_until_all(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC void shmem_ulong_wait_until_all(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC void
shmem_ulonglong_wait_until_all(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC void shmem_int32_wait_until_all(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC void shmem_int64_wait_until_all(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC void shmem_uint32_wait_until_all(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t value); +OSHMEM_DECLSPEC void shmem_uint64_wait_until_all(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC void shmem_size_wait_until_all(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t value); +OSHMEM_DECLSPEC void shmem_ptrdiff_wait_until_all(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define shmem_wait_until_all(ivars, nelems, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: shmem_short_wait_until_all, \ + unsigned short*: shmem_ushort_wait_until_all, \ + int*: shmem_int_wait_until_all, \ + long*: shmem_long_wait_until_all, \ + long long*: shmem_longlong_wait_until_all, \ + unsigned int*: shmem_uint_wait_until_all, \ + unsigned long*: shmem_ulong_wait_until_all, \ + unsigned long long*: shmem_ulonglong_wait_until_all)(ivars, nelems, status, cmp, value) +#endif +/* The shmem_*_wait_until_all prototypes above return void; the shmem_*_wait_until_any prototypes that follow take the same (ivars, nelems, status, cmp, value) parameters and return size_t. The generic macros list only the eight short/int/long/long long signed and unsigned pointer types and have no default association. NOTE(review): the associations name unqualified pointer types while the prototypes take volatile-qualified pointers - confirm that callers of the generic form are expected to pass unqualified pointers. */ +OSHMEM_DECLSPEC size_t shmem_short_wait_until_any(volatile short *ivars, size_t nelems, const int *status, int cmp, short value); +OSHMEM_DECLSPEC size_t shmem_ushort_wait_until_any(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC size_t shmem_int_wait_until_any(volatile int *ivars, size_t nelems, const int *status, int cmp, int value); +OSHMEM_DECLSPEC size_t shmem_long_wait_until_any(volatile long *ivars, size_t nelems, const int *status, int cmp, long value);
+OSHMEM_DECLSPEC size_t shmem_longlong_wait_until_any(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC size_t shmem_uint_wait_until_any(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC size_t shmem_ulong_wait_until_any(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC size_t shmem_ulonglong_wait_until_any(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC size_t shmem_int32_wait_until_any(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC size_t shmem_int64_wait_until_any(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC size_t shmem_uint32_wait_until_any(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t value); +OSHMEM_DECLSPEC size_t shmem_uint64_wait_until_any(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC size_t shmem_size_wait_until_any(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t value); +OSHMEM_DECLSPEC size_t shmem_ptrdiff_wait_until_any(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define shmem_wait_until_any(ivars, nelems, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: shmem_short_wait_until_any, \ + unsigned short*: shmem_ushort_wait_until_any, \ + int*: shmem_int_wait_until_any, \ + long*: shmem_long_wait_until_any, \ + long long*: shmem_longlong_wait_until_any, \ + unsigned int*: shmem_uint_wait_until_any, \ + unsigned long*: shmem_ulong_wait_until_any, \ + unsigned long long*: shmem_ulonglong_wait_until_any)(ivars, nelems, status, cmp, value) +#endif +/* shmem_*_wait_until_some: same parameter list as the wait_until_any prototypes with an additional size_t *indices argument after nelems; returns size_t. The generic macro again lists only the eight short/int/long/long long signed and unsigned pointer types. */ +OSHMEM_DECLSPEC size_t shmem_short_wait_until_some(volatile short
*ivars, size_t nelems, size_t *indices, const int *status, int cmp, short value); +OSHMEM_DECLSPEC size_t shmem_ushort_wait_until_some(volatile unsigned short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC size_t shmem_int_wait_until_some(volatile int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int value); +OSHMEM_DECLSPEC size_t shmem_long_wait_until_some(volatile long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long value); +OSHMEM_DECLSPEC size_t shmem_longlong_wait_until_some(volatile long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC size_t shmem_uint_wait_until_some(volatile unsigned int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC size_t shmem_ulong_wait_until_some(volatile unsigned long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC size_t shmem_ulonglong_wait_until_some(volatile unsigned long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC size_t shmem_int32_wait_until_some(volatile int32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC size_t shmem_int64_wait_until_some(volatile int64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC size_t shmem_uint32_wait_until_some(volatile uint32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint32_t value); +OSHMEM_DECLSPEC size_t shmem_uint64_wait_until_some(volatile uint64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC size_t shmem_size_wait_until_some(volatile size_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, size_t value); +OSHMEM_DECLSPEC size_t
shmem_ptrdiff_wait_until_some(volatile ptrdiff_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define shmem_wait_until_some(ivars, nelems, indices, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: shmem_short_wait_until_some, \ + unsigned short*: shmem_ushort_wait_until_some, \ + int*: shmem_int_wait_until_some, \ + long*: shmem_long_wait_until_some, \ + long long*: shmem_longlong_wait_until_some, \ + unsigned int*: shmem_uint_wait_until_some, \ + unsigned long*: shmem_ulong_wait_until_some, \ + unsigned long long*: shmem_ulonglong_wait_until_some)(ivars, nelems, indices, status, cmp, value) +#endif +/* shmem_*_wait_until_all_vector: same shape as wait_until_all, except the last parameter is a pointer (values) rather than a single by-value operand; returns void. */ +OSHMEM_DECLSPEC void shmem_short_wait_until_all_vector(volatile short *ivars, size_t nelems, const int *status, int cmp, short *values); +OSHMEM_DECLSPEC void shmem_ushort_wait_until_all_vector(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short *values); +OSHMEM_DECLSPEC void shmem_int_wait_until_all_vector(volatile int *ivars, size_t nelems, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC void shmem_long_wait_until_all_vector(volatile long *ivars, size_t nelems, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC void shmem_longlong_wait_until_all_vector(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long *values); +OSHMEM_DECLSPEC void shmem_uint_wait_until_all_vector(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC void shmem_ulong_wait_until_all_vector(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC void shmem_ulonglong_wait_until_all_vector(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC void shmem_int32_wait_until_all_vector(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t
*values); +OSHMEM_DECLSPEC void shmem_int64_wait_until_all_vector(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t *values); +OSHMEM_DECLSPEC void shmem_uint32_wait_until_all_vector(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC void shmem_uint64_wait_until_all_vector(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC void shmem_size_wait_until_all_vector(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC void shmem_ptrdiff_wait_until_all_vector(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t *values); +#if OSHMEM_HAVE_C11 +#define shmem_wait_until_all_vector(ivars, nelems, status, cmp, values) \ + _Generic(&*(ivars), \ + short*: shmem_short_wait_until_all_vector, \ + unsigned short*: shmem_ushort_wait_until_all_vector, \ + int*: shmem_int_wait_until_all_vector, \ + long*: shmem_long_wait_until_all_vector, \ + long long*: shmem_longlong_wait_until_all_vector, \ + unsigned int*: shmem_uint_wait_until_all_vector, \ + unsigned long*: shmem_ulong_wait_until_all_vector, \ + unsigned long long*: shmem_ulonglong_wait_until_all_vector)(ivars, nelems, status, cmp, values) +#endif + +/* shmem_*_wait_until_any_vector: pointer-operand (values) counterpart of wait_until_any; returns size_t. */ +OSHMEM_DECLSPEC size_t shmem_short_wait_until_any_vector(volatile short *ivars, size_t nelems, const int *status, int cmp, short *values); +OSHMEM_DECLSPEC size_t shmem_ushort_wait_until_any_vector(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short *values); +OSHMEM_DECLSPEC size_t shmem_int_wait_until_any_vector(volatile int *ivars, size_t nelems, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC size_t shmem_long_wait_until_any_vector(volatile long *ivars, size_t nelems, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC size_t shmem_longlong_wait_until_any_vector(volatile long long *ivars, size_t nelems, const int
*status, int cmp, long long *values); +OSHMEM_DECLSPEC size_t shmem_uint_wait_until_any_vector(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC size_t shmem_ulong_wait_until_any_vector(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC size_t shmem_ulonglong_wait_until_any_vector(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC size_t shmem_int32_wait_until_any_vector(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t *values); +OSHMEM_DECLSPEC size_t shmem_int64_wait_until_any_vector(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t *values); +OSHMEM_DECLSPEC size_t shmem_uint32_wait_until_any_vector(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC size_t shmem_uint64_wait_until_any_vector(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC size_t shmem_size_wait_until_any_vector(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC size_t shmem_ptrdiff_wait_until_any_vector(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t *values); +#if OSHMEM_HAVE_C11 +#define shmem_wait_until_any_vector(ivars, nelems, status, cmp, values) \ + _Generic(&*(ivars), \ + short*: shmem_short_wait_until_any_vector, \ + unsigned short*: shmem_ushort_wait_until_any_vector, \ + int*: shmem_int_wait_until_any_vector, \ + long*: shmem_long_wait_until_any_vector, \ + long long*: shmem_longlong_wait_until_any_vector, \ + unsigned int*: shmem_uint_wait_until_any_vector, \ + unsigned long*: shmem_ulong_wait_until_any_vector, \ + unsigned long long*: shmem_ulonglong_wait_until_any_vector)(ivars, nelems, status, cmp, values) +#endif + +/* shmem_*_wait_until_some_vector: pointer-operand (values) counterpart of wait_until_some, including the size_t *indices argument; returns size_t. */ +OSHMEM_DECLSPEC size_t
shmem_short_wait_until_some_vector(volatile short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, short *values); +OSHMEM_DECLSPEC size_t shmem_ushort_wait_until_some_vector(volatile unsigned short *ivars, size_t nelems, size_t *indices, const int *status , int cmp, unsigned short *values); +OSHMEM_DECLSPEC size_t shmem_int_wait_until_some_vector(volatile int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC size_t shmem_long_wait_until_some_vector(volatile long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC size_t shmem_longlong_wait_until_some_vector(volatile long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long long *values); +OSHMEM_DECLSPEC size_t shmem_uint_wait_until_some_vector(volatile unsigned int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC size_t shmem_ulong_wait_until_some_vector(volatile unsigned long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC size_t shmem_ulonglong_wait_until_some_vector(volatile unsigned long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC size_t shmem_int32_wait_until_some_vector(volatile int32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int32_t *values); +OSHMEM_DECLSPEC size_t shmem_int64_wait_until_some_vector(volatile int64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int64_t *values); +OSHMEM_DECLSPEC size_t shmem_uint32_wait_until_some_vector(volatile uint32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC size_t shmem_uint64_wait_until_some_vector(volatile uint64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC size_t
shmem_size_wait_until_some_vector(volatile size_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC size_t shmem_ptrdiff_wait_until_some_vector(volatile ptrdiff_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, ptrdiff_t *values); +#if OSHMEM_HAVE_C11 +#define shmem_wait_until_some_vector(ivars, nelems, indices, status, cmp, values)\ + _Generic(&*(ivars), \ + short*: shmem_short_wait_until_some_vector, \ + unsigned short*: shmem_ushort_wait_until_some_vector, \ + int*: shmem_int_wait_until_some_vector, \ + long*: shmem_long_wait_until_some_vector, \ + long long*: shmem_longlong_wait_until_some_vector, \ + unsigned int*: shmem_uint_wait_until_some_vector, \ + unsigned long*: shmem_ulong_wait_until_some_vector, \ + unsigned long long*: shmem_ulonglong_wait_until_some_vector)(ivars, nelems, indices, status, cmp, values) +#endif + OSHMEM_DECLSPEC int shmem_short_test(volatile short *addr, int cmp, short value); OSHMEM_DECLSPEC int shmem_int_test(volatile int *addr, int cmp, int value); OSHMEM_DECLSPEC int shmem_long_test(volatile long *addr, int cmp, long value); @@ -1696,24 +3108,193 @@ OSHMEM_DECLSPEC int shmem_ptrdiff_test(volatile ptrdiff_t *addr, int cmp, ptrdi unsigned long long*: shmem_ulonglong_test)(addr, cmp, value) #endif +OSHMEM_DECLSPEC int shmem_short_test_all(volatile short *ivars, size_t nelems, const int *status, int cmp, short value); +OSHMEM_DECLSPEC int shmem_ushort_test_all(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC int shmem_int_test_all(volatile int *ivars, size_t nelems, const int *status, int cmp, int value); +OSHMEM_DECLSPEC int shmem_long_test_all(volatile long *ivars, size_t nelems, const int *status, int cmp, long value); +OSHMEM_DECLSPEC int shmem_longlong_test_all(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC int
shmem_uint_test_all(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC int shmem_ulong_test_all(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC int shmem_ulonglong_test_all(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC int shmem_int32_test_all(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC int shmem_int64_test_all(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC int shmem_uint32_test_all(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t value); +OSHMEM_DECLSPEC int shmem_uint64_test_all(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC int shmem_size_test_all(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t value); +OSHMEM_DECLSPEC int shmem_ptrdiff_test_all(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define shmem_test_all(ivars, nelems, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: shmem_short_test_all, \ + unsigned short*: shmem_ushort_test_all, \ + int*: shmem_int_test_all, \ + long*: shmem_long_test_all, \ + long long*: shmem_longlong_test_all, \ + unsigned int*: shmem_uint_test_all, \ + unsigned long*: shmem_ulong_test_all, \ + unsigned long long*: shmem_ulonglong_test_all)(ivars, nelems, status, cmp, value) +#endif +/* The shmem_*_test_all prototypes above return int; the shmem_*_test_any prototypes that follow take the same (ivars, nelems, status, cmp, value) parameters and return size_t. */ +OSHMEM_DECLSPEC size_t shmem_short_test_any(volatile short *ivars, size_t nelems, const int *status, int cmp, short value); +OSHMEM_DECLSPEC size_t shmem_ushort_test_any(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC size_t shmem_int_test_any(volatile int *ivars, size_t nelems, const int *status,
int cmp, int value); +OSHMEM_DECLSPEC size_t shmem_long_test_any(volatile long *ivars, size_t nelems, const int *status, int cmp, long value); +OSHMEM_DECLSPEC size_t shmem_longlong_test_any(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC size_t shmem_uint_test_any(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC size_t shmem_ulong_test_any(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC size_t shmem_ulonglong_test_any(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC size_t shmem_int32_test_any(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC size_t shmem_int64_test_any(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC size_t shmem_uint32_test_any(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t value); +OSHMEM_DECLSPEC size_t shmem_uint64_test_any(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC size_t shmem_size_test_any(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t value); +OSHMEM_DECLSPEC size_t shmem_ptrdiff_test_any(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define shmem_test_any(ivars, nelems, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: shmem_short_test_any, \ + unsigned short*: shmem_ushort_test_any, \ + int*: shmem_int_test_any, \ + long*: shmem_long_test_any, \ + long long*: shmem_longlong_test_any, \ + unsigned int*: shmem_uint_test_any, \ + unsigned long*: shmem_ulong_test_any, \ + unsigned long long*: shmem_ulonglong_test_any)(ivars, nelems, status, cmp, value) +#endif +/* shmem_*_test_some: same parameter list as the test_any prototypes with an additional size_t *indices argument after nelems; returns size_t. */ +OSHMEM_DECLSPEC size_t
shmem_short_test_some(volatile short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, short value) ; +OSHMEM_DECLSPEC size_t shmem_ushort_test_some(volatile unsigned short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned short value); +OSHMEM_DECLSPEC size_t shmem_int_test_some(volatile int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int value); +OSHMEM_DECLSPEC size_t shmem_long_test_some(volatile long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long value); +OSHMEM_DECLSPEC size_t shmem_longlong_test_some(volatile long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long long value); +OSHMEM_DECLSPEC size_t shmem_uint_test_some(volatile unsigned int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned int value); +OSHMEM_DECLSPEC size_t shmem_ulong_test_some(volatile unsigned long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long value); +OSHMEM_DECLSPEC size_t shmem_ulonglong_test_some(volatile unsigned long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long long value); +OSHMEM_DECLSPEC size_t shmem_int32_test_some(volatile int32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int32_t value); +OSHMEM_DECLSPEC size_t shmem_int64_test_some(volatile int64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int64_t value); +OSHMEM_DECLSPEC size_t shmem_uint32_test_some(volatile uint32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint32_t value); +OSHMEM_DECLSPEC size_t shmem_uint64_test_some(volatile uint64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint64_t value); +OSHMEM_DECLSPEC size_t shmem_size_test_some(volatile size_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, size_t value); +OSHMEM_DECLSPEC size_t shmem_ptrdiff_test_some(volatile
ptrdiff_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, ptrdiff_t value); +#if OSHMEM_HAVE_C11 +#define shmem_test_some(ivars, nelems, indices, status, cmp, value) \ + _Generic(&*(ivars), \ + short*: shmem_short_test_some, \ + unsigned short*: shmem_ushort_test_some, \ + int*: shmem_int_test_some, \ + long*: shmem_long_test_some, \ + long long*: shmem_longlong_test_some, \ + unsigned int*: shmem_uint_test_some, \ + unsigned long*: shmem_ulong_test_some, \ + unsigned long long*: shmem_ulonglong_test_some)(ivars, nelems, indices, status, cmp, value) +#endif + +OSHMEM_DECLSPEC int shmem_short_test_all_vector(volatile short *ivars, size_t nelems, const int *status, int cmp, short *values); +OSHMEM_DECLSPEC int shmem_ushort_test_all_vector(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short *values); +OSHMEM_DECLSPEC int shmem_int_test_all_vector(volatile int *ivars, size_t nelems, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC int shmem_long_test_all_vector(volatile long *ivars, size_t nelems, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC int shmem_longlong_test_all_vector(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long *values); +OSHMEM_DECLSPEC int shmem_uint_test_all_vector(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC int shmem_ulong_test_all_vector(volatile unsigned long *ivars, size_t nelems, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC int shmem_ulonglong_test_all_vector(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC int shmem_int32_test_all_vector(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t *values); +OSHMEM_DECLSPEC int shmem_int64_test_all_vector(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t *values); 
+OSHMEM_DECLSPEC int shmem_uint32_test_all_vector(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC int shmem_uint64_test_all_vector(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC int shmem_size_test_all_vector(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC int shmem_ptrdiff_test_all_vector(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t *values); +#if OSHMEM_HAVE_C11 +#define shmem_test_all_vector(ivars, nelems, status, cmp, values) \ + _Generic(&*(ivars), \ + short*: shmem_short_test_all_vector, \ + unsigned short*: shmem_ushort_test_all_vector, \ + int*: shmem_int_test_all_vector, \ + long*: shmem_long_test_all_vector, \ + long long*: shmem_longlong_test_all_vector, \ + unsigned int*: shmem_uint_test_all_vector, \ + unsigned long*: shmem_ulong_test_all_vector, \ + unsigned long long*: shmem_ulonglong_test_all_vector)(ivars, nelems, status, cmp, values) +#endif + + +OSHMEM_DECLSPEC size_t shmem_short_test_any_vector(volatile short *ivars, size_t nelems, const int *status, int cmp, short *values); +OSHMEM_DECLSPEC size_t shmem_ushort_test_any_vector(volatile unsigned short *ivars, size_t nelems, const int *status, int cmp, unsigned short *values); +OSHMEM_DECLSPEC size_t shmem_int_test_any_vector(volatile int *ivars, size_t nelems, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC size_t shmem_long_test_any_vector(volatile long *ivars, size_t nelems, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC size_t shmem_longlong_test_any_vector(volatile long long *ivars, size_t nelems, const int *status, int cmp, long long *values); +OSHMEM_DECLSPEC size_t shmem_uint_test_any_vector(volatile unsigned int *ivars, size_t nelems, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC size_t shmem_ulong_test_any_vector(volatile unsigned long *ivars, 
size_t nelems, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC size_t shmem_ulonglong_test_any_vector(volatile unsigned long long *ivars, size_t nelems, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC size_t shmem_int32_test_any_vector(volatile int32_t *ivars, size_t nelems, const int *status, int cmp, int32_t *values); +OSHMEM_DECLSPEC size_t shmem_int64_test_any_vector(volatile int64_t *ivars, size_t nelems, const int *status, int cmp, int64_t *values); +OSHMEM_DECLSPEC size_t shmem_uint32_test_any_vector(volatile uint32_t *ivars, size_t nelems, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC size_t shmem_uint64_test_any_vector(volatile uint64_t *ivars, size_t nelems, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC size_t shmem_size_test_any_vector(volatile size_t *ivars, size_t nelems, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC size_t shmem_ptrdiff_test_any_vector(volatile ptrdiff_t *ivars, size_t nelems, const int *status, int cmp, ptrdiff_t *values); +#if OSHMEM_HAVE_C11 +#define shmem_test_any_vector(ivars, nelems, status, cmp, values) \ + _Generic(&*(ivars), \ + short*: shmem_short_test_any_vector, \ + unsigned short*: shmem_ushort_test_any_vector, \ + int*: shmem_int_test_any_vector, \ + long*: shmem_long_test_any_vector, \ + long long*: shmem_longlong_test_any_vector, \ + unsigned int*: shmem_uint_test_any_vector, \ + unsigned long*: shmem_ulong_test_any_vector, \ + unsigned long long*: shmem_ulonglong_test_any_vector)(ivars, nelems, status, cmp, values) +#endif + + +OSHMEM_DECLSPEC size_t shmem_short_test_some_vector(volatile short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, short *values); +OSHMEM_DECLSPEC size_t shmem_ushort_test_some_vector(volatile unsigned short *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned short *values); +OSHMEM_DECLSPEC size_t shmem_int_test_some_vector(volatile int *ivars, size_t 
nelems, size_t *indices, const int *status, int cmp, int *values); +OSHMEM_DECLSPEC size_t shmem_long_test_some_vector(volatile long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long *values); +OSHMEM_DECLSPEC size_t shmem_longlong_test_some_vector(volatile long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, long long *values); +OSHMEM_DECLSPEC size_t shmem_uint_test_some_vector(volatile unsigned int *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned int *values); +OSHMEM_DECLSPEC size_t shmem_ulong_test_some_vector(volatile unsigned long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long *values); +OSHMEM_DECLSPEC size_t shmem_ulonglong_test_some_vector(volatile unsigned long long *ivars, size_t nelems, size_t *indices, const int *status, int cmp, unsigned long long *values); +OSHMEM_DECLSPEC size_t shmem_int32_test_some_vector(volatile int32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int32_t *values); +OSHMEM_DECLSPEC size_t shmem_int64_test_some_vector(volatile int64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, int64_t *values); +OSHMEM_DECLSPEC size_t shmem_uint32_test_some_vector(volatile uint32_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint32_t *values); +OSHMEM_DECLSPEC size_t shmem_uint64_test_some_vector(volatile uint64_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, uint64_t *values); +OSHMEM_DECLSPEC size_t shmem_size_test_some_vector(volatile size_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, size_t *values); +OSHMEM_DECLSPEC size_t shmem_ptrdiff_test_some_vector(volatile ptrdiff_t *ivars, size_t nelems, size_t *indices, const int *status, int cmp, ptrdiff_t *values); +#if OSHMEM_HAVE_C11 +#define shmem_test_some_vector(ivars, nelems, indices, status, cmp, values) \ + _Generic(&*(ivars), \ + short*: 
shmem_short_test_some_vector, \ + unsigned short*: shmem_ushort_test_some_vector, \ + int*: shmem_int_test_some_vector, \ + long*: shmem_long_test_some_vector, \ + long long*: shmem_longlong_test_some_vector, \ + unsigned int*: shmem_uint_test_some_vector, \ + unsigned long*: shmem_ulong_test_some_vector, \ + unsigned long long*: shmem_ulonglong_test_some_vector)(ivars, nelems, indices, status, cmp, values) +#endif + /* * Barrier sync routines */ OSHMEM_DECLSPEC void shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync); OSHMEM_DECLSPEC void shmem_barrier_all(void); -OSHMEM_DECLSPEC void shmem_sync(int PE_start, int logPE_stride, int PE_size, long *pSync); +OSHMEM_DECLSPEC void shmem_sync_deprecated(int PE_start, int logPE_stride, int PE_size, long *pSync); OSHMEM_DECLSPEC void shmem_sync_all(void); OSHMEM_DECLSPEC void shmem_fence(void); OSHMEM_DECLSPEC void shmem_ctx_fence(shmem_ctx_t ctx); OSHMEM_DECLSPEC void shmem_quiet(void); OSHMEM_DECLSPEC void shmem_ctx_quiet(shmem_ctx_t ctx); +#if OSHMEM_HAVE_C11 +#define shmem_sync(...) 
\ + _Generic((__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_team_t: shmem_team_sync, \ + int: shmem_sync_deprecated)(__VA_ARGS__) +#endif /* * Collective routines */ OSHMEM_DECLSPEC void shmem_broadcast32(void *target, const void *source, size_t nlong, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); OSHMEM_DECLSPEC void shmem_broadcast64(void *target, const void *source, size_t nlong, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); -OSHMEM_DECLSPEC void shmem_broadcast(void *target, const void *source, size_t nlong, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); OSHMEM_DECLSPEC void shmem_collect32(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync); OSHMEM_DECLSPEC void shmem_collect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync); OSHMEM_DECLSPEC void shmem_fcollect32(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync); diff --git a/oshmem/include/shmemx.h b/oshmem/include/shmemx.h index f7e7de68295..db4ffb5e81c 100644 --- a/oshmem/include/shmemx.h +++ b/oshmem/include/shmemx.h @@ -36,11 +36,6 @@ enum { * file. These extensions shall use the shmemx_ prefix for all routine, variable, and constant names. 
*/ -/* - * Symmetric heap routines - */ -OSHMEM_DECLSPEC void* shmemx_malloc_with_hint(size_t size, long hint); - /* * Elemental put routines */ diff --git a/oshmem/mca/atomic/atomic.h b/oshmem/mca/atomic/atomic.h index 912f2a71955..fcdd87fff87 100644 --- a/oshmem/mca/atomic/atomic.h +++ b/oshmem/mca/atomic/atomic.h @@ -105,6 +105,42 @@ BEGIN_C_DECLS target, value, pe); \ } +#define DO_OSHMEM_TYPE_FOP_NBI(ctx, type_name, type, op, fetch, target, value, pe) do { \ + int rc = OSHMEM_SUCCESS; \ + size_t size = 0; \ + type out_value; \ + \ + RUNTIME_CHECK_INIT(); \ + RUNTIME_CHECK_PE(pe); \ + RUNTIME_CHECK_ADDR(target); \ + \ + size = sizeof(out_value); \ + rc = MCA_ATOMIC_CALL(f##op##_nb( \ + ctx, \ + fetch, \ + (void*)target, \ + (void*)&out_value, \ + value, \ + size, \ + pe)); \ + RUNTIME_CHECK_RC(rc); \ + \ + return ; \ + } while (0) + +#define OSHMEM_TYPE_FOP_NBI(type_name, type, prefix, op) \ + void prefix##_##type_name##_atomic_fetch_##op##_nbi(type *fetch, type *target, type value, int pe) \ + { \ + DO_OSHMEM_TYPE_FOP_NBI(oshmem_ctx_default, type_name, type, op, \ + fetch, target, value, pe); \ + } + +#define OSHMEM_CTX_TYPE_FOP_NBI(type_name, type, prefix, op) \ + void prefix##_ctx_##type_name##_atomic_fetch_##op##_nbi(shmem_ctx_t ctx, type *fetch, type *target, type value, int pe) \ + { \ + DO_OSHMEM_TYPE_FOP_NBI(ctx, type_name, type, op, \ + fetch, target, value, pe); \ + } /* ******************************************************************** */ struct oshmem_op_t; @@ -216,6 +252,50 @@ struct mca_atomic_base_module_1_0_0_t { uint64_t value, size_t size, int pe); + int (*atomic_fadd_nb)(shmem_ctx_t ctx, + void *fetch, + void *target, + void *prev, + uint64_t value, + size_t size, + int pe); + int (*atomic_cswap_nb)(shmem_ctx_t ctx, + void *fetch, + void *target, + uint64_t *prev, /* prev is used internally by wrapper, we may + always use 64-bit value */ + uint64_t cond, + uint64_t value, + size_t size, + int pe); + int (*atomic_swap_nb)(shmem_ctx_t ctx, + 
void *fetch, + void *target, + void *prev, + uint64_t value, + size_t size, + int pe); + int (*atomic_fand_nb)(shmem_ctx_t ctx, + void *fetch, + void *target, + void *prev, + uint64_t value, + size_t size, + int pe); + int (*atomic_for_nb)(shmem_ctx_t ctx, + void *fetch, + void *target, + void *prev, + uint64_t value, + size_t size, + int pe); + int (*atomic_fxor_nb)(shmem_ctx_t ctx, + void *fetch, + void *target, + void *prev, + uint64_t value, + size_t size, + int pe); }; typedef struct mca_atomic_base_module_1_0_0_t mca_atomic_base_module_1_0_0_t; diff --git a/oshmem/mca/atomic/ucx/Makefile.am b/oshmem/mca/atomic/ucx/Makefile.am index 66ce2106818..2a7e78c57ca 100644 --- a/oshmem/mca/atomic/ucx/Makefile.am +++ b/oshmem/mca/atomic/ucx/Makefile.am @@ -35,7 +35,7 @@ mcacomponent_LTLIBRARIES = $(component_install) mca_atomic_ucx_la_SOURCES = $(ucx_sources) mca_atomic_ucx_la_LIBADD = $(top_builddir)/oshmem/liboshmem.la \ $(atomic_ucx_LIBS) \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/ucx/@OPAL_LIB_NAME@mca_common_ucx.la + $(OPAL_TOP_BUILDDIR)/opal/mca/common/ucx/lib@OPAL_LIB_NAME@mca_common_ucx.la mca_atomic_ucx_la_LDFLAGS = -module -avoid-version $(atomic_ucx_LDFLAGS) noinst_LTLIBRARIES = $(component_noinst) diff --git a/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c b/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c index 45b0ce00692..110a54344ae 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c +++ b/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c @@ -48,7 +48,8 @@ int mca_atomic_ucx_cswap(shmem_ctx_t ctx, assert(NULL != prev); *prev = value; - ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self); + ucx_mkey = mca_spml_ucx_ctx_mkey_by_va(ctx, pe, target, (void *)&rva, mca_spml_self); + assert(NULL != ucx_mkey); #if HAVE_DECL_UCP_ATOMIC_OP_NBX status_ptr = ucp_atomic_op_nbx(ucx_ctx->ucp_peers[pe].ucp_conn, UCP_ATOMIC_OP_CSWAP, &cond, 1, rva, @@ -71,3 +72,4 @@ int mca_atomic_ucx_cswap(shmem_ctx_t ctx, "ucp_atomic_fetch_nb"); #endif } + diff --git 
a/oshmem/mca/atomic/ucx/atomic_ucx_module.c b/oshmem/mca/atomic/ucx/atomic_ucx_module.c index 8a9a4a06311..2f84d6cdb04 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx_module.c +++ b/oshmem/mca/atomic/ucx/atomic_ucx_module.c @@ -55,36 +55,38 @@ int mca_atomic_ucx_op(shmem_ctx_t ctx, ucp_atomic_post_op_t op) #endif { - ucs_status_t status; spml_ucx_mkey_t *ucx_mkey; uint64_t rva; mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; #if HAVE_DECL_UCP_ATOMIC_OP_NBX ucs_status_ptr_t status_ptr; +#else + ucs_status_t status; #endif + int res; assert((8 == size) || (4 == size)); - ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self); - + ucx_mkey = mca_spml_ucx_ctx_mkey_by_va(ctx, pe, target, (void *)&rva, mca_spml_self); + assert(NULL != ucx_mkey); #if HAVE_DECL_UCP_ATOMIC_OP_NBX status_ptr = ucp_atomic_op_nbx(ucx_ctx->ucp_peers[pe].ucp_conn, op, &value, 1, rva, ucx_mkey->rkey, &mca_spml_ucp_request_params[size >> 3]); - if (OPAL_LIKELY(!UCS_PTR_IS_ERR(status_ptr))) { - mca_spml_ucx_remote_op_posted(ucx_ctx, pe); - } - status = UCS_PTR_STATUS(status_ptr); + res = opal_common_ucx_wait_request(status_ptr, ucx_ctx->ucp_worker[0], + "ucp_atomic_op_nbx post"); #else status = ucp_atomic_post(ucx_ctx->ucp_peers[pe].ucp_conn, op, value, size, rva, ucx_mkey->rkey); + res = ucx_status_to_oshmem(status); #endif - if (OPAL_LIKELY(UCS_OK == status)) { + + if (OPAL_LIKELY(OSHMEM_SUCCESS == res)) { mca_spml_ucx_remote_op_posted(ucx_ctx, pe); } - return ucx_status_to_oshmem(status); + return res; } static inline @@ -115,7 +117,8 @@ int mca_atomic_ucx_fop(shmem_ctx_t ctx, assert((8 == size) || (4 == size)); - ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self); + ucx_mkey = mca_spml_ucx_ctx_mkey_by_va(ctx, pe, target, (void *)&rva, mca_spml_self); + assert(NULL != ucx_mkey); #if HAVE_DECL_UCP_ATOMIC_OP_NBX status_ptr = ucp_atomic_op_nbx(ucx_ctx->ucp_peers[pe].ucp_conn, op, &value, 1, rva, ucx_mkey->rkey, ¶m); @@ -265,6 +268,77 @@ 
static int mca_atomic_ucx_swap(shmem_ctx_t ctx, #endif } +static int mca_atomic_ucx_fadd_nb(shmem_ctx_t ctx, + void *fetch, + void *target, + void *prev, + uint64_t value, + size_t size, + int pe) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +static int mca_atomic_ucx_fand_nb(shmem_ctx_t ctx, + void *fetch, + void *target, + void *prev, + uint64_t value, + size_t size, + int pe) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +static int mca_atomic_ucx_for_nb(shmem_ctx_t ctx, + void *fetch, + void *target, + void *prev, + uint64_t value, + size_t size, + int pe) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +static int mca_atomic_ucx_fxor_nb(shmem_ctx_t ctx, + void *fetch, + void *target, + void *prev, + uint64_t value, + size_t size, + int pe) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +static int mca_atomic_ucx_swap_nb(shmem_ctx_t ctx, + void *fetch, + void *target, + void *prev, + uint64_t value, + size_t size, + int pe) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + + +int mca_atomic_ucx_cswap_nb(shmem_ctx_t ctx, + void *fetch, + void *target, + uint64_t *prev, + uint64_t cond, + uint64_t value, + size_t size, + int pe) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + + + + mca_atomic_base_module_t * mca_atomic_ucx_query(int *priority) @@ -285,6 +359,12 @@ mca_atomic_ucx_query(int *priority) module->super.atomic_fxor = mca_atomic_ucx_fxor; module->super.atomic_swap = mca_atomic_ucx_swap; module->super.atomic_cswap = mca_atomic_ucx_cswap; + module->super.atomic_fadd_nb = mca_atomic_ucx_fadd_nb; + module->super.atomic_fand_nb = mca_atomic_ucx_fand_nb; + module->super.atomic_for_nb = mca_atomic_ucx_for_nb; + module->super.atomic_fxor_nb = mca_atomic_ucx_fxor_nb; + module->super.atomic_swap_nb = mca_atomic_ucx_swap_nb; + module->super.atomic_cswap_nb = mca_atomic_ucx_cswap_nb; return &(module->super); } diff --git a/oshmem/mca/memheap/base/base.h b/oshmem/mca/memheap/base/base.h index 64cdb01ca4f..92772ecb653 100644 --- a/oshmem/mca/memheap/base/base.h +++ 
b/oshmem/mca/memheap/base/base.h @@ -176,13 +176,35 @@ static inline int memheap_is_va_in_segment(void *va, int segno) return map_segment_is_va_in(&memheap_find_seg(segno)->super, va); } -static inline int memheap_find_segnum(void *va) +static inline int memheap_find_segnum(void *va, int pe) { int i; - - for (i = 0; i < mca_memheap_base_map.n_segments; i++) { - if (memheap_is_va_in_segment(va, i)) { - return i; + int my_pe = oshmem_my_proc_id(); + + if (pe == my_pe) { + /* Find segment number for local segment using va_base + * TODO: Merge local and remote segment information in mkeys_cache + */ + for (i = 0; i < mca_memheap_base_map.n_segments; i++) { + if (memheap_is_va_in_segment(va, i)) { + return i; + } + } + } else { + /* Find segment number for remote segments using va_base */ + for (i = 0; i < mca_memheap_base_map.n_segments; i++) { + map_segment_t *seg = memheap_find_seg(i); + if (seg) { + sshmem_mkey_t **mkeys_cache = seg->mkeys_cache; + if (mkeys_cache) { + if (mkeys_cache[pe]) { + if ((va >= mkeys_cache[pe]->va_base) && + ((char*)va < (char*)mkeys_cache[pe]->va_base + mkeys_cache[pe]->len)) { + return i; + } + } + } + } } } return MEMHEAP_SEG_INVALID; @@ -200,22 +222,6 @@ static inline void *map_segment_va2rva(mkey_segment_t *seg, void *va) return memheap_va2rva(va, seg->super.va_base, seg->rva_base); } -static inline map_base_segment_t *map_segment_find_va(map_base_segment_t *segs, - size_t elem_size, void *va) -{ - map_base_segment_t *rseg; - int i; - - for (i = 0; i < MCA_MEMHEAP_MAX_SEGMENTS; i++) { - rseg = (map_base_segment_t *)((char *)segs + elem_size * i); - if (OPAL_LIKELY(map_segment_is_va_in(rseg, va))) { - return rseg; - } - } - - return NULL; -} - void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno); static inline map_segment_t *memheap_find_va(void* va) diff --git a/oshmem/mca/memheap/base/memheap_base_mkey.c b/oshmem/mca/memheap/base/memheap_base_mkey.c index db682498f73..a765285a93f 100644 --- 
a/oshmem/mca/memheap/base/memheap_base_mkey.c +++ b/oshmem/mca/memheap/base/memheap_base_mkey.c @@ -151,9 +151,6 @@ static void unpack_remote_mkeys(shmem_ctx_t ctx, pmix_data_buffer_t *msg, int re int32_t n; int32_t tr_id; int i; - ompi_proc_t *proc; - - proc = oshmem_proc_group_find(oshmem_group_all, remote_pe); cnt = 1; PMIx_Data_unpack(NULL, msg, &n, &cnt, PMIX_UINT32); for (i = 0; i < n; i++) { @@ -168,7 +165,7 @@ static void unpack_remote_mkeys(shmem_ctx_t ctx, pmix_data_buffer_t *msg, int re if (0 == memheap_oob.mkeys[tr_id].va_base) { cnt = 1; PMIx_Data_unpack(NULL, msg, &memheap_oob.mkeys[tr_id].u.key, &cnt, PMIX_UINT64); - if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) { + if (oshmem_proc_on_local_node(remote_pe)) { memheap_attach_segment(&memheap_oob.mkeys[tr_id], tr_id); } } else { @@ -775,7 +772,6 @@ void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno) s = memheap_find_seg(segno); assert(NULL != s); - seg->super.va_base = s->super.va_base; seg->super.va_end = s->super.va_end; seg->rva_base = mkey->va_base; diff --git a/oshmem/mca/memheap/base/memheap_base_register.c b/oshmem/mca/memheap/base/memheap_base_register.c index ea742b2eb5e..4412d9ac74a 100644 --- a/oshmem/mca/memheap/base/memheap_base_register.c +++ b/oshmem/mca/memheap/base/memheap_base_register.c @@ -86,7 +86,7 @@ static int _dereg_segment(map_segment_t *s) continue; if (s->mkeys_cache[j]) { if (s->mkeys_cache[j]->len) { - MCA_SPML_CALL(rmkey_free(s->mkeys_cache[j])); + MCA_SPML_CALL(rmkey_free(s->mkeys_cache[j], j)); free(s->mkeys_cache[j]->u.data); s->mkeys_cache[j]->len = 0; } diff --git a/oshmem/mca/scoll/basic/scoll_basic_alltoall.c b/oshmem/mca/scoll/basic/scoll_basic_alltoall.c index 6a87e85578f..f539e5d0e1f 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_alltoall.c +++ b/oshmem/mca/scoll/basic/scoll_basic_alltoall.c @@ -116,7 +116,7 @@ get_dst_pe(struct oshmem_group_t *group, int src_blk_idx, int dst_blk_idx, int * (*dst_pe_idx) = (dst_blk_idx + 
src_blk_idx) % group->proc_count; /* convert to the global pe */ - return oshmem_proc_pe(group->proc_array[*dst_pe_idx]); + return oshmem_proc_pe_vpid(group, *dst_pe_idx); } static int a2as_alg_simple(struct oshmem_group_t *group, diff --git a/oshmem/mca/scoll/basic/scoll_basic_barrier.c b/oshmem/mca/scoll/basic/scoll_basic_barrier.c index e4d0ee23d3d..fd60d10c28c 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_barrier.c +++ b/oshmem/mca/scoll/basic/scoll_basic_barrier.c @@ -103,7 +103,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, int rc = OSHMEM_SUCCESS; long value = SHMEM_SYNC_INIT; int root_id = 0; - int PE_root = oshmem_proc_pe(group->proc_array[root_id]); + int PE_root = oshmem_proc_pe_vpid(group, root_id); int i = 0; SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Central Counter", group->my_pe); @@ -124,7 +124,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, wait_pe_count = group->proc_count; for (i = 0; i < group->proc_count; i++) { - wait_pe_array[i] = oshmem_proc_pe(group->proc_array[i]); + wait_pe_array[i] = oshmem_proc_pe_vpid(group, i); } wait_pe_array[root_id] = OSHMEM_PE_INVALID; wait_pe_count--; @@ -151,7 +151,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, value = SHMEM_SYNC_RUN; for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) { - pe_cur = oshmem_proc_pe(group->proc_array[i]); + pe_cur = oshmem_proc_pe_vpid(group, i); if (pe_cur != PE_root) { rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, pe_cur)); } @@ -238,7 +238,7 @@ static int _algorithm_tournament(struct oshmem_group_t *group, long *pSync) SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round); rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG)); } else { - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); #if 1 /* It is ugly implementation of compare and swap operation Usage 
of this hack does not give performance improvement but @@ -284,7 +284,7 @@ static int _algorithm_tournament(struct oshmem_group_t *group, long *pSync) for (peer_id = 1; (peer_id < group->proc_count) && (rc == OSHMEM_SUCCESS); peer_id++) { - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } } @@ -333,7 +333,7 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, if (my_id >= floor2_proc) { /* I am in extra group, my partner is node (my_id-y) in basic group */ peer_id = my_id - floor2_proc; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); SCOLL_VERBOSE(14, "[#%d] is extra and signal to #%d", @@ -357,7 +357,7 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, if ((group->proc_count - floor2_proc) > my_id) { /* I am in basic group, my partner is node (my_id+y) in extra group */ peer_id = my_id + floor2_proc; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); SCOLL_VERBOSE(14, "[#%d] wait a signal from #%d", @@ -376,8 +376,7 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, /* Update exit condition and round counter */ exit_flag >>= 1; round++; - - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); #if 1 /* It is ugly implementation of compare and swap operation Usage of this hack does not give performance improvement but @@ -420,7 +419,7 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, if ((group->proc_count - floor2_proc) > my_id) { /* I am in basic group, my partner is node (my_id+y) in extra group */ peer_id = my_id + floor2_proc; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); SCOLL_VERBOSE(14, "[#%d] signals 
to #%d", group->my_pe, peer_pe); value = SHMEM_SYNC_RUN; @@ -462,8 +461,7 @@ static int _algorithm_dissemination(struct oshmem_group_t *group, long *pSync) for (round = 0; (round <= log2_proc) && (rc == OSHMEM_SUCCESS); round++) { /* Define a peer to send signal */ peer_id = (my_id + (1 << round)) % group->proc_count; - - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); #if 1 /* It is ugly implementation of compare and swap operation Usage of this hack does not give performance improvement but @@ -502,7 +500,7 @@ static int _algorithm_basic(struct oshmem_group_t *group, long *pSync) { int rc = OSHMEM_SUCCESS; int root_id = 0; - int PE_root = oshmem_proc_pe(group->proc_array[root_id]); + int PE_root = oshmem_proc_pe_vpid(group, root_id); int i = 0; SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Basic", group->my_pe); @@ -525,7 +523,7 @@ static int _algorithm_basic(struct oshmem_group_t *group, long *pSync) int pe_cur = 0; for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) { - pe_cur = oshmem_proc_pe(group->proc_array[i]); + pe_cur = oshmem_proc_pe_vpid(group, i); if (pe_cur != PE_root) { rc = MCA_SPML_CALL(recv(NULL, 0, pe_cur)); } @@ -535,7 +533,7 @@ static int _algorithm_basic(struct oshmem_group_t *group, long *pSync) } for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) { - pe_cur = oshmem_proc_pe(group->proc_array[i]); + pe_cur = oshmem_proc_pe_vpid(group, i); if (pe_cur != PE_root) { rc = MCA_SPML_CALL(send(NULL, 0, pe_cur, MCA_SPML_BASE_PUT_STANDARD)); } @@ -564,7 +562,7 @@ static int _algorithm_adaptive(struct oshmem_group_t *group, long *pSync) if (i == my_id) continue; - if (!OPAL_PROC_ON_LOCAL_NODE(group->proc_array[i]->super.proc_flags)) { + if (!oshmem_proc_on_local_node(i)) { local_peers_only = false; break; } diff --git a/oshmem/mca/scoll/basic/scoll_basic_broadcast.c b/oshmem/mca/scoll/basic/scoll_basic_broadcast.c index 5ba4686eacc..b07e379ff85 100644 --- 
a/oshmem/mca/scoll/basic/scoll_basic_broadcast.c +++ b/oshmem/mca/scoll/basic/scoll_basic_broadcast.c @@ -144,7 +144,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, "[#%d] send data to all PE in the group", group->my_pe); for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) { - pe_cur = oshmem_proc_pe(group->proc_array[i]); + pe_cur = oshmem_proc_pe_vpid(group, i); if (pe_cur != PE_root) { SCOLL_VERBOSE(15, "[#%d] send data to #%d", @@ -233,7 +233,7 @@ static int _algorithm_binomial_tree(struct oshmem_group_t *group, if (peer_id < group->proc_count) { /* Wait for the child to be ready to receive (pSync must have the initial value) */ peer_id = (peer_id + root_id) % group->proc_count; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); SCOLL_VERBOSE(14, "[#%d] check remote pe is ready to receive #%d", diff --git a/oshmem/mca/scoll/basic/scoll_basic_collect.c b/oshmem/mca/scoll/basic/scoll_basic_collect.c index 5689780d7f1..4631bfeb7b0 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_collect.c +++ b/oshmem/mca/scoll/basic/scoll_basic_collect.c @@ -155,7 +155,7 @@ static int _algorithm_f_central_counter(struct oshmem_group_t *group, { int rc = OSHMEM_SUCCESS; int i = 0; - int PE_root = oshmem_proc_pe(group->proc_array[0]); + int PE_root = oshmem_proc_pe_vpid(group, 0); SCOLL_VERBOSE(12, "[#%d] Collect algorithm: Central Counter (identical size)", @@ -174,7 +174,7 @@ static int _algorithm_f_central_counter(struct oshmem_group_t *group, group->my_pe); for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) { /* Get PE ID of a peer from the group */ - pe_cur = oshmem_proc_pe(group->proc_array[i]); + pe_cur = oshmem_proc_pe_vpid(group, i); if (pe_cur == group->my_pe) continue; @@ -221,7 +221,7 @@ static int _algorithm_f_tournament(struct oshmem_group_t *group, int my_id = oshmem_proc_group_find_id(group, group->my_pe); int peer_id = 0; int peer_pe = 0; - int PE_root = 
oshmem_proc_pe(group->proc_array[0]); + int PE_root = oshmem_proc_pe_vpid(group, 0); SCOLL_VERBOSE(12, "[#%d] Collect algorithm: Tournament (identical size)", @@ -255,7 +255,7 @@ static int _algorithm_f_tournament(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round); rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG)); } else { - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); #if 1 /* It is ugly implementation of compare and swap operation Usage of this hack does not give performance improvement but @@ -294,7 +294,7 @@ static int _algorithm_f_tournament(struct oshmem_group_t *group, for (peer_id = 1; (peer_id < group->proc_count) && (rc == OSHMEM_SUCCESS); peer_id++) { - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } } @@ -339,7 +339,7 @@ static int _algorithm_f_ring(struct oshmem_group_t *group, SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]); peer_id = (my_id + 1) % group->proc_count; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); memcpy((void*) ((unsigned char*) target + my_id * nlong), (void *) source, nlong); @@ -420,13 +420,12 @@ static int _algorithm_f_recursive_doubling(struct oshmem_group_t *group, /* I am in extra group, my partner is node (my_id-y) in basic group */ peer_id = my_id - floor2_proc; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) { if (i == my_id) continue; - - pe_cur = oshmem_proc_pe(group->proc_array[i]); + pe_cur = oshmem_proc_pe_vpid(group, i); SCOLL_VERBOSE(14, "[#%d] is extra send data to #%d", @@ -450,7 +449,7 @@ static int 
_algorithm_f_recursive_doubling(struct oshmem_group_t *group, if ((group->proc_count - floor2_proc) > my_id) { /* I am in basic group, my partner is node (my_id+y) in extra group */ peer_id = my_id + floor2_proc; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); SCOLL_VERBOSE(14, "[#%d] wait a signal from #%d", @@ -469,8 +468,7 @@ static int _algorithm_f_recursive_doubling(struct oshmem_group_t *group, /* Update exit condition and round counter */ exit_flag >>= 1; round++; - - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); #if 1 /* It is ugly implementation of compare and swap operation Usage of this hack does not give performance improvement but @@ -507,7 +505,7 @@ static int _algorithm_f_recursive_doubling(struct oshmem_group_t *group, if ((group->proc_count - floor2_proc) > my_id) { /* I am in basic group, my partner is node (my_id+y) in extra group */ peer_id = my_id + floor2_proc; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); SCOLL_VERBOSE(14, "[#%d] is extra send data to #%d", @@ -542,7 +540,7 @@ static int _algorithm_central_collector(struct oshmem_group_t *group, int rc = OSHMEM_SUCCESS; size_t offset = 0; int i = 0; - int PE_root = oshmem_proc_pe(group->proc_array[0]); + int PE_root = oshmem_proc_pe_vpid(group, 0); SCOLL_VERBOSE(12, "[#%d] Collect algorithm: Central Counter (vary size)", @@ -573,7 +571,7 @@ static int _algorithm_central_collector(struct oshmem_group_t *group, for (i = 1; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) { if (wait_pe_array[i] == 0) { - pe_cur = oshmem_proc_pe(group->proc_array[i]); + pe_cur = oshmem_proc_pe_vpid(group, i); value = 0; rc = MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, pe_cur)); if ((rc == OSHMEM_SUCCESS) @@ -602,7 +600,7 @@ static int _algorithm_central_collector(struct oshmem_group_t *group, } /* 
Get PE ID of a peer from the group */ - pe_cur = oshmem_proc_pe(group->proc_array[i]); + pe_cur = oshmem_proc_pe_vpid(group, i); /* Get data from the current peer */ rc = MCA_SPML_CALL(get(oshmem_ctx_default, (void *)source, (size_t)wait_pe_array[i], (void*)((unsigned char*)target + offset), pe_cur)); diff --git a/oshmem/mca/scoll/basic/scoll_basic_reduce.c b/oshmem/mca/scoll/basic/scoll_basic_reduce.c index b8ecb9e7daf..a7837c4d872 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_reduce.c +++ b/oshmem/mca/scoll/basic/scoll_basic_reduce.c @@ -186,7 +186,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, { int rc = OSHMEM_SUCCESS; int i = 0; - int PE_root = oshmem_proc_pe(group->proc_array[0]); + int PE_root = oshmem_proc_pe_vpid(group, 0); SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Central Counter", group->my_pe); @@ -204,7 +204,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) { /* Get PE ID of a peer from the group */ - pe_cur = oshmem_proc_pe(group->proc_array[i]); + pe_cur = oshmem_proc_pe_vpid(group, i); if (pe_cur == group->my_pe) continue; @@ -265,7 +265,7 @@ static int _algorithm_tournament(struct oshmem_group_t *group, int peer_id = 0; int peer_pe = 0; void *target_cur = NULL; - int PE_root = oshmem_proc_pe(group->proc_array[0]); + int PE_root = oshmem_proc_pe_vpid(group, 0); SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Tournament", group->my_pe); SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]); @@ -304,7 +304,7 @@ static int _algorithm_tournament(struct oshmem_group_t *group, op->o_func.c_fn(target, target_cur, nlong / op->dt_size); } } else { - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); #if 1 /* It is ugly implementation of compare and swap operation Usage of this hack does not give performance improvement but @@ -345,7 +345,7 @@ static int _algorithm_tournament(struct 
oshmem_group_t *group, for (peer_id = 1; (peer_id < group->proc_count) && (rc == OSHMEM_SUCCESS); peer_id++) { - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } } @@ -416,7 +416,7 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, if (my_id >= floor2_proc) { /* I am in extra group, my partner is node (my_id-y) in basic group */ peer_id = my_id - floor2_proc; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); /* Special procedure is needed in case target and source are the same */ if (source == target) { @@ -448,7 +448,7 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, if ((group->proc_count - floor2_proc) > my_id) { /* I am in basic group, my partner is node (my_id+y) in extra group */ peer_id = my_id + floor2_proc; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); /* Special procedure is needed in case target and source are the same */ if (source == target) { @@ -481,8 +481,7 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, /* Update exit condition and round counter */ exit_flag >>= 1; round++; - - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); #if 1 /* It is ugly implementation of compare and swap operation Usage of this hack does not give performance improvement but @@ -524,7 +523,7 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, if ((group->proc_count - floor2_proc) > my_id) { /* I am in basic group, my partner is node (my_id+y) in extra group */ peer_id = my_id + floor2_proc; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); SCOLL_VERBOSE(14, "[#%d] is extra send data to #%d", @@ -566,7 +565,7 
@@ static int _algorithm_linear(struct oshmem_group_t *group, rank = group->my_pe; size = group->proc_count; int root_id = size - 1; - int root_pe = oshmem_proc_pe(group->proc_array[root_id]); + int root_pe = oshmem_proc_pe_vpid(group, root_id); SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Basic", group->my_pe); @@ -592,7 +591,7 @@ static int _algorithm_linear(struct oshmem_group_t *group, memcpy(target, (void *) source, nlong); } else { peer_id = size - 1; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); rc = MCA_SPML_CALL(recv(target, nlong, peer_pe)); } if (OSHMEM_SUCCESS != rc) { @@ -609,7 +608,7 @@ static int _algorithm_linear(struct oshmem_group_t *group, inbuf = (char*) source; } else { peer_id = i; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); rc = MCA_SPML_CALL(recv(pml_buffer, nlong, peer_pe)); if (OSHMEM_SUCCESS != rc) { if (NULL != free_buffer) { @@ -671,7 +670,7 @@ static int _algorithm_log(struct oshmem_group_t *group, int peer_id = 0; int peer_pe = 0; int root_id = 0; - int root_pe = oshmem_proc_pe(group->proc_array[root_id]); + int root_pe = oshmem_proc_pe_vpid(group, root_id); int dim = 0; /* Initialize */ @@ -719,7 +718,7 @@ static int _algorithm_log(struct oshmem_group_t *group, if (vrank & mask) { peer_id = vrank & ~mask; peer_id = (peer_id + root_id) % size; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); rc = MCA_SPML_CALL(send((void*)snd_buffer, nlong, peer_pe, MCA_SPML_BASE_PUT_STANDARD)); if (OSHMEM_SUCCESS != rc) { @@ -738,7 +737,7 @@ static int _algorithm_log(struct oshmem_group_t *group, continue; } peer_id = (peer_id + root_id) % size; - peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); + peer_pe = oshmem_proc_pe_vpid(group, peer_id); /* Most of the time (all except the first one for commutative * operations) we receive in the user provided buffer diff --git 
a/oshmem/mca/scoll/mpi/scoll_mpi_module.c b/oshmem/mca/scoll/mpi/scoll_mpi_module.c index ca487caa696..a0f15ca0477 100644 --- a/oshmem/mca/scoll/mpi/scoll_mpi_module.c +++ b/oshmem/mca/scoll/mpi/scoll_mpi_module.c @@ -145,7 +145,7 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority) /* Fill the map "group_rank-to-world_rank" in order to create a new proc group */ for (i = 0; i < osh_group->proc_count; i++) { - ranks[i] = osh_group->proc_array[i]->super.proc_name.vpid; + ranks[i] = oshmem_proc_pe_vpid(osh_group, i); } OPAL_TIMING_ENV_NEXT(comm_query, "build_ranks"); diff --git a/oshmem/mca/scoll/ucc/scoll_ucc.h b/oshmem/mca/scoll/ucc/scoll_ucc.h index f160e0ca832..4df8f60fceb 100644 --- a/oshmem/mca/scoll/ucc/scoll_ucc.h +++ b/oshmem/mca/scoll/ucc/scoll_ucc.h @@ -42,6 +42,7 @@ struct mca_scoll_ucc_component_t { int ucc_np; char * cls; char * cts; + int nr_modules; bool libucc_initialized; ucc_lib_h ucc_lib; ucc_lib_attr_t ucc_lib_attr; diff --git a/oshmem/mca/scoll/ucc/scoll_ucc_alltoall.c b/oshmem/mca/scoll/ucc/scoll_ucc_alltoall.c index 3615bb4e19a..08f843e9d69 100644 --- a/oshmem/mca/scoll/ucc/scoll_ucc_alltoall.c +++ b/oshmem/mca/scoll/ucc/scoll_ucc_alltoall.c @@ -12,24 +12,34 @@ #include -static inline ucc_status_t mca_scoll_ucc_alltoall_init(const void *sbuf, void *rbuf, - int count, - mca_scoll_ucc_module_t * ucc_module, - ucc_coll_req_h * req) +static inline ucc_status_t mca_scoll_ucc_alltoall_init(const void *sbuf, void *rbuf, + int count, size_t elem_size, + mca_scoll_ucc_module_t *ucc_module, + ucc_coll_req_h *req) { + ucc_datatype_t dt; + + if (elem_size == 8) { + dt = UCC_DT_INT64; + } else if (elem_size == 4) { + dt = UCC_DT_INT32; + } else { + dt = UCC_DT_INT8; + } + ucc_coll_args_t coll = { .mask = 0, .coll_type = UCC_COLL_TYPE_ALLTOALL, .src.info = { .buffer = (void *)sbuf, - .count = count, - .datatype = UCC_DT_UINT8, + .count = count * ucc_module->group->proc_count, + .datatype = dt, .mem_type = UCC_MEMORY_TYPE_UNKNOWN }, 
.dst.info = { .buffer = rbuf, - .count = count, - .datatype = UCC_DT_UINT8, + .count = count * ucc_module->group->proc_count, + .datatype = dt, .mem_type = UCC_MEMORY_TYPE_UNKNOWN }, }; @@ -56,14 +66,15 @@ int mca_scoll_ucc_alltoall(struct oshmem_group_t *group, UCC_VERBOSE(3, "running ucc alltoall"); ucc_module = (mca_scoll_ucc_module_t *) group->g_scoll.scoll_alltoall_module; - count = nelems * element_size; + count = nelems; /* Do nothing on zero-length request */ if (OPAL_UNLIKELY(!nelems)) { return OSHMEM_SUCCESS; } - SCOLL_UCC_CHECK(mca_scoll_ucc_alltoall_init(source, target, count, ucc_module, &req)); + SCOLL_UCC_CHECK(mca_scoll_ucc_alltoall_init(source, target, count, + element_size, ucc_module, &req)); SCOLL_UCC_CHECK(ucc_collective_post(req)); SCOLL_UCC_CHECK(scoll_ucc_req_wait(req)); return OSHMEM_SUCCESS; diff --git a/oshmem/mca/scoll/ucc/scoll_ucc_component.c b/oshmem/mca/scoll/ucc/scoll_ucc_component.c index 0f1612642fb..a63e78799a4 100644 --- a/oshmem/mca/scoll/ucc/scoll_ucc_component.c +++ b/oshmem/mca/scoll/ucc/scoll_ucc_component.c @@ -61,7 +61,9 @@ mca_scoll_ucc_component_t mca_scoll_ucc_component = { 0, /* ucc_enable */ 2, /* ucc_np */ "basic", /* cls */ - SCOLL_UCC_CTS_STR /* cts */ + SCOLL_UCC_CTS_STR, /* cts */ + 0, /* nr_modules */ + false /* libucc_initialized */ }; static int ucc_register(void) diff --git a/oshmem/mca/scoll/ucc/scoll_ucc_module.c b/oshmem/mca/scoll/ucc/scoll_ucc_module.c index d44349ea0c4..aaabf8724cd 100644 --- a/oshmem/mca/scoll/ucc/scoll_ucc_module.c +++ b/oshmem/mca/scoll/ucc/scoll_ucc_module.c @@ -18,8 +18,6 @@ #include "scoll_ucc.h" #include "scoll_ucc_debug.h" -#include "oshmem/mca/spml/spml.h" - #include #define OBJ_RELEASE_IF_NOT_NULL( obj ) if( NULL != (obj) ) OBJ_RELEASE( obj ); @@ -51,17 +49,21 @@ int mca_scoll_ucc_progress(void) static void mca_scoll_ucc_module_destruct(mca_scoll_ucc_module_t *ucc_module) { - ucc_team_destroy(ucc_module->ucc_team); + if (ucc_module->ucc_team) { + 
ucc_team_destroy(ucc_module->ucc_team); + --mca_scoll_ucc_component.nr_modules; + } - if (ucc_module->group->ompi_comm == (ompi_communicator_t *) &oshmem_comm_world) { - if (mca_scoll_ucc_component.libucc_initialized) { + if (1 == mca_scoll_ucc_component.nr_modules) { + if (mca_scoll_ucc_component.libucc_initialized) { UCC_VERBOSE(1, "finalizing ucc library"); opal_progress_unregister(mca_scoll_ucc_progress); ucc_context_destroy(mca_scoll_ucc_component.ucc_context); ucc_finalize(mca_scoll_ucc_component.ucc_lib); + mca_scoll_ucc_component.libucc_initialized = false; } - } - + } + OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_alltoall_module); OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_collect_module); OBJ_RELEASE_IF_NOT_NULL(ucc_module->previous_reduce_module); @@ -142,15 +144,14 @@ static inline ucc_status_t oob_probe_test(oob_allgather_req_t *oob_req) static ucc_status_t oob_allgather_test(void *req) { oob_allgather_req_t *oob_req = (oob_allgather_req_t*) req; - oshmem_group_t *osh_group = (oshmem_group_t *) oob_req->oob_coll_ctx; - ompi_communicator_t *comm = osh_group->ompi_comm; + ompi_communicator_t *comm = (ompi_communicator_t *) oob_req->oob_coll_ctx; char *tmpsend = NULL; char *tmprecv = NULL; size_t msglen = oob_req->msglen; int rank, size, sendto, recvfrom, recvdatafrom, senddatafrom; - size = osh_group->proc_count; - rank = osh_group->my_pe; + rank = ompi_comm_rank(comm); + size = ompi_comm_size(comm); if (0 == oob_req->iter) { tmprecv = (char *)oob_req->rbuf + (ptrdiff_t)rank * (ptrdiff_t)msglen; @@ -229,8 +230,10 @@ static int mca_scoll_ucc_init_ctx(oshmem_group_t *osh_group) ctx_params.oob.allgather = oob_allgather; ctx_params.oob.req_test = oob_allgather_test; ctx_params.oob.req_free = oob_allgather_free; - ctx_params.oob.coll_info = (void *) osh_group; - ctx_params.oob.participants = osh_group->proc_count; + ctx_params.oob.coll_info = (void *) oshmem_comm_world; + ctx_params.oob.n_oob_eps = ompi_comm_size(oshmem_comm_world); + ctx_params.oob.oob_ep 
= ompi_comm_rank(oshmem_comm_world); + if (UCC_OK != ucc_context_config_read(cm->ucc_lib, NULL, &ctx_config)) { UCC_ERROR("UCC context config read failed"); goto cleanup_lib; @@ -278,7 +281,9 @@ static int mca_scoll_ucc_module_enable(mca_scoll_base_module_t *module, { mca_scoll_ucc_component_t *cm = &mca_scoll_ucc_component; mca_scoll_ucc_module_t *ucc_module = (mca_scoll_ucc_module_t *) module; - ucc_status_t status; + ucc_status_t status = UCC_OK; + + ucc_module->ucc_team = NULL; ucc_team_params_t team_params = { .mask = UCC_TEAM_PARAM_FIELD_EP | @@ -288,10 +293,11 @@ static int mca_scoll_ucc_module_enable(mca_scoll_base_module_t *module, .allgather = oob_allgather, .req_test = oob_allgather_test, .req_free = oob_allgather_free, - .coll_info = (void *)osh_group, - .participants = osh_group->proc_count, + .coll_info = (void *)osh_group->ompi_comm, + .n_oob_eps = ompi_comm_size(osh_group->ompi_comm), + .oob_ep = ompi_comm_rank(osh_group->ompi_comm), }, - .ep = osh_group->my_pe, + .ep = ompi_comm_rank(osh_group->ompi_comm), .ep_range = UCC_COLLECTIVE_EP_RANGE_CONTIG, }; @@ -304,15 +310,18 @@ static int mca_scoll_ucc_module_enable(mca_scoll_base_module_t *module, return OSHMEM_ERROR; } + + ++cm->nr_modules; + if (cm->ucc_context) { + if (UCC_OK != ucc_team_create_post(&cm->ucc_context, 1, + &team_params, &ucc_module->ucc_team)) { + UCC_ERROR("ucc_team_create_post failed"); + } - if (UCC_OK != ucc_team_create_post(&cm->ucc_context, 1, - &team_params, &ucc_module->ucc_team)) { - UCC_ERROR("ucc_team_create_post failed"); - } - - while (UCC_INPROGRESS == (status = ucc_team_create_test(ucc_module->ucc_team))) { - opal_progress(); - } + while (UCC_INPROGRESS == (status = ucc_team_create_test(ucc_module->ucc_team))) { + opal_progress(); + } + } if (UCC_OK != status) { UCC_ERROR("ucc_team_create_test failed"); @@ -347,10 +356,11 @@ static int mca_scoll_ucc_module_enable(mca_scoll_base_module_t *module, mca_scoll_base_module_t * mca_scoll_ucc_comm_query(oshmem_group_t 
*osh_group, int *priority) { - mca_scoll_base_module_t *module; - mca_scoll_ucc_module_t *ucc_module; - *priority = 0; + mca_scoll_base_module_t *module; + mca_scoll_ucc_module_t *ucc_module; mca_scoll_ucc_component_t *cm; + + *priority = 0; cm = &mca_scoll_ucc_component; if (!cm->ucc_enable) { @@ -363,9 +373,11 @@ mca_scoll_ucc_comm_query(oshmem_group_t *osh_group, int *priority) OPAL_TIMING_ENV_INIT(comm_query); if (!cm->libucc_initialized) { - if (OSHMEM_SUCCESS != mca_scoll_ucc_init_ctx(osh_group)) { - cm->ucc_enable = 0; - return NULL; + if (0 < cm->nr_modules) { + if (OSHMEM_SUCCESS != mca_scoll_ucc_init_ctx(osh_group)) { + cm->ucc_enable = 0; + return NULL; + } } } diff --git a/oshmem/mca/scoll/ucc/scoll_ucc_reduce.c b/oshmem/mca/scoll/ucc/scoll_ucc_reduce.c index d2710a36eb0..30d3bb96544 100644 --- a/oshmem/mca/scoll/ucc/scoll_ucc_reduce.c +++ b/oshmem/mca/scoll/ucc/scoll_ucc_reduce.c @@ -33,7 +33,7 @@ static inline ucc_status_t mca_scoll_ucc_reduce_init(const void *sbuf, void *rbu } ucc_coll_args_t coll = { - .mask = UCC_COLL_ARGS_FIELD_PREDEFINED_REDUCTIONS, + .mask = 0, .coll_type = UCC_COLL_TYPE_ALLREDUCE, .src.info = { .buffer = (void *)sbuf, @@ -43,11 +43,11 @@ static inline ucc_status_t mca_scoll_ucc_reduce_init(const void *sbuf, void *rbu }, .dst.info = { .buffer = rbuf, + .count = count, + .datatype = ucc_dt, .mem_type = UCC_MEMORY_TYPE_UNKNOWN }, - .reduce = { - .predefined_op = ucc_op, - }, + .op = ucc_op, }; if (sbuf == rbuf) { diff --git a/oshmem/mca/spml/base/base.h b/oshmem/mca/spml/base/base.h index 75a4eaec18d..857e1935699 100644 --- a/oshmem/mca/spml/base/base.h +++ b/oshmem/mca/spml/base/base.h @@ -78,7 +78,7 @@ OSHMEM_DECLSPEC int mca_spml_base_oob_get_mkeys(shmem_ctx_t ctx, sshmem_mkey_t *mkeys); OSHMEM_DECLSPEC void mca_spml_base_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t seg, int pe, int tr_id); -OSHMEM_DECLSPEC void mca_spml_base_rmkey_free(sshmem_mkey_t *mkey); +OSHMEM_DECLSPEC void 
mca_spml_base_rmkey_free(sshmem_mkey_t *mkey, int pe); OSHMEM_DECLSPEC void *mca_spml_base_rmkey_ptr(const void *dst_addr, sshmem_mkey_t *mkey, int pe); OSHMEM_DECLSPEC int mca_spml_base_put_nb(void *dst_addr, diff --git a/oshmem/mca/spml/base/spml_base.c b/oshmem/mca/spml/base/spml_base.c index 27e7d7a58e8..80c2412032a 100644 --- a/oshmem/mca/spml/base/spml_base.c +++ b/oshmem/mca/spml/base/spml_base.c @@ -256,7 +256,7 @@ void mca_spml_base_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t s { } -void mca_spml_base_rmkey_free(sshmem_mkey_t *mkey) +void mca_spml_base_rmkey_free(sshmem_mkey_t *mkey, int pe) { } diff --git a/oshmem/mca/spml/spml.h b/oshmem/mca/spml/spml.h index ca62b5f0bd4..da14e8e1cbe 100644 --- a/oshmem/mca/spml/spml.h +++ b/oshmem/mca/spml/spml.h @@ -150,7 +150,7 @@ typedef void * (*mca_spml_base_module_mkey_ptr_fn_t)(const void *dst_addr, sshme * * @param mkey remote mkey */ -typedef void (*mca_spml_base_module_mkey_free_fn_t)(sshmem_mkey_t *); +typedef void (*mca_spml_base_module_mkey_free_fn_t)(sshmem_mkey_t *, int pe); /** * Register (Pinn) a buffer of 'size' bits starting in address addr @@ -193,9 +193,9 @@ typedef int (*mca_spml_base_module_oob_get_mkeys_fn_t)(shmem_ctx_t ctx, int pe, * @return OSHMEM_SUCCESS or failure status. * */ -typedef int (*mca_spml_base_module_add_procs_fn_t)(ompi_proc_t** procs, +typedef int (*mca_spml_base_module_add_procs_fn_t)(struct oshmem_group_t* group, size_t nprocs); -typedef int (*mca_spml_base_module_del_procs_fn_t)(ompi_proc_t** procs, +typedef int (*mca_spml_base_module_del_procs_fn_t)(struct oshmem_group_t* group, size_t nprocs); @@ -253,6 +253,648 @@ typedef int (*mca_spml_base_module_put_nb_fn_t)(shmem_ctx_t ctx, int dst, void **handle); + + +/** + * The put-with-signal routines provide a method for copying data from a + * contiguous local data object to a data object on a specified PE and + * subsequently updating a remote flag to signal completion. 
+ * + * @param ctx A context handle specifying the context on which to perform the + * operation. When this argument is not provided, the operation is + * performed on the default context. + * @param dst_addr The address in the remote PE of the object being written. + * @param size The number of bytes to be written. + * @param src_addr An address on the local PE holding the value to be written. + * @param sig_addr Symmetric address of the signal data object to be updated on the + * remote PE as a signal. + * @param signal Unsigned 64-bit value that is used for updating the remote sig_addr + * signal data object. + * @param sig_op Signal operator that represents the type of update to be performed + * on the remote sig_addr signal data object. + * @param pe PE number of the remote PE. + * + * @return OSHMEM_SUCCESS or failure status. + */ + +typedef int (*mca_spml_base_module_put_signal_fn_t)(shmem_ctx_t ctx, + void* dst_addr, + size_t size, + void* src_addr, + uint64_t *sig_addr, + uint64_t signal, + int sig_op, + int dst); + + +/** + * The nonblocking put-with-signal routines provide a method for copying data + * from a contiguous local data object to a data object on a specified PE and + * subsequently updating a remote flag to signal completion. + * + * @param ctx A context handle specifying the context on which to perform the + * operation. When this argument is not provided, the operation is + * performed on the default context. + * @param dst_addr The address in the remote PE of the object being written. + * @param size The number of bytes to be written. + * @param src_addr An address on the local PE holding the value to be written. + * @param sig_addr Symmetric address of the signal data object to be updated on the + * remote PE as a signal. + * @param signal Unsigned 64-bit value that is used for updating the remote sig_addr + * signal data object.
+ * @param sig_op Signal operator that represents the type of update to be performed + * on the remote sig_addr signal data object. + * @param pe PE number of the remote PE. + * + * @return OSHMEM_SUCCESS or failure status. + */ +typedef int (*mca_spml_base_module_put_signal_nb_fn_t) (shmem_ctx_t ctx, + void* dst_addr, + size_t size, + void* src_addr, + uint64_t *sig_addr, + uint64_t signal, + int sig_op, + int dst); + +/* + * Wait on an array of variables on the local PE until all variables + * meet the specified wait condition. + * + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section. + * @param cmp A comparison operator that compares elements of ivars with cmp_value. + * @param cmp_value The value to be compared with the objects pointed to by ivars. + * @param nelems The number of elements in the ivars array. + * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the wait set. + * @param datatype Type of the objects + * + * @return None + */ +typedef void(*mca_spml_base_module_wait_until_all_fn_t)(void *ivars, + int cmp, + void *cmp_value, + size_t nelems, + const int *status, + int datatype); + +/* + * Wait on an array of variables on the local PE until any one variable + * meets the specified wait condition. + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section. + * @param cmp A comparison operator that compares elements of ivars with cmp_value. + * @param cmp_value The value to be compared with the objects pointed to by ivars. + * @param nelems The number of elements in the ivars array. + * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the wait set. 
+ * @param datatype Type of the objects + * + * @return Returns the index of an element in the ivars array that satisfies the + * wait condition. If the wait set is empty, this routine returns SIZE_MAX. + */ +typedef size_t (*mca_spml_base_module_wait_until_any_fn_t)(void *ivars, + int cmp, + void *cmp_value, + size_t nelems, + const int *status, + int datatype); + + +/* + * Wait on an array of variables on the local PE until at least one variable + * meets the specified wait condition. + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section. + * @param cmp A comparison operator that compares elements of ivars with cmp_value. + * @param cmp_value The value to be compared with the objects pointed to by ivars. + * @param nelems The number of elements in the ivars array. + * @param indices Local address of an array of indices of length at least nelems into + * ivars that satisfied the wait condition. + * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the wait set. + * @param datatype Type of the objects + * + * @return Returns the number of indices returned in the indices array. If the wait + * set is empty, this routine returns 0. + */ +typedef size_t (*mca_spml_base_module_wait_until_some_fn_t)(void *ivars, + int cmp, + void *cmp_value, + size_t nelems, + size_t *indices, + const int *status, + int datatype); + + +/* + * Wait on an array of variables on the local PE until all variables meet the + * specified wait conditions. + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section. + * @param cmp A comparison operator that compares elements of ivars with elements + * of cmp_values. 
+ * @param cmp_values Local address of an array of length nelems containing values to be + * compared with the respective objects in ivars. + * @param nelems The number of elements in the ivars array. + * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the wait set. + * @param datatype Type of the objects + * + * @return None + * + */ +typedef void (*mca_spml_base_module_wait_until_all_vector_fn_t)(void *ivars, + int cmp, + void *cmp_values, + size_t nelems, + const int *status, + int datatype); + +/* + * Wait on an array of variables on the local PE until any one variable + * meets the specified wait condition. + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section. + * @param cmp A comparison operator that compares elements of ivars with cmp_value. + * @param cmp_value The value to be compared with the objects pointed to by ivars. + * @param nelems The number of elements in the ivars array. + * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the wait set. + * @param datatype Type of the objects + * + * @return Returns the index of an element in the ivars array that satisfies the + * test condition. If the test set is empty or no conditions in the test + * set are satisfied, this routine returns SIZE_MAX. + */ +typedef size_t (*mca_spml_base_module_wait_until_any_vector_fn_t)(void *ivars, + int cmp, + void *cmp_value, + size_t nelems, + const int *status, + int datatype); + +/* + * Wait on an array of variables on the local PE until at least one variable meets + * its specified wait condition. + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section.
+ * @param cmp A comparison operator that compares elements of ivars with elements + * of cmp_values. + * @param cmp_values Local address of an array of length nelems containing values to be + * compared with the respective objects in ivars. + * @param nelems The number of elements in the ivars array. + * @param indices Local address of an array of indices of length at least nelems into ivars + * that satisfied the wait condition. + * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the wait set. + * @param datatype Type of the objects + * + * @return Returns the number of indices returned in the indices array. If the test + * set is empty, this routine returns 0. + */ +typedef size_t (*mca_spml_base_module_wait_until_some_vector_fn_t)(void *ivars, + int cmp, + void *cmp_value, + size_t nelems, + size_t *indices, + const int *status, + int datatype); + + +/* + * Indicate whether all variables within an array of variables on the local PE meet + * a specified test condition. + * + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section. + * @param cmp A comparison operator that compares elements of ivars with cmp_value. + * @param cmp_value The value to be compared with the objects pointed to by ivars. + * @param nelems The number of elements in the ivars array. + * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the test set. + * @param datatype Type of the objects + * + * @return Returns 1 if all variables in ivars satisfy the test condition or if + * nelems is 0, otherwise this routine returns 0. 
+ */ +typedef int (*mca_spml_base_module_test_all_fn_t)(void *ivars, + int cmp, + void *cmp_value, + size_t nelems, + const int *status, + int datatype); + +/* + * Indicate whether any one variable within an array of variables on the local PE meets + * a specified test condition. + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section. + * @param cmp A comparison operator that compares elements of ivars with cmp_value. + * @param cmp_value The value to be compared with the objects pointed to by ivars. + * @param nelems The number of elements in the ivars array. + * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the test set. + * @param datatype Type of the objects + * + * @return Returns the index of an element in the ivars array that satisfies the + * test condition. If the test set is empty or no conditions in the test + * set are satisfied, this routine returns SIZE_MAX. + */ +typedef size_t (*mca_spml_base_module_test_any_fn_t)(void *ivars, + int cmp, + void *cmp_value, + size_t nelems, + const int *status, + int datatype); + + +/* + * Indicate whether at least one variable within an array of variables on the local PE meets + * a specified test condition. + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section. + * @param cmp A comparison operator that compares elements of ivars with cmp_value. + * @param cmp_value The value to be compared with the objects pointed to by ivars. + * @param nelems The number of elements in the ivars array. + * @param indices Local address of an array of indices of length at least nelems into + * ivars that satisfied the test condition.
+ * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the test set. + * @param datatype Type of the objects + * + * @return Returns the number of indices returned in the indices array. If the test + * set is empty, this routine returns 0. + */ +typedef size_t (*mca_spml_base_module_test_some_fn_t)(void *ivars, + int cmp, + void *cmp_value, + size_t nelems, + size_t *indices, + const int *status, + int datatype); + + +/* + * Indicate whether all variables within an array of variables on the local PE meet the + * specified test conditions. + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section. + * @param cmp A comparison operator that compares elements of ivars with elements + * of cmp_values. + * @param cmp_values Local address of an array of length nelems containing values to be + * compared with the respective objects in ivars. + * @param nelems The number of elements in the ivars array. + * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the test set. + * @param datatype Type of the objects + * + * @return Returns 1 if all variables in ivars satisfy the test conditions or if + * nelems is 0, otherwise this routine returns 0. + */ +typedef int (*mca_spml_base_module_test_all_vector_fn_t)(void *ivars, + int cmp, + void *cmp_values, + size_t nelems, + const int *status, + int datatype); + +/* + * Indicate whether any one variable within an array of variables on the local PE meets + * its specified test condition. + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section. + * @param cmp A comparison operator that compares elements of ivars with cmp_value. 
+ * @param cmp_values Local address of an array of length nelems containing values to be + * compared with the respective objects in ivars. + * @param nelems The number of elements in the ivars array. + * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the test set. + * @param datatype Type of the objects + * + * @return OSHMEM_SUCCESS or failure status. + */ +typedef int (*mca_spml_base_module_test_any_vector_fn_t)(void *ivars, + int cmp, + void *cmp_values, + size_t nelems, + const int *status, + int datatype); + +/* + * Indicate whether at least one variable within an array of variables on the local PE meets + * its specified test condition. + * + * @param ivars Symmetric address of an array of remotely accessible + * data objects. The type of ivars should match that + * implied in the SYNOPSIS section. + * @param cmp A comparison operator that compares elements of ivars with elements + * of cmp_values. + * @param cmp_values Local address of an array of length nelems containing values to be + * compared with the respective objects in ivars. + * @param nelems The number of elements in the ivars array. + * @param indices Local address of an array of indices of length at least nelems into ivars + * that satisfied the wait condition. + * @param status Local address of an optional mask array of length nelems that indicates + * which elements in ivars are excluded from the test set. + * @param datatype Type of the objects + * + * @return OSHMEM_SUCCESS or failure status. + */ +typedef int (*mca_spml_base_module_test_some_vector_fn_t)(void *ivars, + int cmp, + void *cmp_values, + size_t nelems, + size_t *indices, + const int *status, + int datatype); + +/* + * Registers the arrival of a PE at a synchronization point. + * This routine does not return until all other PEs in a given + * OpenSHMEM team or active set arrive at this synchronization point. 
+ * + * + * @param team An OpenSHMEM team handle. + * + * @return OSHMEM_SUCCESS or failure status. + * Zero on successful local completion. Nonzero otherwise. + */ +typedef int (*mca_spml_base_module_team_sync_fn_t)(shmem_team_t team); + + +/* + * Returns the number of the calling PE within a specified team. + * + * @param team An OpenSHMEM team handle. + * + * @return The number of the calling PE within the specified + * team, or the value -1 if the team handle compares + * equal to SHMEM_TEAM_INVALID + */ +typedef int (*mca_spml_base_module_team_my_pe_fn_t)(shmem_team_t team); + + +/* + * Returns the number of PEs in a specified team. + * + * @param team An OpenSHMEM team handle. + * + * @return The number of PEs in the specified team, or the + * value -1 if the team handle compares equal to + * SHMEM_TEAM_INVALID. + */ +typedef int (*mca_spml_base_module_team_n_pes_fn_t)(shmem_team_t team); + + + +/* + * Return the configuration parameters of a given team + * + * @param team An OpenSHMEM team handle. + * + * @param config_mask The bitwise mask representing the set of + * configuration parameters to fetch from the + * given team. + * + * @param config A pointer to the configuration parameters for the + * given team. + * + * + * @return OSHMEM_SUCCESS or failure status. + * + */ +typedef int (*mca_spml_base_module_team_get_config_fn_t)(shmem_team_t team, long + config_mask, shmem_team_config_t *config); + +/* + * Translate a given PE number from one team to the corresponding PE number in + * another team. + * + * @param src_team An OpenSHMEM team handle. + * @param src_pe A PE number in src_team. + * @param dest_team An OpenSHMEM team handle. + * + * + * @return The specified PE’s number in the dest_team, or a value + * of -1 if any team handle arguments are invalid or the + * src_pe is not in both the source and destination teams. 
+ */ +typedef int (*mca_spml_base_module_team_translate_pe_fn_t)(shmem_team_t src_team, + int src_pe, shmem_team_t dest_team); + + + +/* + * Create a new OpenSHMEM team from a subset of the existing parent team PEs, + * where the subset is defined by the PE triplet (start, stride, and size) + * supplied to the routine. + * + * @param parent_team An OpenSHMEM team handle. + * @param start The lowest PE number of the subset of PEs from the parent team + * that will form the new team. + * @param stride The stride between team PE numbers in the parent team that comprise the subset + * of PEs that will form the new team. + * @param size The number of PEs from the parent team in the subset of PEs that + * will form the new team. size must be a positive integer. + * @param config A pointer to the configuration parameters for the new team. + * @param config_mask The bitwise mask representing the set of configuration parameters + * to use from config. + * @param new_team An OpenSHMEM team handle. Upon successful creation, it references an OpenSHMEM + * team that contains the subset of all PEs in the parent team + * specified by the PE triplet provided. + * + * + * @return OSHMEM_SUCCESS or failure status. + * + */ +typedef int (*mca_spml_base_module_team_split_strided_fn_t)(shmem_team_t + parent_team, int start, int stride, int size, const shmem_team_config_t + *config, long config_mask, shmem_team_t *new_team); + + +/* + * Create two new teams by splitting an existing parent team into two subsets + * based on a 2D Cartesian space defined by the xrange argument and a y + * dimension that is derived from xrange and the parent team size. + * + * @param parent_team An OpenSHMEM team handle. + * @param xrange A positive integer representing the number of elements in the first dimension. + * @param xaxis_config A pointer to the configuration parameters for the new x-axis team.
+ * @param xaxis_mask The bitwise mask representing the set of configuration parameters to + * use from xaxis_config. + * @param xaxis_team A new PE team handle representing a PE subset consisting of all the + * PEs that have the same coordinate along the y-axis as the calling PE. + * @param yaxis_config A pointer to the configuration parameters for the new y-axis team. + * @param yaxis_mask The bitwise mask representing the set of configuration parameters to use + * from yaxis_config. + * @param yaxis_team A new PE team handle representing a PE subset consisting of all the PEs + * that have the same coordinate along the x-axis as the calling PE. + * + * @return OSHMEM_SUCCESS or failure status. + * + */ +typedef int (*mca_spml_base_module_team_split_2d_fn_t)(shmem_team_t parent_team, + int xrange, const shmem_team_config_t *xaxis_config, long xaxis_mask, + shmem_team_t *xaxis_team, const shmem_team_config_t *yaxis_config, long + yaxis_mask, shmem_team_t *yaxis_team); + + +/* + * Destroy an existing team. + * + * @param team An OpenSHMEM team handle. + * + * @return OSHMEM_SUCCESS or failure status. + * + */ +typedef int (*mca_spml_base_module_team_destroy_fn_t)(shmem_team_t team); + +/* + * Retrieve the team associated with the communication context. + * + * @param ctx A handle to a communication context. + * @param team A pointer to a handle to the associated PE team. + * + * @return OSHMEM_SUCCESS or failure status. + * + */ +typedef int (*mca_spml_base_module_team_get_fn_t)(shmem_ctx_t ctx, shmem_team_t *team); + +/* + * Create a communication context from a team. + * + * @param team An OpenSHMEM team handle. + * @param options The set of options requested for the given context. + * @param ctx A handle to the newly created context. + * + * @return OSHMEM_SUCCESS or failure status.
+ * + */ +typedef int (*mca_spml_base_module_team_create_ctx_fn_t)(shmem_team_t team, long options, shmem_ctx_t *ctx); + +/* + * Exchanges a fixed amount of contiguous data blocks between all pairs of + * PEs participating in the collective routine. + * + * @param team An OpenSHMEM team handle. + * @param dest Symmetric address of a data object large enough to + * receive the combined total of nelems elements from each PE in the active set. + * @param source Symmetric address of a data object that contains nelems elements of data + * for each PE in the active set, ordered according to destination PE. + * @param nelems The number of elements to exchange for each PE. + * @param datatype Datatype of the elements + * + * @return OSHMEM_SUCCESS or failure status. + * + */ +typedef int (*mca_spml_base_module_team_alltoall_fn_t)(shmem_team_t team, void + *dest, const void *source, size_t nelems, int datatype); + +/* + * Exchanges a fixed amount of strided data blocks between all pairs of PEs + * participating in the collective routine. + * + * @param team An OpenSHMEM team handle. + * @param dest Symmetric address of a data object large enough to + * receive the combined total of nelems elements from each PE in the active set. + * @param source Symmetric address of a data object that contains nelems elements of data + * for each PE in the active set, ordered according to destination PE. + * @param dst The stride between consecutive elements of the dest data object. The stride + * is scaled by the element size. A value of 1 indicates contiguous data. + * @param sst The stride between consecutive elements of the source data object. The stride + * is scaled by the element size. A value of 1 indicates contiguous data + * @param nelems The number of elements to exchange for each PE. + * @param datatype Datatype of the elements + * + * @return OSHMEM_SUCCESS or failure status.
+ * + */ +typedef int (*mca_spml_base_module_team_alltoalls_fn_t)(shmem_team_t team, void + *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, + int datatype); + + +/* + * Broadcasts a block of data from one PE to one or more destination PEs. + * + * @param team An OpenSHMEM team handle. + * @param dest Symmetric address of destination data object. + * @param source Symmetric address of the source data object. + * @param nelems The number of elements in source and dest arrays + * @param PE_root Zero-based ordinal of the PE, with respect to the team or + * active set, from which the data is copied. + * @param datatype Datatype of the elements + * + * @return OSHMEM_SUCCESS or failure status. + * + */ +typedef int (*mca_spml_base_module_team_broadcast_fn_t)(shmem_team_t team, void + *dest, const void *source, size_t nelems, int PE_root, int datatype); + + + +/* + * Concatenates blocks of data from multiple PEs to an array in every PE participating in + * the collective routine. + * + * @param team An OpenSHMEM team handle. + * @param dest Symmetric address of an array large enough to accept the + * concatenation of the source arrays on all participating PEs. + * @param source Symmetric address of the source data object. + * @param nelems The number of elements in source array. + * @param datatype Datatype of the elements + * + * @return OSHMEM_SUCCESS or failure status. + * + */ +typedef int (*mca_spml_base_module_team_collect_fn_t)(shmem_team_t team, void + *dest, const void *source, size_t nelems, int datatype); +typedef int (*mca_spml_base_module_team_fcollect_fn_t)(shmem_team_t team, void + *dest, const void *source, size_t nelems, int datatype); + +/* + * Performs a math reduction across a set of PEs. + * + * @param team An OpenSHMEM team handle. + * @param dest Symmetric address of an array, of length nreduce elements, + * to receive the result of the reduction routines.
+ * @param source Symmetric address of an array, of length nreduce elements, that + * contains one element for each separate reduction routine. + * @param nreduce The number of elements in the dest and source arrays. + * @param operation Operations from list of supported oshmem ops + * @param datatype Datatype of the elements + * + * @return OSHMEM_SUCCESS or failure status. + * + */ +typedef int (*mca_spml_base_module_team_reduce_fn_t)(shmem_team_t team, void + *dest, const void *source, size_t nreduce, int operation, int datatype); + + /** * Blocking data transfer from remote PE. * Read data from remote PE. @@ -393,15 +1035,50 @@ struct mca_spml_base_module_1_0_0_t { mca_spml_base_module_put_fn_t spml_put; mca_spml_base_module_put_nb_fn_t spml_put_nb; + mca_spml_base_module_put_signal_fn_t spml_put_signal; + mca_spml_base_module_put_signal_nb_fn_t spml_put_signal_nb; + mca_spml_base_module_get_fn_t spml_get; mca_spml_base_module_get_nb_fn_t spml_get_nb; mca_spml_base_module_recv_fn_t spml_recv; mca_spml_base_module_send_fn_t spml_send; - mca_spml_base_module_wait_fn_t spml_wait; - mca_spml_base_module_wait_nb_fn_t spml_wait_nb; - mca_spml_base_module_test_fn_t spml_test; + mca_spml_base_module_wait_fn_t spml_wait; + mca_spml_base_module_wait_nb_fn_t spml_wait_nb; + mca_spml_base_module_wait_until_all_fn_t spml_wait_until_all; + mca_spml_base_module_wait_until_any_fn_t spml_wait_until_any; + mca_spml_base_module_wait_until_some_fn_t spml_wait_until_some; + mca_spml_base_module_wait_until_all_vector_fn_t spml_wait_until_all_vector; + mca_spml_base_module_wait_until_any_vector_fn_t spml_wait_until_any_vector; + mca_spml_base_module_wait_until_some_vector_fn_t spml_wait_until_some_vector; + + mca_spml_base_module_test_fn_t spml_test; + mca_spml_base_module_test_all_fn_t spml_test_all; + mca_spml_base_module_test_any_fn_t spml_test_any; + mca_spml_base_module_test_some_fn_t spml_test_some; + mca_spml_base_module_test_all_vector_fn_t spml_test_all_vector; + 
mca_spml_base_module_test_any_vector_fn_t spml_test_any_vector; + mca_spml_base_module_test_some_vector_fn_t spml_test_some_vector; + + mca_spml_base_module_team_sync_fn_t spml_team_sync; + mca_spml_base_module_team_my_pe_fn_t spml_team_my_pe; + mca_spml_base_module_team_n_pes_fn_t spml_team_n_pes; + mca_spml_base_module_team_get_config_fn_t spml_team_get_config; + mca_spml_base_module_team_translate_pe_fn_t spml_team_translate_pe; + mca_spml_base_module_team_split_strided_fn_t spml_team_split_strided; + mca_spml_base_module_team_split_2d_fn_t spml_team_split_2d; + mca_spml_base_module_team_destroy_fn_t spml_team_destroy; + mca_spml_base_module_team_get_fn_t spml_team_get; + mca_spml_base_module_team_create_ctx_fn_t spml_team_create_ctx; + + mca_spml_base_module_team_alltoall_fn_t spml_team_alltoall; + mca_spml_base_module_team_alltoalls_fn_t spml_team_alltoalls; + mca_spml_base_module_team_broadcast_fn_t spml_team_broadcast; + mca_spml_base_module_team_collect_fn_t spml_team_collect; + mca_spml_base_module_team_fcollect_fn_t spml_team_fcollect; + mca_spml_base_module_team_reduce_fn_t spml_team_reduce; + mca_spml_base_module_fence_fn_t spml_fence; mca_spml_base_module_quiet_fn_t spml_quiet; diff --git a/oshmem/mca/spml/ucx/Makefile.am b/oshmem/mca/spml/ucx/Makefile.am index 4f61fbe2750..7d9e9137e5b 100644 --- a/oshmem/mca/spml/ucx/Makefile.am +++ b/oshmem/mca/spml/ucx/Makefile.am @@ -35,7 +35,7 @@ mcacomponent_LTLIBRARIES = $(component_install) mca_spml_ucx_la_SOURCES = $(ucx_sources) mca_spml_ucx_la_LIBADD = $(top_builddir)/oshmem/liboshmem.la \ $(spml_ucx_LIBS) \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/ucx/@OPAL_LIB_NAME@mca_common_ucx.la + $(OPAL_TOP_BUILDDIR)/opal/mca/common/ucx/lib@OPAL_LIB_NAME@mca_common_ucx.la mca_spml_ucx_la_LDFLAGS = -module -avoid-version $(spml_ucx_LDFLAGS) noinst_LTLIBRARIES = $(component_noinst) diff --git a/oshmem/mca/spml/ucx/spml_ucx.c b/oshmem/mca/spml/ucx/spml_ucx.c index fba5b2cb806..6a06bb259c2 100644 --- 
a/oshmem/mca/spml/ucx/spml_ucx.c +++ b/oshmem/mca/spml/ucx/spml_ucx.c @@ -10,7 +10,7 @@ * * $HEADER$ */ - + #define _GNU_SOURCE #include @@ -57,13 +57,12 @@ mca_spml_ucx_t mca_spml_ucx = { .spml_ctx_destroy = mca_spml_ucx_ctx_destroy, .spml_put = mca_spml_ucx_put, .spml_put_nb = mca_spml_ucx_put_nb, + .spml_put_signal = mca_spml_ucx_put_signal, + .spml_put_signal_nb = mca_spml_ucx_put_signal_nb, .spml_get = mca_spml_ucx_get, .spml_get_nb = mca_spml_ucx_get_nb, .spml_recv = mca_spml_ucx_recv, .spml_send = mca_spml_ucx_send, - .spml_wait = mca_spml_base_wait, - .spml_wait_nb = mca_spml_base_wait_nb, - .spml_test = mca_spml_base_test, .spml_fence = mca_spml_ucx_fence, .spml_quiet = mca_spml_ucx_quiet, .spml_rmkey_unpack = mca_spml_ucx_rmkey_unpack, @@ -71,7 +70,38 @@ mca_spml_ucx_t mca_spml_ucx = { .spml_rmkey_ptr = mca_spml_ucx_rmkey_ptr, .spml_memuse_hook = mca_spml_ucx_memuse_hook, .spml_put_all_nb = mca_spml_ucx_put_all_nb, - .self = (void*)&mca_spml_ucx + .spml_wait = mca_spml_base_wait, + .spml_wait_nb = mca_spml_base_wait_nb, + .spml_wait_until_all = mca_spml_ucx_wait_until_all, + .spml_wait_until_any = mca_spml_ucx_wait_until_any, + .spml_wait_until_some = mca_spml_ucx_wait_until_some, + .spml_wait_until_all_vector = mca_spml_ucx_wait_until_all_vector, + .spml_wait_until_any_vector = mca_spml_ucx_wait_until_any_vector, + .spml_wait_until_some_vector = mca_spml_ucx_wait_until_some_vector, + .spml_test = mca_spml_base_test, + .spml_test_all = mca_spml_ucx_test_all, + .spml_test_any = mca_spml_ucx_test_any, + .spml_test_some = mca_spml_ucx_test_some, + .spml_test_all_vector = mca_spml_ucx_test_all_vector, + .spml_test_any_vector = mca_spml_ucx_test_any_vector, + .spml_test_some_vector = mca_spml_ucx_test_some_vector, + .spml_team_sync = mca_spml_ucx_team_sync, + .spml_team_my_pe = mca_spml_ucx_team_my_pe, + .spml_team_n_pes = mca_spml_ucx_team_n_pes, + .spml_team_get_config = mca_spml_ucx_team_get_config, + .spml_team_translate_pe = 
mca_spml_ucx_team_translate_pe, + .spml_team_split_strided = mca_spml_ucx_team_split_strided, + .spml_team_split_2d = mca_spml_ucx_team_split_2d, + .spml_team_destroy = mca_spml_ucx_team_destroy, + .spml_team_get = mca_spml_ucx_team_get, + .spml_team_create_ctx = mca_spml_ucx_team_create_ctx, + .spml_team_alltoall = mca_spml_ucx_team_alltoall, + .spml_team_alltoalls = mca_spml_ucx_team_alltoalls, + .spml_team_broadcast = mca_spml_ucx_team_broadcast, + .spml_team_collect = mca_spml_ucx_team_collect, + .spml_team_fcollect = mca_spml_ucx_team_fcollect, + .spml_team_reduce = mca_spml_ucx_team_reduce, + .self = (void*)&mca_spml_ucx }, .ucp_context = NULL, @@ -85,11 +115,15 @@ mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default = { .ucp_worker = NULL, .ucp_peers = NULL, .options = 0, - .synchronized_quiet = false + .synchronized_quiet = false, + .strong_sync = SPML_UCX_STRONG_ORDERING_NONE }; -#if HAVE_DECL_UCP_ATOMIC_OP_NBX +#ifdef HAVE_UCP_REQUEST_PARAM_T static ucp_request_param_t mca_spml_ucx_request_param = {0}; +static ucp_request_param_t mca_spml_ucx_request_param_b = { + .op_attr_mask = UCP_OP_ATTR_FLAG_FAST_CMPL +}; #endif int mca_spml_ucx_enable(bool enable) @@ -104,7 +138,151 @@ int mca_spml_ucx_enable(bool enable) return OSHMEM_SUCCESS; } -int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs) +/* initialize the mkey cache */ +void mca_spml_ucx_peer_mkey_cache_init(mca_spml_ucx_ctx_t *ucx_ctx, int pe) +{ + ucx_ctx->ucp_peers[pe].mkeys = NULL; + ucx_ctx->ucp_peers[pe].mkeys_cnt = 0; +} + +/* add a new mkey and update the mkeys_cnt */ +int mca_spml_ucx_peer_mkey_cache_add(ucp_peer_t *ucp_peer, int index) +{ + /* Allocate an array to hold the pointers to the ucx_cached_mkey */ + if (index >= (int)ucp_peer->mkeys_cnt){ + int old_size = ucp_peer->mkeys_cnt; + if (MCA_MEMHEAP_MAX_SEGMENTS <= (index + 1)) { + SPML_UCX_ERROR("Failed to get new mkey for segment: max number (%d) of segment descriptor is exhausted", + MCA_MEMHEAP_MAX_SEGMENTS); + return OSHMEM_ERROR; + 
} + ucp_peer->mkeys_cnt = index + 1; + ucp_peer->mkeys = realloc(ucp_peer->mkeys, sizeof(ucp_peer->mkeys[0]) * ucp_peer->mkeys_cnt); + if (NULL == ucp_peer->mkeys) { + SPML_UCX_ERROR("Failed to obtain new mkey: OOM - failed to expand the descriptor buffer"); + return OSHMEM_ERR_OUT_OF_RESOURCE; + } + /* NOTE: release code checks for the rkey != NULL as a sign of used element: + Account for the following scenario below by zero'ing the unused elements: + |MKEY1|00000|MKEY2|??????|NEW-MKEY| + |<--- old_size -->| + */ + memset(ucp_peer->mkeys + old_size, 0, (ucp_peer->mkeys_cnt - old_size) * sizeof(ucp_peer->mkeys[0])); + } else { + /* Make sure we don't leak memory */ + assert(NULL == ucp_peer->mkeys[index]); + } + + ucp_peer->mkeys[index] = (spml_ucx_cached_mkey_t *) malloc(sizeof(*ucp_peer->mkeys[0])); + if (NULL == ucp_peer->mkeys[index]) { + SPML_UCX_ERROR("Failed to obtain new ucx_cached_mkey: OOM - failed to expand the descriptor buffer"); + return OSHMEM_ERR_OUT_OF_RESOURCE; + } + return OSHMEM_SUCCESS; +} + +/* Release individual mkeys */ +int mca_spml_ucx_peer_mkey_cache_del(ucp_peer_t *ucp_peer, int segno) +{ + if (((int)ucp_peer->mkeys_cnt <= segno) || (segno < 0)) { + return OSHMEM_ERR_NOT_AVAILABLE; + } + if (NULL != ucp_peer->mkeys[segno]) { + free(ucp_peer->mkeys[segno]); + ucp_peer->mkeys[segno] = NULL; + } + return OSHMEM_SUCCESS; +} + +/* Release the memkey map from a ucp_peer if it has any element in memkey */ +void mca_spml_ucx_peer_mkey_cache_release(ucp_peer_t *ucp_peer) +{ + size_t i; + if (ucp_peer->mkeys_cnt) { + for(i = 0; i < ucp_peer->mkeys_cnt; i++) { + assert(NULL == ucp_peer->mkeys[i]); + } + free(ucp_peer->mkeys); + ucp_peer->mkeys = NULL; + } +} + +int mca_spml_ucx_ctx_mkey_new(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segno, spml_ucx_mkey_t **mkey) +{ + ucp_peer_t *ucp_peer; + spml_ucx_cached_mkey_t *ucx_cached_mkey; + int rc; + ucp_peer = &(ucx_ctx->ucp_peers[pe]); + rc = mca_spml_ucx_peer_mkey_cache_add(ucp_peer, segno); + if 
(OSHMEM_SUCCESS != rc) { + return rc; + } + rc = mca_spml_ucx_peer_mkey_get(ucp_peer, segno, &ucx_cached_mkey); + if (OSHMEM_SUCCESS != rc) { + return rc; + } + *mkey = &(ucx_cached_mkey->key); + return OSHMEM_SUCCESS; +} + +int mca_spml_ucx_ctx_mkey_cache(mca_spml_ucx_ctx_t *ucx_ctx, sshmem_mkey_t *mkey, uint32_t segno, int dst_pe) +{ + ucp_peer_t *peer; + spml_ucx_cached_mkey_t *ucx_cached_mkey; + int rc; + + peer = &(ucx_ctx->ucp_peers[dst_pe]); + rc = mca_spml_ucx_peer_mkey_get(peer, segno, &ucx_cached_mkey); + if (OSHMEM_SUCCESS != rc) { + SPML_UCX_ERROR("mca_spml_ucx_peer_mkey_get failed"); + return rc; + } + mkey_segment_init(&ucx_cached_mkey->super, mkey, segno); + return OSHMEM_SUCCESS; +} + +int mca_spml_ucx_ctx_mkey_add(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segno, sshmem_mkey_t *mkey, spml_ucx_mkey_t **ucx_mkey) +{ + int rc; + ucs_status_t err; + + rc = mca_spml_ucx_ctx_mkey_new(ucx_ctx, pe, segno, ucx_mkey); + if (OSHMEM_SUCCESS != rc) { + SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_new failed"); + return rc; + } + + if (mkey->u.data) { + err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[pe].ucp_conn, mkey->u.data, &((*ucx_mkey)->rkey)); + if (UCS_OK != err) { + SPML_UCX_ERROR("failed to unpack rkey: %s", ucs_status_string(err)); + return OSHMEM_ERROR; + } + rc = mca_spml_ucx_ctx_mkey_cache(ucx_ctx, mkey, segno, pe); + if (OSHMEM_SUCCESS != rc) { + SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_cache failed"); + return rc; + } + } + return OSHMEM_SUCCESS; +} + +int mca_spml_ucx_ctx_mkey_del(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segno, spml_ucx_mkey_t *ucx_mkey) +{ + ucp_peer_t *ucp_peer; + int rc; + ucp_peer = &(ucx_ctx->ucp_peers[pe]); + ucp_rkey_destroy(ucx_mkey->rkey); + ucx_mkey->rkey = NULL; + rc = mca_spml_ucx_peer_mkey_cache_del(ucp_peer, segno); + if(OSHMEM_SUCCESS != rc){ + SPML_UCX_ERROR("mca_spml_ucx_peer_mkey_cache_del failed"); + return rc; + } + return OSHMEM_SUCCESS; +} + +int mca_spml_ucx_del_procs(oshmem_group_t* group, size_t nprocs) { 
size_t ucp_workers = mca_spml_ucx.ucp_workers; opal_common_ucx_del_proc_t *del_procs; @@ -128,6 +306,8 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs) /* mark peer as disconnected */ mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn = NULL; + /* release the cached_ep_mkey buffer */ + mca_spml_ucx_peer_mkey_cache_release(&(mca_spml_ucx_ctx_default.ucp_peers[i])); } ret = opal_common_ucx_del_procs_nofence(del_procs, nprocs, oshmem_my_proc_id(), @@ -248,13 +428,11 @@ static int oshmem_shmem_xchng( } -static char spml_ucx_transport_ids[1] = { 0 }; - int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs) { int res; - if (mca_spml_ucx.synchronized_quiet) { + if (mca_spml_ucx_is_strong_ordering(ctx)) { ctx->put_proc_indexes = malloc(nprocs * sizeof(*ctx->put_proc_indexes)); if (NULL == ctx->put_proc_indexes) { return OSHMEM_ERR_OUT_OF_RESOURCE; @@ -276,7 +454,7 @@ int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs) int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx) { - if (mca_spml_ucx.synchronized_quiet && ctx->put_proc_indexes) { + if (mca_spml_ucx_is_strong_ordering(ctx) && ctx->put_proc_indexes) { OBJ_DESTRUCT(&ctx->put_op_bitmap); free(ctx->put_proc_indexes); } @@ -284,7 +462,7 @@ int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx) return OSHMEM_SUCCESS; } -int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) +int mca_spml_ucx_add_procs(oshmem_group_t* group, size_t nprocs) { int rc = OSHMEM_ERROR; int my_rank = oshmem_my_proc_id(); @@ -292,7 +470,7 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) unsigned int *wk_roffs = NULL; unsigned int *wk_rsizes = NULL; char *wk_raddrs = NULL; - size_t i, j, w, n; + size_t i, w, n; ucs_status_t err; ucp_address_t **wk_local_addr; unsigned int *wk_addr_len; @@ -359,12 +537,8 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) goto error2; } - OSHMEM_PROC_DATA(procs[i])->num_transports = 1; - 
OSHMEM_PROC_DATA(procs[i])->transport_ids = spml_ucx_transport_ids; - - for (j = 0; j < MCA_MEMHEAP_MAX_SEGMENTS; j++) { - mca_spml_ucx_ctx_default.ucp_peers[i].mkeys[j].key.rkey = NULL; - } + /* Initialize mkeys as NULL for all processes */ + mca_spml_ucx_peer_mkey_cache_init(&mca_spml_ucx_ctx_default, i); } for (i = 0; i < mca_spml_ucx.ucp_workers; i++) { @@ -419,15 +593,27 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) } -void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey) +void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey, int pe) { spml_ucx_mkey_t *ucx_mkey; + uint32_t segno; + int rc; if (!mkey->spml_context) { return; } + segno = memheap_find_segnum(mkey->va_base, pe); + if (MEMHEAP_SEG_INVALID == segno) { + SPML_UCX_ERROR("mca_spml_ucx_rmkey_free failed because of invalid " + "segment number: %d\n", segno); + return; + } + ucx_mkey = (spml_ucx_mkey_t *)(mkey->spml_context); - ucp_rkey_destroy(ucx_mkey->rkey); + rc = mca_spml_ucx_ctx_mkey_del(&mca_spml_ucx_ctx_default, pe, segno, ucx_mkey); + if (OSHMEM_SUCCESS != rc) { + SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_del failed\n"); + } } void *mca_spml_ucx_rmkey_ptr(const void *dst_addr, sshmem_mkey_t *mkey, int pe) @@ -451,22 +637,16 @@ void mca_spml_ucx_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t se { spml_ucx_mkey_t *ucx_mkey; mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; - ucs_status_t err; - - ucx_mkey = &ucx_ctx->ucp_peers[pe].mkeys[segno].key; + int rc; - err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[pe].ucp_conn, - mkey->u.data, - &ucx_mkey->rkey); - if (UCS_OK != err) { - SPML_UCX_ERROR("failed to unpack rkey: %s", ucs_status_string(err)); + rc = mca_spml_ucx_ctx_mkey_add(ucx_ctx, pe, segno, mkey, &ucx_mkey); + if (OSHMEM_SUCCESS != rc) { + SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_cache failed"); goto error_fatal; } - if (ucx_ctx == &mca_spml_ucx_ctx_default) { mkey->spml_context = ucx_mkey; } - mca_spml_ucx_cache_mkey(ucx_ctx, mkey, segno, pe); return; error_fatal: @@ 
-480,14 +660,18 @@ void mca_spml_ucx_memuse_hook(void *addr, size_t length) spml_ucx_mkey_t *ucx_mkey; ucp_mem_advise_params_t params; ucs_status_t status; + int rc; if (!(mca_spml_ucx.heap_reg_nb && memheap_is_va_in_segment(addr, HEAP_SEG_INDEX))) { return; } - my_pe = oshmem_my_proc_id(); - ucx_mkey = &mca_spml_ucx_ctx_default.ucp_peers[my_pe].mkeys[HEAP_SEG_INDEX].key; - + my_pe = oshmem_my_proc_id(); + rc = mca_spml_ucx_ctx_mkey_by_seg(&mca_spml_ucx_ctx_default, my_pe, HEAP_SEG_INDEX, &ucx_mkey); + if (OSHMEM_SUCCESS != rc) { + SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_by_seg failed"); + return; + } params.field_mask = UCP_MEM_ADVISE_PARAM_FIELD_ADDRESS | UCP_MEM_ADVISE_PARAM_FIELD_LENGTH | UCP_MEM_ADVISE_PARAM_FIELD_ADVICE; @@ -513,10 +697,12 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr, spml_ucx_mkey_t *ucx_mkey; size_t len; ucp_mem_map_params_t mem_map_params; - int segno; + uint32_t segno; map_segment_t *mem_seg; unsigned flags; int my_pe = oshmem_my_proc_id(); + int rc; + ucp_mem_h mem_h; *count = 0; mkeys = (sshmem_mkey_t *) calloc(1, sizeof(*mkeys)); @@ -524,12 +710,14 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr, return NULL; } - segno = memheap_find_segnum(addr); + segno = memheap_find_segnum(addr, my_pe); + if (MEMHEAP_SEG_INVALID == segno) { + SPML_UCX_ERROR("mca_spml_ucx_register failed because of invalid " + "segment number: %d\n", segno); + return NULL; + } mem_seg = memheap_find_seg(segno); - ucx_mkey = &mca_spml_ucx_ctx_default.ucp_peers[my_pe].mkeys[segno].key; - mkeys[0].spml_context = ucx_mkey; - /* if possible use mem handle already created by ucx allocator */ if (MAP_SEGMENT_ALLOC_UCX != mem_seg->type) { flags = 0; @@ -544,18 +732,18 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr, mem_map_params.length = size; mem_map_params.flags = flags; - status = ucp_mem_map(mca_spml_ucx.ucp_context, &mem_map_params, &ucx_mkey->mem_h); + status = ucp_mem_map(mca_spml_ucx.ucp_context, &mem_map_params, &mem_h); if (UCS_OK != status) { goto 
error_out; } } else { mca_sshmem_ucx_segment_context_t *ctx = mem_seg->context; - ucx_mkey->mem_h = ctx->ucp_memh; + mem_h = ctx->ucp_memh; } - status = ucp_rkey_pack(mca_spml_ucx.ucp_context, ucx_mkey->mem_h, - &mkeys[0].u.data, &len); + status = ucp_rkey_pack(mca_spml_ucx.ucp_context, mem_h, + &mkeys[SPML_UCX_TRANSP_IDX].u.data, &len); if (UCS_OK != status) { goto error_unmap; } @@ -565,19 +753,16 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr, 0xffff); oshmem_shmem_abort(-1); } - - status = ucp_ep_rkey_unpack(mca_spml_ucx_ctx_default.ucp_peers[oshmem_group_self->my_pe].ucp_conn, - mkeys[0].u.data, - &ucx_mkey->rkey); - if (UCS_OK != status) { - SPML_UCX_ERROR("failed to unpack rkey"); + mkeys[SPML_UCX_TRANSP_IDX].len = len; + mkeys[SPML_UCX_TRANSP_IDX].va_base = addr; + *count = SPML_UCX_TRANSP_CNT; + rc = mca_spml_ucx_ctx_mkey_add(&mca_spml_ucx_ctx_default, my_pe, segno, &mkeys[SPML_UCX_TRANSP_IDX], &ucx_mkey); + if (OSHMEM_SUCCESS != rc) { + SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_cache failed"); goto error_unmap; } - - mkeys[0].len = len; - mkeys[0].va_base = addr; - *count = 1; - mca_spml_ucx_cache_mkey(&mca_spml_ucx_ctx_default, &mkeys[0], segno, my_pe); + ucx_mkey->mem_h = mem_h; + mkeys[SPML_UCX_TRANSP_IDX].spml_context = ucx_mkey; return mkeys; error_unmap: @@ -592,29 +777,42 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys) { spml_ucx_mkey_t *ucx_mkey; map_segment_t *mem_seg; + int my_pe = oshmem_my_proc_id(); + int rc; + uint32_t segno; MCA_SPML_CALL(quiet(oshmem_ctx_default)); if (!mkeys) return OSHMEM_SUCCESS; - if (!mkeys[0].spml_context) + if (!mkeys[SPML_UCX_TRANSP_IDX].spml_context) return OSHMEM_SUCCESS; - mem_seg = memheap_find_va(mkeys[0].va_base); - ucx_mkey = (spml_ucx_mkey_t*)mkeys[0].spml_context; - + mem_seg = memheap_find_va(mkeys[SPML_UCX_TRANSP_IDX].va_base); if (OPAL_UNLIKELY(NULL == mem_seg)) { return OSHMEM_ERROR; } + segno = memheap_find_segnum(mkeys[SPML_UCX_TRANSP_IDX].va_base, my_pe); + if (MEMHEAP_SEG_INVALID == segno) { + 
SPML_UCX_ERROR("mca_spml_ucx_deregister failed because of invalid " + "segment number: %d\n", segno); + return OSHMEM_ERROR; + } + + ucx_mkey = (spml_ucx_mkey_t*)mkeys[SPML_UCX_TRANSP_IDX].spml_context; + if (MAP_SEGMENT_ALLOC_UCX != mem_seg->type) { ucp_mem_unmap(mca_spml_ucx.ucp_context, ucx_mkey->mem_h); } - ucp_rkey_destroy(ucx_mkey->rkey); - ucx_mkey->rkey = NULL; - if (0 < mkeys[0].len) { - ucp_rkey_buffer_release(mkeys[0].u.data); + rc = mca_spml_ucx_ctx_mkey_del(&mca_spml_ucx_ctx_default, my_pe, segno, ucx_mkey); + if (OSHMEM_SUCCESS != rc) { + SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_del failed\n"); + return rc; + } + if (0 < mkeys[SPML_UCX_TRANSP_IDX].len) { + ucp_rkey_buffer_release(mkeys[SPML_UCX_TRANSP_IDX].u.data); } free(mkeys); @@ -674,6 +872,7 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx ucx_ctx->ucp_worker = calloc(1, sizeof(ucp_worker_h)); ucx_ctx->ucp_workers = 1; ucx_ctx->synchronized_quiet = mca_spml_ucx_ctx_default.synchronized_quiet; + ucx_ctx->strong_sync = mca_spml_ucx_ctx_default.strong_sync; params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE || options & SHMEM_CTX_PRIVATE || options & SHMEM_CTX_SERIALIZED) { @@ -713,16 +912,10 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx for (j = 0; j < memheap_map->n_segments; j++) { mkey = &memheap_map->mem_segs[j].mkeys_cache[i][0]; - ucx_mkey = &ucx_ctx->ucp_peers[i].mkeys[j].key; - if (mkey->u.data) { - err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[i].ucp_conn, - mkey->u.data, - &ucx_mkey->rkey); - if (UCS_OK != err) { - SPML_UCX_ERROR("failed to unpack rkey"); - goto error2; - } - mca_spml_ucx_cache_mkey(ucx_ctx, mkey, j, i); + rc = mca_spml_ucx_ctx_mkey_add(ucx_ctx, i, j, mkey, &ucx_mkey); + if (OSHMEM_SUCCESS != rc) { + SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_add failed"); + goto error2; } } } @@ -815,8 +1008,9 @@ void mca_spml_ucx_ctx_destroy(shmem_ctx_t ctx) int 
mca_spml_ucx_get(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_addr, int src) { - void *rva; - spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_get_mkey(ctx, src, src_addr, &rva, &mca_spml_ucx); + void *rva = NULL; + spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_ctx_mkey_by_va(ctx, src, src_addr, &rva, &mca_spml_ucx); + assert(NULL != ucx_mkey); mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; #if (HAVE_DECL_UCP_GET_NBX || HAVE_DECL_UCP_GET_NB) ucs_status_ptr_t request; @@ -826,7 +1020,7 @@ int mca_spml_ucx_get(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_add #if HAVE_DECL_UCP_GET_NBX request = ucp_get_nbx(ucx_ctx->ucp_peers[src].ucp_conn, dst_addr, size, - (uint64_t)rva, ucx_mkey->rkey, &mca_spml_ucx_request_param); + (uint64_t)rva, ucx_mkey->rkey, &mca_spml_ucx_request_param_b); return opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker[0], "ucp_get_nbx"); #elif HAVE_DECL_UCP_GET_NB request = ucp_get_nb(ucx_ctx->ucp_peers[src].ucp_conn, dst_addr, size, @@ -841,9 +1035,10 @@ int mca_spml_ucx_get(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_add int mca_spml_ucx_get_nb(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_addr, int src, void **handle) { - void *rva; + void *rva = NULL; ucs_status_t status; - spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_get_mkey(ctx, src, src_addr, &rva, &mca_spml_ucx); + spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_ctx_mkey_by_va(ctx, src, src_addr, &rva, &mca_spml_ucx); + assert(NULL != ucx_mkey); mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; #if HAVE_DECL_UCP_GET_NBX ucs_status_ptr_t status_ptr; @@ -868,9 +1063,10 @@ int mca_spml_ucx_get_nb(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_ int mca_spml_ucx_get_nb_wprogress(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_addr, int src, void **handle) { unsigned int i; - void *rva; + void *rva = NULL; ucs_status_t status; - spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_get_mkey(ctx, src, src_addr, &rva, &mca_spml_ucx); + 
spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_ctx_mkey_by_va(ctx, src, src_addr, &rva, &mca_spml_ucx); + assert(NULL != ucx_mkey); mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; #if HAVE_DECL_UCP_GET_NBX ucs_status_ptr_t status_ptr; @@ -905,8 +1101,9 @@ int mca_spml_ucx_get_nb_wprogress(shmem_ctx_t ctx, void *src_addr, size_t size, int mca_spml_ucx_put(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_addr, int dst) { - void *rva; - spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_get_mkey(ctx, dst, dst_addr, &rva, &mca_spml_ucx); + void *rva = NULL; + spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_ctx_mkey_by_va(ctx, dst, dst_addr, &rva, &mca_spml_ucx); + assert(NULL != ucx_mkey); mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; int res; #if (HAVE_DECL_UCP_PUT_NBX || HAVE_DECL_UCP_PUT_NB) @@ -917,7 +1114,7 @@ int mca_spml_ucx_put(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_add #if HAVE_DECL_UCP_PUT_NBX request = ucp_put_nbx(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size, - (uint64_t)rva, ucx_mkey->rkey, &mca_spml_ucx_request_param); + (uint64_t)rva, ucx_mkey->rkey, &mca_spml_ucx_request_param_b); res = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker[0], "ucp_put_nbx"); #elif HAVE_DECL_UCP_PUT_NB request = ucp_put_nb(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size, @@ -938,8 +1135,9 @@ int mca_spml_ucx_put(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_add int mca_spml_ucx_put_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_addr, int dst, void **handle) { - void *rva; - spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_get_mkey(ctx, dst, dst_addr, &rva, &mca_spml_ucx); + void *rva = NULL; + spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_ctx_mkey_by_va(ctx, dst, dst_addr, &rva, &mca_spml_ucx); + assert(NULL != ucx_mkey); mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; ucs_status_t status; #if HAVE_DECL_UCP_PUT_NBX @@ -970,9 +1168,10 @@ int mca_spml_ucx_put_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_ int 
mca_spml_ucx_put_nb_wprogress(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_addr, int dst, void **handle) { unsigned int i; - void *rva; + void *rva = NULL; ucs_status_t status; - spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_get_mkey(ctx, dst, dst_addr, &rva, &mca_spml_ucx); + spml_ucx_mkey_t *ucx_mkey = mca_spml_ucx_ctx_mkey_by_va(ctx, dst, dst_addr, &rva, &mca_spml_ucx); + assert(NULL != ucx_mkey); mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; #if HAVE_DECL_UCP_PUT_NBX ucs_status_ptr_t status_ptr; @@ -1008,14 +1207,81 @@ int mca_spml_ucx_put_nb_wprogress(shmem_ctx_t ctx, void* dst_addr, size_t size, return ucx_status_to_oshmem_nb(status); } +static int mca_spml_ucx_strong_sync(shmem_ctx_t ctx) +{ + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; + ucs_status_ptr_t request; + static int flush_get_data; + unsigned i; + int ret; + int idx; +#if !(HAVE_DECL_UCP_EP_FLUSH_NBX || HAVE_DECL_UCP_EP_FLUSH_NB) + ucs_status_t status; +#endif + + for (i = 0; i < ucx_ctx->put_proc_count; i++) { + idx = ucx_ctx->put_proc_indexes[i]; + + switch (ucx_ctx->strong_sync) { + case SPML_UCX_STRONG_ORDERING_NONE: + case SPML_UCX_STRONG_ORDERING_GETNB: + ret = mca_spml_ucx_get_nb(ctx, + ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base, + sizeof(flush_get_data), &flush_get_data, idx, NULL); + break; + case SPML_UCX_STRONG_ORDERING_GET: + ret = mca_spml_ucx_get(ctx, + ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base, + sizeof(flush_get_data), &flush_get_data, idx); + break; +#if HAVE_DECL_UCP_EP_FLUSH_NBX + case SPML_UCX_STRONG_ORDERING_FLUSH: + request = ucp_ep_flush_nbx(ucx_ctx->ucp_peers[idx].ucp_conn, + &mca_spml_ucx_request_param_b); + ret = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker[0], "ucp_flush_nbx"); +#elif HAVE_DECL_UCP_EP_FLUSH_NB + request = ucp_ep_flush_nb(ucx_ctx->ucp_peers[idx].ucp_conn, 0, opal_common_ucx_empty_complete_cb); + ret = opal_common_ucx_wait_request(request, 
ucx_ctx->ucp_worker[0], "ucp_flush_nb"); +#else + status = ucp_ep_flush(ucx_ctx->ucp_peers[idx].ucp_conn); + ret = (status == UCS_OK) ? OPAL_SUCCESS : OPAL_ERROR; +#endif + break; + default: + /* unknown mode */ + ret = OMPI_SUCCESS; + break; + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + oshmem_shmem_abort(-1); + return ret; + } + + opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx); + } + + ucx_ctx->put_proc_count = 0; + return OSHMEM_SUCCESS; +} + int mca_spml_ucx_fence(shmem_ctx_t ctx) { + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; ucs_status_t err; + int ret; unsigned int i = 0; - mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; opal_atomic_wmb(); + if (ucx_ctx->strong_sync != SPML_UCX_STRONG_ORDERING_NONE) { + ret = mca_spml_ucx_strong_sync(ctx); + if (ret != OSHMEM_SUCCESS) { + oshmem_shmem_abort(-1); + return ret; + } + } + for (i=0; i < ucx_ctx->ucp_workers; i++) { if (ucx_ctx->ucp_worker[i] != NULL) { err = ucp_worker_fence(ucx_ctx->ucp_worker[i]); @@ -1031,26 +1297,16 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx) int mca_spml_ucx_quiet(shmem_ctx_t ctx) { - int flush_get_data; int ret; unsigned i; - int idx; mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; - if (mca_spml_ucx.synchronized_quiet) { - for (i = 0; i < ucx_ctx->put_proc_count; i++) { - idx = ucx_ctx->put_proc_indexes[i]; - ret = mca_spml_ucx_get_nb(ctx, - ucx_ctx->ucp_peers[idx].mkeys->super.super.va_base, - sizeof(flush_get_data), &flush_get_data, idx, NULL); - if (OMPI_SUCCESS != ret) { - oshmem_shmem_abort(-1); - return ret; - } - - opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx); + if (ucx_ctx->synchronized_quiet) { + ret = mca_spml_ucx_strong_sync(ctx); + if (ret != OSHMEM_SUCCESS) { + oshmem_shmem_abort(-1); + return ret; } - ucx_ctx->put_proc_count = 0; } opal_atomic_wmb(); @@ -1210,3 +1466,217 @@ int mca_spml_ucx_put_all_nb(void *dest, const void *source, size_t size, long *c return OSHMEM_SUCCESS; } + +/* This routine is not implemented */ +int 
mca_spml_ucx_put_signal(shmem_ctx_t ctx, void* dst_addr, size_t size, void* + src_addr, uint64_t *sig_addr, uint64_t signal, int sig_op, int dst) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_put_signal_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, + void* src_addr, uint64_t *sig_addr, uint64_t signal, int sig_op, int + dst) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +void mca_spml_ucx_wait_until_all(void *ivars, int cmp, void + *cmp_value, size_t nelems, const int *status, int datatype) +{ + return ; +} + +/* This routine is not implemented */ +size_t mca_spml_ucx_wait_until_any(void *ivars, int cmp, void + *cmp_value, size_t nelems, const int *status, int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +size_t mca_spml_ucx_wait_until_some(void *ivars, int cmp, void + *cmp_value, size_t nelems, size_t *indices, const int *status, int + datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +void mca_spml_ucx_wait_until_all_vector(void *ivars, int cmp, void + *cmp_values, size_t nelems, const int *status, int datatype) +{ + return ; +} + +/* This routine is not implemented */ +size_t mca_spml_ucx_wait_until_any_vector(void *ivars, int cmp, void + *cmp_value, size_t nelems, const int *status, int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +size_t mca_spml_ucx_wait_until_some_vector(void *ivars, int cmp, void + *cmp_value, size_t nelems, size_t *indices, const int *status, int + datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_test_all(void *ivars, int cmp, void *cmp_value, + size_t nelems, const int *status, int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +size_t mca_spml_ucx_test_any(void *ivars, int cmp, void *cmp_value, 
+ size_t nelems, const int *status, int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +size_t mca_spml_ucx_test_some(void *ivars, int cmp, void *cmp_value, + size_t nelems, size_t *indices, const int *status, int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_test_all_vector(void *ivars, int cmp, void + *cmp_values, size_t nelems, const int *status, int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +size_t mca_spml_ucx_test_any_vector(void *ivars, int cmp, void + *cmp_values, size_t nelems, const int *status, int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +size_t mca_spml_ucx_test_some_vector(void *ivars, int cmp, void + *cmp_values, size_t nelems, size_t *indices, const int *status, int + datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_sync(shmem_team_t team) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_my_pe(shmem_team_t team) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_n_pes(shmem_team_t team) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_get_config(shmem_team_t team, long config_mask, + shmem_team_config_t *config) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_translate_pe(shmem_team_t src_team, int src_pe, + shmem_team_t dest_team) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_split_strided(shmem_team_t parent_team, int start, int + stride, int size, const shmem_team_config_t *config, long config_mask, + shmem_team_t *new_team) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This 
routine is not implemented */ +int mca_spml_ucx_team_split_2d(shmem_team_t parent_team, int xrange, const + shmem_team_config_t *xaxis_config, long xaxis_mask, shmem_team_t + *xaxis_team, const shmem_team_config_t *yaxis_config, long yaxis_mask, + shmem_team_t *yaxis_team) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_destroy(shmem_team_t team) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_get(shmem_ctx_t ctx, shmem_team_t *team) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_create_ctx(shmem_team_t team, long options, shmem_ctx_t *ctx) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_alltoall(shmem_team_t team, void + *dest, const void *source, size_t nelems, int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_alltoalls(shmem_team_t team, void + *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, + int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_broadcast(shmem_team_t team, void + *dest, const void *source, size_t nelems, int PE_root, int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_collect(shmem_team_t team, void + *dest, const void *source, size_t nelems, int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_fcollect(shmem_team_t team, void + *dest, const void *source, size_t nelems, int datatype) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +/* This routine is not implemented */ +int mca_spml_ucx_team_reduce(shmem_team_t team, void + *dest, const void *source, size_t nreduce, int operation, int datatype) +{ + return 
OSHMEM_ERR_NOT_IMPLEMENTED; +} + + diff --git a/oshmem/mca/spml/ucx/spml_ucx.h b/oshmem/mca/spml/ucx/spml_ucx.h index b2802e7161a..6c79a9a08c8 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.h +++ b/oshmem/mca/spml/ucx/spml_ucx.h @@ -44,6 +44,16 @@ BEGIN_C_DECLS #define SPML_UCX_ASSERT MCA_COMMON_UCX_ASSERT #define SPML_UCX_ERROR MCA_COMMON_UCX_ERROR #define SPML_UCX_VERBOSE MCA_COMMON_UCX_VERBOSE +#define SPML_UCX_TRANSP_IDX 0 +#define SPML_UCX_TRANSP_CNT 1 +#define SPML_UCX_SERVICE_SEG 0 + +enum { + SPML_UCX_STRONG_ORDERING_NONE = 0, /* don't use strong ordering */ + SPML_UCX_STRONG_ORDERING_GETNB = 1, /* use non-blocking read to provide ordering */ + SPML_UCX_STRONG_ORDERING_GET = 2, /* use blocking read to provide ordering*/ + SPML_UCX_STRONG_ORDERING_FLUSH = 3 /* flush EP to provide ordering */ +}; /** * UCX SPML module @@ -62,7 +72,8 @@ typedef struct spml_ucx_cached_mkey spml_ucx_cached_mkey_t; struct ucp_peer { ucp_ep_h ucp_conn; - spml_ucx_cached_mkey_t mkeys[MCA_MEMHEAP_MAX_SEGMENTS]; + spml_ucx_cached_mkey_t **mkeys; + size_t mkeys_cnt; }; typedef struct ucp_peer ucp_peer_t; @@ -76,6 +87,7 @@ struct mca_spml_ucx_ctx { int *put_proc_indexes; unsigned put_proc_count; bool synchronized_quiet; + int strong_sync; }; typedef struct mca_spml_ucx_ctx mca_spml_ucx_ctx_t; @@ -110,7 +122,6 @@ struct mca_spml_ucx { mca_spml_ucx_ctx_t *aux_ctx; pthread_spinlock_t async_lock; int aux_refcnt; - bool synchronized_quiet; unsigned long nb_progress_thresh_global; unsigned long nb_put_progress_thresh; unsigned long nb_get_progress_thresh; @@ -186,11 +197,11 @@ extern int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys); extern void mca_spml_ucx_memuse_hook(void *addr, size_t length); extern void mca_spml_ucx_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id); -extern void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey); +extern void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey, int pe); extern void *mca_spml_ucx_rmkey_ptr(const void *dst_addr, 
sshmem_mkey_t *, int pe); -extern int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs); -extern int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs); +extern int mca_spml_ucx_add_procs(oshmem_group_t* group, size_t nprocs); +extern int mca_spml_ucx_del_procs(oshmem_group_t* group, size_t nprocs); extern int mca_spml_ucx_fence(shmem_ctx_t ctx); extern int mca_spml_ucx_quiet(shmem_ctx_t ctx); extern int spml_ucx_default_progress(void); @@ -200,6 +211,89 @@ void mca_spml_ucx_async_cb(int fd, short event, void *cbdata); int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs); int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx); +int mca_spml_ucx_peer_mkey_cache_add(ucp_peer_t *ucp_peer, int index); +int mca_spml_ucx_peer_mkey_cache_del(ucp_peer_t *ucp_peer, int segno); +void mca_spml_ucx_peer_mkey_cache_release(ucp_peer_t *ucp_peer); +void mca_spml_ucx_peer_mkey_cache_init(mca_spml_ucx_ctx_t *ucx_ctx, int pe); + +extern int mca_spml_ucx_put_signal(shmem_ctx_t ctx, void* dst_addr, size_t size, void* + src_addr, uint64_t *sig_addr, uint64_t signal, int sig_op, int dst); + +extern int mca_spml_ucx_put_signal_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, + void* src_addr, uint64_t *sig_addr, uint64_t signal, int sig_op, int + dst); +extern void mca_spml_ucx_wait_until_all(void *ivars, int cmp, void + *cmp_value, size_t nelems, const int *status, int datatype); +extern size_t mca_spml_ucx_wait_until_any(void *ivars, int cmp, void + *cmp_value, size_t nelems, const int *status, int datatype); +extern size_t mca_spml_ucx_wait_until_some(void *ivars, int cmp, void + *cmp_value, size_t nelems, size_t *indices, const int *status, int + datatype); +extern void mca_spml_ucx_wait_until_all_vector(void *ivars, int cmp, void + *cmp_values, size_t nelems, const int *status, int datatype); +extern size_t mca_spml_ucx_wait_until_any_vector(void *ivars, int cmp, void + *cmp_value, size_t nelems, const int *status, int datatype); +extern 
size_t mca_spml_ucx_wait_until_some_vector(void *ivars, int cmp, void + *cmp_value, size_t nelems, size_t *indices, const int *status, int + datatype); +extern int mca_spml_ucx_test_all(void *ivars, int cmp, void *cmp_value, + size_t nelems, const int *status, int datatype); +extern size_t mca_spml_ucx_test_any(void *ivars, int cmp, void *cmp_value, + size_t nelems, const int *status, int datatype); +extern size_t mca_spml_ucx_test_some(void *ivars, int cmp, void *cmp_value, + size_t nelems, size_t *indices, const int *status, int datatype); +extern int mca_spml_ucx_test_all_vector(void *ivars, int cmp, void + *cmp_values, size_t nelems, const int *status, int datatype); +extern size_t mca_spml_ucx_test_any_vector(void *ivars, int cmp, void + *cmp_values, size_t nelems, const int *status, int datatype); +extern size_t mca_spml_ucx_test_some_vector(void *ivars, int cmp, void + *cmp_values, size_t nelems, size_t *indices, const int *status, int + datatype); +extern int mca_spml_ucx_team_sync(shmem_team_t team); +extern int mca_spml_ucx_team_my_pe(shmem_team_t team); +extern int mca_spml_ucx_team_n_pes(shmem_team_t team); +extern int mca_spml_ucx_team_get_config(shmem_team_t team, long config_mask, + shmem_team_config_t *config); +extern int mca_spml_ucx_team_translate_pe(shmem_team_t src_team, int src_pe, + shmem_team_t dest_team); +extern int mca_spml_ucx_team_split_strided(shmem_team_t parent_team, int start, int + stride, int size, const shmem_team_config_t *config, long config_mask, + shmem_team_t *new_team); +extern int mca_spml_ucx_team_split_2d(shmem_team_t parent_team, int xrange, const + shmem_team_config_t *xaxis_config, long xaxis_mask, shmem_team_t + *xaxis_team, const shmem_team_config_t *yaxis_config, long yaxis_mask, + shmem_team_t *yaxis_team); +extern int mca_spml_ucx_team_destroy(shmem_team_t team); +extern int mca_spml_ucx_team_get(shmem_ctx_t ctx, shmem_team_t *team); +extern int mca_spml_ucx_team_create_ctx(shmem_team_t team, long options, 
shmem_ctx_t *ctx); +extern int mca_spml_ucx_team_alltoall(shmem_team_t team, void + *dest, const void *source, size_t nelems, int datatype); +extern int mca_spml_ucx_team_alltoalls(shmem_team_t team, void + *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, + int datatype); +extern int mca_spml_ucx_team_broadcast(shmem_team_t team, void + *dest, const void *source, size_t nelems, int PE_root, int datatype); +extern int mca_spml_ucx_team_collect(shmem_team_t team, void + *dest, const void *source, size_t nelems, int datatype); +extern int mca_spml_ucx_team_fcollect(shmem_team_t team, void + *dest, const void *source, size_t nelems, int datatype); +extern int mca_spml_ucx_team_reduce(shmem_team_t team, void + *dest, const void *source, size_t nreduce, int operation, int datatype); + + +static inline int +mca_spml_ucx_peer_mkey_get(ucp_peer_t *ucp_peer, int index, spml_ucx_cached_mkey_t **out_rmkey) +{ + *out_rmkey = NULL; + if (OPAL_UNLIKELY((index >= (int)ucp_peer->mkeys_cnt) || + (MCA_MEMHEAP_MAX_SEGMENTS <= index) || (0 > index))) { + SPML_UCX_ERROR("Failed to get mkey for segment: bad index = %d, MAX = %d, cached mkeys count: %zu", + index, MCA_MEMHEAP_MAX_SEGMENTS, ucp_peer->mkeys_cnt); + return OSHMEM_ERR_BAD_PARAM; + } + *out_rmkey = ucp_peer->mkeys[index]; + return OSHMEM_SUCCESS; +} static inline void mca_spml_ucx_aux_lock(void) { @@ -215,26 +309,44 @@ static inline void mca_spml_ucx_aux_unlock(void) } } -static inline void mca_spml_ucx_cache_mkey(mca_spml_ucx_ctx_t *ucx_ctx, - sshmem_mkey_t *mkey, uint32_t segno, int dst_pe) -{ - ucp_peer_t *peer; +int mca_spml_ucx_ctx_mkey_new(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segno, spml_ucx_mkey_t **mkey); +int mca_spml_ucx_ctx_mkey_cache(mca_spml_ucx_ctx_t *ucx_ctx, sshmem_mkey_t *mkey, uint32_t segno, int dst_pe); +int mca_spml_ucx_ctx_mkey_add(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segno, sshmem_mkey_t *mkey, spml_ucx_mkey_t **ucx_mkey); +int 
mca_spml_ucx_ctx_mkey_del(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segno, spml_ucx_mkey_t *ucx_mkey); - peer = &(ucx_ctx->ucp_peers[dst_pe]); - mkey_segment_init(&peer->mkeys[segno].super, mkey, segno); +static inline int +mca_spml_ucx_ctx_mkey_by_seg(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segno, spml_ucx_mkey_t **mkey) +{ + ucp_peer_t *ucp_peer; + spml_ucx_cached_mkey_t *ucx_cached_mkey; + int rc; + ucp_peer = &(ucx_ctx->ucp_peers[pe]); + rc = mca_spml_ucx_peer_mkey_get(ucp_peer, segno, &ucx_cached_mkey); + if (OSHMEM_SUCCESS != rc) { + return rc; + } + *mkey = &(ucx_cached_mkey->key); + return OSHMEM_SUCCESS; } static inline spml_ucx_mkey_t * -mca_spml_ucx_get_mkey(shmem_ctx_t ctx, int pe, void *va, void **rva, mca_spml_ucx_t* module) +mca_spml_ucx_ctx_mkey_by_va(shmem_ctx_t ctx, int pe, void *va, void **rva, mca_spml_ucx_t* module) { - spml_ucx_cached_mkey_t *mkey; + spml_ucx_cached_mkey_t **mkey; mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; + size_t i; mkey = ucx_ctx->ucp_peers[pe].mkeys; - mkey = (spml_ucx_cached_mkey_t *)map_segment_find_va(&mkey->super.super, sizeof(*mkey), va); - assert(mkey != NULL); - *rva = map_segment_va2rva(&mkey->super, va); - return &mkey->key; + for (i = 0; i < ucx_ctx->ucp_peers[pe].mkeys_cnt; i++) { + if (NULL == mkey[i]) { + continue; + } + if (OPAL_LIKELY(map_segment_is_va_in(&mkey[i]->super.super, va))) { + *rva = map_segment_va2rva(&mkey[i]->super, va); + return &mkey[i]->key; + } + } + return NULL; } static inline int ucx_status_to_oshmem(ucs_status_t status) @@ -255,9 +367,15 @@ static inline int ucx_status_to_oshmem_nb(ucs_status_t status) #endif } +static inline int mca_spml_ucx_is_strong_ordering(mca_spml_ucx_ctx_t *ctx) +{ + return (ctx->strong_sync != SPML_UCX_STRONG_ORDERING_NONE) || + ctx->synchronized_quiet; +} + static inline void mca_spml_ucx_remote_op_posted(mca_spml_ucx_ctx_t *ctx, int dst) { - if (OPAL_UNLIKELY(ctx->synchronized_quiet)) { + if 
(OPAL_UNLIKELY(mca_spml_ucx_is_strong_ordering(ctx))) { if (!opal_bitmap_is_set_bit(&ctx->put_op_bitmap, dst)) { ctx->put_proc_indexes[ctx->put_proc_count++] = dst; opal_bitmap_set_bit(&ctx->put_op_bitmap, dst); @@ -271,4 +389,3 @@ static inline void mca_spml_ucx_remote_op_posted(mca_spml_ucx_ctx_t *ctx, int ds END_C_DECLS #endif - diff --git a/oshmem/mca/spml/ucx/spml_ucx_component.c b/oshmem/mca/spml/ucx/spml_ucx_component.c index eb40076a706..860736d3dde 100644 --- a/oshmem/mca/spml/ucx/spml_ucx_component.c +++ b/oshmem/mca/spml/ucx/spml_ucx_component.c @@ -161,6 +161,11 @@ static int mca_spml_ucx_component_register(void) "Use synchronized quiet on shmem_quiet or shmem_barrier_all operations", &mca_spml_ucx_ctx_default.synchronized_quiet); + mca_spml_ucx_param_register_int("strong_sync", 0, + "Use strong synchronization on shmem_quiet, shmem_fence or shmem_barrier_all operations: " + "0 - don't do strong synchronization, 1 - use non blocking get, 2 - use blocking get, 3 - use flush operation", + &mca_spml_ucx_ctx_default.strong_sync); + mca_spml_ucx_param_register_ulong("nb_progress_thresh_global", 0, "Number of nb_put or nb_get operations before ucx progress is triggered. Disabled by default (0). 
Setting this value will override nb_put/get_progress_thresh.", &mca_spml_ucx.nb_progress_thresh_global); @@ -383,7 +388,14 @@ mca_spml_ucx_component_init(int* priority, if (OSHMEM_SUCCESS != spml_ucx_init()) return NULL ; + if ((mca_spml_ucx_ctx_default.strong_sync < SPML_UCX_STRONG_ORDERING_NONE) || + (mca_spml_ucx_ctx_default.strong_sync > SPML_UCX_STRONG_ORDERING_FLUSH)) { + SPML_UCX_ERROR("incorrect value of strong_sync parameter: %d", + mca_spml_ucx_ctx_default.strong_sync); + } + SPML_UCX_VERBOSE(50, "*** ucx initialized ****"); + return &mca_spml_ucx.super; } @@ -391,13 +403,23 @@ static void _ctx_cleanup(mca_spml_ucx_ctx_t *ctx) { int i, j, nprocs = oshmem_num_procs(); opal_common_ucx_del_proc_t *del_procs; + spml_ucx_mkey_t *ucx_mkey; + int rc; del_procs = malloc(sizeof(*del_procs) * nprocs); for (i = 0; i < nprocs; ++i) { for (j = 0; j < memheap_map->n_segments; j++) { - if (ctx->ucp_peers[i].mkeys[j].key.rkey != NULL) { - ucp_rkey_destroy(ctx->ucp_peers[i].mkeys[j].key.rkey); + rc = mca_spml_ucx_ctx_mkey_by_seg(ctx, i, j, &ucx_mkey); + if (OSHMEM_SUCCESS != rc) { + SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_by_seg failed"); + } else { + if (ucx_mkey->rkey != NULL) { + rc = mca_spml_ucx_ctx_mkey_del(ctx, i, j, ucx_mkey); + if (OSHMEM_SUCCESS != rc) { + SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_del failed"); + } + } } } diff --git a/oshmem/proc/proc.c b/oshmem/proc/proc.c index 6e3446c9abe..f50cd2b1117 100644 --- a/oshmem/proc/proc.c +++ b/oshmem/proc/proc.c @@ -31,13 +31,53 @@ static opal_mutex_t oshmem_proc_lock; +static opal_bitmap_t _oshmem_local_vpids; /* Track the vpids in local node */ +int oshmem_proc_init_set_local_vpids() +{ + opal_process_name_t wildcard_rank; + int ret = OMPI_SUCCESS; + char *val = NULL; + + ret = opal_bitmap_init(&_oshmem_local_vpids, ompi_comm_size(oshmem_comm_world)); + if (OSHMEM_SUCCESS != ret) { + return ret; + } + /* Add all local peers first */ + wildcard_rank.jobid = OMPI_PROC_MY_NAME->jobid; + wildcard_rank.vpid = 
OMPI_NAME_WILDCARD->vpid; + /* retrieve the local peers */ + OPAL_MODEX_RECV_VALUE(ret, PMIX_LOCAL_PEERS, + &wildcard_rank, &val, PMIX_STRING); + + if (OPAL_SUCCESS == ret && NULL != val) { + char **peers = opal_argv_split(val, ','); + int i; + free(val); + for (i=0; NULL != peers[i]; i++) { + ompi_vpid_t local_rank = strtoul(peers[i], NULL, 10); + opal_bitmap_set_bit(&_oshmem_local_vpids, local_rank); + } + opal_argv_free(peers); + } + return OSHMEM_SUCCESS; +} + +bool oshmem_proc_on_local_node(int pe) +{ + return opal_bitmap_is_set_bit(&_oshmem_local_vpids, pe); +} int oshmem_proc_init(void) { + int ret; OBJ_CONSTRUCT(&oshmem_proc_lock, opal_mutex_t); + OBJ_CONSTRUCT(&_oshmem_local_vpids, opal_bitmap_t); + + ret = oshmem_proc_init_set_local_vpids(); + if(OSHMEM_SUCCESS != ret) { + return ret; + } - /* check oshmem_proc_data_t can fit within ompi_proc_t padding */ - assert(sizeof(oshmem_proc_data_t) <= OMPI_PROC_PADDING_SIZE); /* check ompi_proc_t padding is aligned on a pointer */ assert(0 == (offsetof(ompi_proc_t, padding) & (sizeof(char *)-1))); @@ -139,6 +179,7 @@ int oshmem_proc_group_finalize(void) } } + OBJ_DESTRUCT(&_oshmem_local_vpids); OBJ_DESTRUCT(&oshmem_group_array); oshmem_group_cache_destroy(); @@ -150,8 +191,6 @@ oshmem_group_t* oshmem_proc_group_create(int pe_start, int pe_stride, int pe_siz int cur_pe, count_pe; int i; oshmem_group_t* group = NULL; - ompi_proc_t** proc_array = NULL; - ompi_proc_t* proc = NULL; assert(oshmem_proc_local()); @@ -171,52 +210,31 @@ oshmem_group_t* oshmem_proc_group_create(int pe_start, int pe_stride, int pe_siz OPAL_THREAD_LOCK(&oshmem_proc_lock); /* allocate an array */ - proc_array = (ompi_proc_t**) malloc(pe_size * sizeof(ompi_proc_t*)); - if (NULL == proc_array) { - OBJ_RELEASE(group); - OPAL_THREAD_UNLOCK(&oshmem_proc_lock); - return NULL ; + group->proc_vpids = (opal_vpid_t *) malloc(pe_size * sizeof(group->proc_vpids[0])); + if (NULL == group->proc_vpids) { + /* undo the group allocation and drop the lock taken above */ + OBJ_RELEASE(group); + OPAL_THREAD_UNLOCK(&oshmem_proc_lock); + return NULL; } group->my_pe = 
oshmem_proc_pe(oshmem_proc_local()); group->is_member = 0; for (i = 0 ; i < ompi_comm_size(oshmem_comm_world) ; i++) { - proc = oshmem_proc_find(i); - if (NULL == proc) { - opal_output(0, - "Error: Can not find proc object for pe = %d", i); - free(proc_array); - OBJ_RELEASE(group); - OPAL_THREAD_UNLOCK(&oshmem_proc_lock); - return NULL; - } if (count_pe >= (int) pe_size) { break; } else if ((cur_pe >= pe_start) && ((pe_stride == 0) || (((cur_pe - pe_start) % pe_stride) == 0))) { - proc_array[count_pe++] = proc; - if (oshmem_proc_pe(proc) == group->my_pe) + group->proc_vpids[count_pe] = i; + count_pe ++; + if (i == group->my_pe) group->is_member = 1; } cur_pe++; } - group->proc_array = proc_array; group->proc_count = (int) count_pe; group->ompi_comm = NULL; - - /* Prepare peers list */ - OBJ_CONSTRUCT(&(group->peer_list), opal_list_t); - { - opal_namelist_t *peer = NULL; - - for (i = 0; i < group->proc_count; i++) { - peer = OBJ_NEW(opal_namelist_t); - peer->name.jobid = OSHMEM_PROC_JOBID(group->proc_array[i]); - peer->name.vpid = OSHMEM_PROC_VPID(group->proc_array[i]); - opal_list_append(&(group->peer_list), &peer->super); - } - } group->id = opal_pointer_array_add(&oshmem_group_array, group); memset(&group->g_scoll, 0, sizeof(mca_scoll_base_group_scoll_t)); @@ -251,20 +269,8 @@ oshmem_proc_group_destroy_internal(oshmem_group_t* group, int scoll_unselect) mca_scoll_base_group_unselect(group); } - /* Destroy proc array */ - if (group->proc_array) { - free(group->proc_array); - } - - /* Destroy peer list */ - { - opal_list_item_t *item; - - while (NULL != (item = opal_list_remove_first(&(group->peer_list)))) { - /* destruct the item (we constructed it), then free the memory chunk */ - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&(group->peer_list)); + if (group->proc_vpids) { + free(group->proc_vpids); } /* reset the oshmem_group_array entry - make sure that the diff --git a/oshmem/proc/proc.h b/oshmem/proc/proc.h index 0660b691231..95e7ff60e06 100644 --- 
a/oshmem/proc/proc.h +++ b/oshmem/proc/proc.h @@ -19,7 +19,9 @@ #include "oshmem/constants.h" #include "opal/class/opal_list.h" +#include "opal/class/opal_bitmap.h" #include "opal/util/proc.h" +#include "opal/util/argv.h" #include "opal/mca/hwloc/hwloc-internal.h" #include "ompi/proc/proc.h" @@ -37,19 +39,6 @@ struct oshmem_group_t; #define OSHMEM_PE_INVALID (-1) -/* This struct will be copied into the padding field of an ompi_proc_t - * so the size of oshmem_proc_data_t must be less or equal than - * OMPI_PROC_PADDING_SIZE */ -struct oshmem_proc_data_t { - char * transport_ids; - int num_transports; -}; - -typedef struct oshmem_proc_data_t oshmem_proc_data_t; - -#define OSHMEM_PROC_DATA(proc) \ - ((oshmem_proc_data_t *)(proc)->padding) - /** * Group of Open SHMEM processes structure * @@ -61,9 +50,7 @@ struct oshmem_group_t { int my_pe; int proc_count; /**< number of processes in group */ int is_member; /* true if my_pe is part of the group, participate in collectives */ - struct ompi_proc_t **proc_array; /**< list of pointers to ompi_proc_t structures - for each process in the group */ - opal_list_t peer_list; + opal_vpid_t *proc_vpids; /* vpids of each process in group */ /* Collectives module interface and data */ mca_scoll_base_group_scoll_t g_scoll; @@ -149,14 +136,21 @@ static inline ompi_proc_t *oshmem_proc_find(int pe) return oshmem_proc_for_find(name); } +static inline int oshmem_proc_pe_vpid(oshmem_group_t *group, int pe) +{ + if (OPAL_LIKELY((pe >= 0) && (pe < group->proc_count))) { /* index must lie in [0, proc_count) */ + return (group->proc_vpids[pe]); + } else { + return -1; + } +} + static inline int oshmem_proc_pe(ompi_proc_t *proc) { return (proc ? 
(int) ((ompi_process_name_t*)&proc->super.proc_name)->vpid : -1); } -#define OSHMEM_PROC_JOBID(PROC) (((ompi_process_name_t*)&((PROC)->super.proc_name))->jobid) -#define OSHMEM_PROC_VPID(PROC) (((ompi_process_name_t*)&((PROC)->super.proc_name))->vpid) - +bool oshmem_proc_on_local_node(int pe); /** * Initialize the OSHMEM process predefined groups * @@ -232,40 +226,6 @@ oshmem_proc_group_create_nofail(int pe_start, int pe_stride, int pe_size) */ OSHMEM_DECLSPEC void oshmem_proc_group_destroy(oshmem_group_t* group); -static inline ompi_proc_t *oshmem_proc_group_all(int pe) -{ - return oshmem_group_all->proc_array[pe]; -} - -static inline ompi_proc_t *oshmem_proc_group_find(oshmem_group_t* group, - int pe) -{ - int i = 0; - ompi_proc_t* proc = NULL; - - if (OPAL_LIKELY(group)) { - if (OPAL_LIKELY(group == oshmem_group_all)) { - /* To improve performance use direct index. It is feature of oshmem_group_all */ - proc = group->proc_array[pe]; - } else { - for (i = 0; i < group->proc_count; i++) { - if (pe == oshmem_proc_pe(group->proc_array[i])) { - proc = group->proc_array[i]; - break; - } - } - } - } else { - ompi_process_name_t name; - - name.jobid = OMPI_PROC_MY_NAME->jobid; - name.vpid = pe; - proc = oshmem_proc_for_find(name); - } - - return proc; -} - static inline int oshmem_proc_group_find_id(oshmem_group_t* group, int pe) { int i = 0; @@ -273,7 +233,7 @@ static inline int oshmem_proc_group_find_id(oshmem_group_t* group, int pe) if (group) { for (i = 0; i < group->proc_count; i++) { - if (pe == oshmem_proc_pe(group->proc_array[i])) { + if (pe == oshmem_proc_pe_vpid(group, i)) { id = i; break; } @@ -299,22 +259,6 @@ static inline int oshmem_my_proc_id(void) return oshmem_group_self->my_pe; } -static inline int oshmem_get_transport_id(int pe) -{ - ompi_proc_t *proc; - - proc = oshmem_proc_group_find(oshmem_group_all, pe); - - return (int) OSHMEM_PROC_DATA(proc)->transport_ids[0]; -} - -static inline int oshmem_get_transport_count(int pe) -{ - ompi_proc_t *proc; - 
proc = oshmem_proc_group_find(oshmem_group_all, pe); - return OSHMEM_PROC_DATA(proc)->num_transports; -} - END_C_DECLS #endif /* OSHMEM_PROC_PROC_H */ diff --git a/oshmem/runtime/oshmem_shmem_finalize.c b/oshmem/runtime/oshmem_shmem_finalize.c index 7e979b8a62e..1950e20fdd1 100644 --- a/oshmem/runtime/oshmem_shmem_finalize.c +++ b/oshmem/runtime/oshmem_shmem_finalize.c @@ -53,7 +53,7 @@ #include "oshmem/shmem/shmem_lock.h" #include "oshmem/runtime/oshmem_shmem_preconnect.h" -extern int oshmem_shmem_globalexit_status; +extern int oshmem_shmem_inglobalexit; static int _shmem_finalize(void); @@ -79,7 +79,7 @@ int oshmem_shmem_finalize(void) if ((OSHMEM_SUCCESS == ret) && (state >= OMPI_MPI_STATE_INIT_COMPLETED && state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) && - oshmem_shmem_globalexit_status == 0) { + (oshmem_shmem_inglobalexit == 0)) { PMPI_Comm_free(&oshmem_comm_world); ret = ompi_mpi_finalize(); } @@ -126,7 +126,7 @@ static int _shmem_finalize(void) if (OSHMEM_SUCCESS != (ret = - MCA_SPML_CALL(del_procs(oshmem_group_all->proc_array, oshmem_group_all->proc_count)))) { + MCA_SPML_CALL(del_procs(oshmem_group_all, oshmem_group_all->proc_count)))) { return ret; } diff --git a/oshmem/runtime/oshmem_shmem_init.c b/oshmem/runtime/oshmem_shmem_init.c index 355d0995b05..ef2714198ba 100644 --- a/oshmem/runtime/oshmem_shmem_init.c +++ b/oshmem/runtime/oshmem_shmem_init.c @@ -65,12 +65,6 @@ #include #endif -#if OPAL_CC_USE_PRAGMA_IDENT -#pragma ident OMPI_IDENT_STRING -#elif OPAL_CC_USE_IDENT -#ident OSHMEM_IDENT_STRING -#endif - /* * WHAT: add thread for invoking opal_progress() function * WHY: SHMEM based on current ompi/trunk (by the time of integrating into Open MPI) @@ -101,6 +95,9 @@ shmem_internal_mutex_t shmem_internal_mutex_alloc = {{0}}; shmem_ctx_t oshmem_ctx_default = NULL; +shmem_team_t oshmem_team_shared = NULL; +shmem_team_t oshmem_team_world = NULL; + static int _shmem_init(int argc, char **argv, int requested, int *provided); #if 
OSHMEM_OPAL_THREAD_ENABLE @@ -347,7 +344,7 @@ static int _shmem_init(int argc, char **argv, int requested, int *provided) OPAL_TIMING_ENV_NEXT(timing, "MCA_SPML_CALL(enable())"); ret = - MCA_SPML_CALL(add_procs(oshmem_group_all->proc_array, oshmem_group_all->proc_count)); + MCA_SPML_CALL(add_procs(oshmem_group_all, oshmem_group_all->proc_count)); if (OSHMEM_SUCCESS != ret) { error = "SPML add procs failed"; goto error; diff --git a/oshmem/runtime/runtime.h b/oshmem/runtime/runtime.h index 7f83fdc3e0d..3bd6e4e794b 100644 --- a/oshmem/runtime/runtime.h +++ b/oshmem/runtime/runtime.h @@ -151,6 +151,8 @@ OSHMEM_DECLSPEC int oshmem_shmem_register_params(void); #if OSHMEM_PARAM_CHECK == 1 + + #define RUNTIME_CHECK_ERROR(...) \ do { \ fprintf(stderr, "[%s]%s[%s:%d:%s] ", \ @@ -160,6 +162,16 @@ OSHMEM_DECLSPEC int oshmem_shmem_register_params(void); fprintf(stderr, __VA_ARGS__); \ } while(0); +/* check if this routine is implemented. Can be used for routines that do + * not return error code. The argument is evaluated exactly once. */ +#define RUNTIME_CHECK_IMPL_RC(x) \ + { \ + int _rc = (x); \ + if (_rc <= -1) { \ + RUNTIME_CHECK_ERROR("Internal error is appeared rc = %d\n", (_rc)); \ + } \ + } + /** * Check if SHMEM API generates internal error return code * Note: most API does not return error code @@ -222,6 +234,7 @@ OSHMEM_DECLSPEC int oshmem_shmem_register_params(void); #define RUNTIME_CHECK_ADDR(x) #define RUNTIME_CHECK_ADDR_SIZE(x,s) #define RUNTIME_CHECK_WITH_MEMHEAP_SIZE(x) +#define RUNTIME_CHECK_IMPL_RC(x) #endif /* OSHMEM_PARAM_CHECK */ diff --git a/oshmem/shmem/c/Makefile.am b/oshmem/shmem/c/Makefile.am index 3acd7e400d0..d2c152073c0 100644 --- a/oshmem/shmem/c/Makefile.am +++ b/oshmem/shmem/c/Makefile.am @@ -2,6 +2,8 @@ # Copyright (c) 2013-2016 Mellanox Technologies, Inc. # All rights reserved # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2021 Amazon.com, Inc. or its affiliates. +# All Rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -10,10 +12,6 @@ # -if OSHMEM_PROFILING - SUBDIRS = profile -endif - OSHMEM_AUX_SOURCES = \ shmem_lock.c @@ -70,23 +68,38 @@ OSHMEM_API_SOURCES = \ shmem_set_lock.c \ shmem_test_lock.c \ shmem_global_exit.c \ + shmem_cswap_nb.c \ + shmem_fadd_nb.c \ + shmem_fand_nb.c \ + shmem_fetch_nb.c \ + shmem_finc_nb.c \ + shmem_for_nb.c \ + shmem_fxor_nb.c \ + shmem_pcontrol.c \ + shmem_put_signal.c \ + shmem_put_signal_nb.c \ + shmem_swap_nb.c \ + shmem_team.c \ + shmem_wait_ivars.c \ + shmem_test_ivars.c \ shmem_info.c -AM_CPPFLAGS = -DOSHMEM_PROFILING=0 - noinst_LTLIBRARIES = if PROJECT_OSHMEM # Only build if we're building OSHMEM noinst_LTLIBRARIES += liboshmem_c.la endif -headers = +headers = profile-defines.h # In case when OSHMEM_PROFILING enabled # We build api from c/profile folder -liboshmem_c_la_SOURCES = $(OSHMEM_AUX_SOURCES) -if ! OSHMEM_PROFILING -liboshmem_c_la_SOURCES += $(OSHMEM_API_SOURCES) +liboshmem_c_la_SOURCES = $(OSHMEM_AUX_SOURCES) \ + $(OSHMEM_API_SOURCES) +if OSHMEM_PROFILING +liboshmem_c_la_CPPFLAGS = -DOSHMEM_PROFILING=1 +else +liboshmem_c_la_CPPFLAGS = -DOSHMEM_PROFILING=0 endif if PROJECT_OSHMEM diff --git a/oshmem/shmem/c/profile-defines.h b/oshmem/shmem/c/profile-defines.h new file mode 100644 index 00000000000..c71628c3390 --- /dev/null +++ b/oshmem/shmem/c/profile-defines.h @@ -0,0 +1,2084 @@ +/* + * Copyright (c) 2013-2017 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OSHMEM_C_PROFILE_DEFINES_H +#define OSHMEM_C_PROFILE_DEFINES_H +/* + * This file is included in the top directory only if + * profiling is required. 
Once profiling is required, + * this file will replace all shmem_* symbols with + * pshmem_* symbols + */ + +/* + * Initialization routines + */ +#define shmem_init pshmem_init +#define shmem_init_thread pshmem_init_thread +#define start_pes pstart_pes /* shmem-compat.h */ + +/* + * Finalization routines + */ +#define shmem_finalize pshmem_finalize +#define shmem_global_exit pshmem_global_exit + +/* + * Query routines + */ +#define shmem_n_pes pshmem_n_pes +#define shmem_query_thread pshmem_query_thread +#define shmem_my_pe pshmem_my_pe +#define _num_pes p_num_pes /* shmem-compat.h */ +#define _my_pe p_my_pe /* shmem-compat.h */ + +/* + * Accessibility routines + */ +#define shmem_pe_accessible pshmem_pe_accessible +#define shmem_addr_accessible pshmem_addr_accessible + +/* + * Symmetric heap routines + */ +#define shmem_malloc pshmem_malloc +#define shmem_calloc pshmem_calloc +#define shmem_align pshmem_align +#define shmem_realloc pshmem_realloc +#define shmem_malloc_with_hints pshmem_malloc_with_hints +#define shmem_free pshmem_free +#define shmalloc pshmalloc /* shmem-compat.h */ +#define shmemalign pshmemalign /* shmem-compat.h */ +#define shrealloc pshrealloc /* shmem-compat.h */ +#define shfree pshfree /* shmem-compat.h */ + +/* + * Remote pointer operations + */ +#define shmem_ptr pshmem_ptr + +/* + * Communication context operations + */ +#define shmem_ctx_create pshmem_ctx_create +#define shmem_ctx_destroy pshmem_ctx_destroy + + +/* + * Team management routines + */ +#define shmem_team_sync pshmem_team_sync +#define shmem_team_my_pe pshmem_team_my_pe +#define shmem_team_n_pes pshmem_team_n_pes +#define shmem_team_get_config pshmem_team_get_config +#define shmem_team_translate_pe pshmem_team_translate_pe +#define shmem_team_split_strided pshmem_team_split_strided +#define shmem_team_split_2d pshmem_team_split_2d +#define shmem_team_destroy pshmem_team_destroy +#define shmem_ctx_get_team pshmem_ctx_get_team +#define shmem_team_create_ctx 
pshmem_team_create_ctx + +/* + * Teams-based Collectives + */ + +/* Teams alltoall */ +#define shmem_char_alltoall pshmem_char_alltoall +#define shmem_short_alltoall pshmem_short_alltoall +#define shmem_int_alltoall pshmem_int_alltoall +#define shmem_long_alltoall pshmem_long_alltoall +#define shmem_float_alltoall pshmem_float_alltoall +#define shmem_double_alltoall pshmem_double_alltoall +#define shmem_longlong_alltoall pshmem_longlong_alltoall +#define shmem_schar_alltoall pshmem_schar_alltoall +#define shmem_uchar_alltoall pshmem_uchar_alltoall +#define shmem_ushort_alltoall pshmem_ushort_alltoall +#define shmem_uint_alltoall pshmem_uint_alltoall +#define shmem_ulong_alltoall pshmem_ulong_alltoall +#define shmem_ulonglong_alltoall pshmem_ulonglong_alltoall +#define shmem_longdouble_alltoall pshmem_longdouble_alltoall +#define shmem_int8_alltoall pshmem_int8_alltoall +#define shmem_int16_alltoall pshmem_int16_alltoall +#define shmem_int32_alltoall pshmem_int32_alltoall +#define shmem_int64_alltoall pshmem_int64_alltoall +#define shmem_uint8_alltoall pshmem_uint8_alltoall +#define shmem_uint16_alltoall pshmem_uint16_alltoall +#define shmem_uint32_alltoall pshmem_uint32_alltoall +#define shmem_uint64_alltoall pshmem_uint64_alltoall +#define shmem_size_alltoall pshmem_size_alltoall +#define shmem_ptrdiff_alltoall pshmem_ptrdiff_alltoall + +#define shmem_alltoallmem pshmem_alltoallmem + + +/* Teams alltoalls */ +#define shmem_char_alltoalls pshmem_char_alltoalls +#define shmem_short_alltoalls pshmem_short_alltoalls +#define shmem_int_alltoalls pshmem_int_alltoalls +#define shmem_long_alltoalls pshmem_long_alltoalls +#define shmem_float_alltoalls pshmem_float_alltoalls +#define shmem_double_alltoalls pshmem_double_alltoalls +#define shmem_longlong_alltoalls pshmem_longlong_alltoalls +#define shmem_schar_alltoalls pshmem_schar_alltoalls +#define shmem_uchar_alltoalls pshmem_uchar_alltoalls +#define shmem_ushort_alltoalls pshmem_ushort_alltoalls +#define 
shmem_uint_alltoalls pshmem_uint_alltoalls +#define shmem_ulong_alltoalls pshmem_ulong_alltoalls +#define shmem_ulonglong_alltoalls pshmem_ulonglong_alltoalls +#define shmem_longdouble_alltoalls pshmem_longdouble_alltoalls +#define shmem_int8_alltoalls pshmem_int8_alltoalls +#define shmem_int16_alltoalls pshmem_int16_alltoalls +#define shmem_int32_alltoalls pshmem_int32_alltoalls +#define shmem_int64_alltoalls pshmem_int64_alltoalls +#define shmem_uint8_alltoalls pshmem_uint8_alltoalls +#define shmem_uint16_alltoalls pshmem_uint16_alltoalls +#define shmem_uint32_alltoalls pshmem_uint32_alltoalls +#define shmem_uint64_alltoalls pshmem_uint64_alltoalls +#define shmem_size_alltoalls pshmem_size_alltoalls +#define shmem_ptrdiff_alltoalls pshmem_ptrdiff_alltoalls + +#define shmem_alltoallsmem pshmem_alltoallsmem + + +/* Teams broadcast */ +#define shmem_char_broadcast pshmem_char_broadcast +#define shmem_short_broadcast pshmem_short_broadcast +#define shmem_int_broadcast pshmem_int_broadcast +#define shmem_long_broadcast pshmem_long_broadcast +#define shmem_float_broadcast pshmem_float_broadcast +#define shmem_double_broadcast pshmem_double_broadcast +#define shmem_longlong_broadcast pshmem_longlong_broadcast +#define shmem_schar_broadcast pshmem_schar_broadcast +#define shmem_uchar_broadcast pshmem_uchar_broadcast +#define shmem_ushort_broadcast pshmem_ushort_broadcast +#define shmem_uint_broadcast pshmem_uint_broadcast +#define shmem_ulong_broadcast pshmem_ulong_broadcast +#define shmem_ulonglong_broadcast pshmem_ulonglong_broadcast +#define shmem_longdouble_broadcast pshmem_longdouble_broadcast +#define shmem_int8_broadcast pshmem_int8_broadcast +#define shmem_int16_broadcast pshmem_int16_broadcast +#define shmem_int32_broadcast pshmem_int32_broadcast +#define shmem_int64_broadcast pshmem_int64_broadcast +#define shmem_uint8_broadcast pshmem_uint8_broadcast +#define shmem_uint16_broadcast pshmem_uint16_broadcast +#define shmem_uint32_broadcast pshmem_uint32_broadcast 
+#define shmem_uint64_broadcast pshmem_uint64_broadcast +#define shmem_size_broadcast pshmem_size_broadcast +#define shmem_ptrdiff_broadcast pshmem_ptrdiff_broadcast + +#define shmem_broadcastmem pshmem_broadcastmem + + +/* Teams collect */ +#define shmem_char_collect pshmem_char_collect +#define shmem_short_collect pshmem_short_collect +#define shmem_int_collect pshmem_int_collect +#define shmem_long_collect pshmem_long_collect +#define shmem_float_collect pshmem_float_collect +#define shmem_double_collect pshmem_double_collect +#define shmem_longlong_collect pshmem_longlong_collect +#define shmem_schar_collect pshmem_schar_collect +#define shmem_uchar_collect pshmem_uchar_collect +#define shmem_ushort_collect pshmem_ushort_collect +#define shmem_uint_collect pshmem_uint_collect +#define shmem_ulong_collect pshmem_ulong_collect +#define shmem_ulonglong_collect pshmem_ulonglong_collect +#define shmem_longdouble_collect pshmem_longdouble_collect +#define shmem_int8_collect pshmem_int8_collect +#define shmem_int16_collect pshmem_int16_collect +#define shmem_int32_collect pshmem_int32_collect +#define shmem_int64_collect pshmem_int64_collect +#define shmem_uint8_collect pshmem_uint8_collect +#define shmem_uint16_collect pshmem_uint16_collect +#define shmem_uint32_collect pshmem_uint32_collect +#define shmem_uint64_collect pshmem_uint64_collect +#define shmem_size_collect pshmem_size_collect +#define shmem_ptrdiff_collect pshmem_ptrdiff_collect + +#define shmem_collectmem pshmem_collectmem + + +/* Teams fcollect */ +#define shmem_char_fcollect pshmem_char_fcollect +#define shmem_short_fcollect pshmem_short_fcollect +#define shmem_int_fcollect pshmem_int_fcollect +#define shmem_long_fcollect pshmem_long_fcollect +#define shmem_float_fcollect pshmem_float_fcollect +#define shmem_double_fcollect pshmem_double_fcollect +#define shmem_longlong_fcollect pshmem_longlong_fcollect +#define shmem_schar_fcollect pshmem_schar_fcollect +#define shmem_uchar_fcollect 
pshmem_uchar_fcollect +#define shmem_ushort_fcollect pshmem_ushort_fcollect +#define shmem_uint_fcollect pshmem_uint_fcollect +#define shmem_ulong_fcollect pshmem_ulong_fcollect +#define shmem_ulonglong_fcollect pshmem_ulonglong_fcollect +#define shmem_longdouble_fcollect pshmem_longdouble_fcollect +#define shmem_int8_fcollect pshmem_int8_fcollect +#define shmem_int16_fcollect pshmem_int16_fcollect +#define shmem_int32_fcollect pshmem_int32_fcollect +#define shmem_int64_fcollect pshmem_int64_fcollect +#define shmem_uint8_fcollect pshmem_uint8_fcollect +#define shmem_uint16_fcollect pshmem_uint16_fcollect +#define shmem_uint32_fcollect pshmem_uint32_fcollect +#define shmem_uint64_fcollect pshmem_uint64_fcollect +#define shmem_size_fcollect pshmem_size_fcollect +#define shmem_ptrdiff_fcollect pshmem_ptrdiff_fcollect + +#define shmem_fcollectmem pshmem_fcollectmem + + +/* Teams reduction: AND */ +#define shmem_uchar_and_reduce pshmem_uchar_and_reduce +#define shmem_ushort_and_reduce pshmem_ushort_and_reduce +#define shmem_uint_and_reduce pshmem_uint_and_reduce +#define shmem_ulong_and_reduce pshmem_ulong_and_reduce +#define shmem_ulonglong_and_reduce pshmem_ulonglong_and_reduce +#define shmem_int_and_reduce pshmem_int_and_reduce +#define shmem_longlong_and_reduce pshmem_longlong_and_reduce +#define shmem_int8_and_reduce pshmem_int8_and_reduce +#define shmem_int16_and_reduce pshmem_int16_and_reduce +#define shmem_int32_and_reduce pshmem_int32_and_reduce +#define shmem_int64_and_reduce pshmem_int64_and_reduce +#define shmem_uint8_and_reduce pshmem_uint8_and_reduce +#define shmem_uint16_and_reduce pshmem_uint16_and_reduce +#define shmem_uint32_and_reduce pshmem_uint32_and_reduce +#define shmem_uint64_and_reduce pshmem_uint64_and_reduce +#define shmem_size_and_reduce pshmem_size_and_reduce + + +/* Teams reduction: OR */ +#define shmem_uchar_or_reduce pshmem_uchar_or_reduce +#define shmem_ushort_or_reduce pshmem_ushort_or_reduce +#define shmem_uint_or_reduce 
pshmem_uint_or_reduce +#define shmem_ulong_or_reduce pshmem_ulong_or_reduce +#define shmem_ulonglong_or_reduce pshmem_ulonglong_or_reduce +#define shmem_int8_or_reduce pshmem_int8_or_reduce +#define shmem_int16_or_reduce pshmem_int16_or_reduce +#define shmem_int32_or_reduce pshmem_int32_or_reduce +#define shmem_int64_or_reduce pshmem_int64_or_reduce +#define shmem_uint8_or_reduce pshmem_uint8_or_reduce +#define shmem_uint16_or_reduce pshmem_uint16_or_reduce +#define shmem_uint32_or_reduce pshmem_uint32_or_reduce +#define shmem_uint64_or_reduce pshmem_uint64_or_reduce +#define shmem_size_or_reduce pshmem_size_or_reduce + + +/* Teams reduction: XOR */ +#define shmem_uchar_xor_reduce pshmem_uchar_xor_reduce +#define shmem_ushort_xor_reduce pshmem_ushort_xor_reduce +#define shmem_uint_xor_reduce pshmem_uint_xor_reduce +#define shmem_ulong_xor_reduce pshmem_ulong_xor_reduce +#define shmem_ulonglong_xor_reduce pshmem_ulonglong_xor_reduce +#define shmem_int8_xor_reduce pshmem_int8_xor_reduce +#define shmem_int16_xor_reduce pshmem_int16_xor_reduce +#define shmem_int32_xor_reduce pshmem_int32_xor_reduce +#define shmem_int64_xor_reduce pshmem_int64_xor_reduce +#define shmem_uint8_xor_reduce pshmem_uint8_xor_reduce +#define shmem_uint16_xor_reduce pshmem_uint16_xor_reduce +#define shmem_uint32_xor_reduce pshmem_uint32_xor_reduce +#define shmem_uint64_xor_reduce pshmem_uint64_xor_reduce +#define shmem_size_xor_reduce pshmem_size_xor_reduce + + +/* Teams reduction: MAX */ +#define shmem_char_max_reduce pshmem_char_max_reduce +#define shmem_short_max_reduce pshmem_short_max_reduce +#define shmem_int_max_reduce pshmem_int_max_reduce +#define shmem_long_max_reduce pshmem_long_max_reduce +#define shmem_float_max_reduce pshmem_float_max_reduce +#define shmem_double_max_reduce pshmem_double_max_reduce +#define shmem_longlong_max_reduce pshmem_longlong_max_reduce +#define shmem_schar_max_reduce pshmem_schar_max_reduce +#define shmem_longdouble_max_reduce pshmem_longdouble_max_reduce 
+#define shmem_ptrdiff_max_reduce pshmem_ptrdiff_max_reduce +#define shmem_uchar_max_reduce pshmem_uchar_max_reduce +#define shmem_ushort_max_reduce pshmem_ushort_max_reduce +#define shmem_uint_max_reduce pshmem_uint_max_reduce +#define shmem_ulong_max_reduce pshmem_ulong_max_reduce +#define shmem_ulonglong_max_reduce pshmem_ulonglong_max_reduce +#define shmem_int8_max_reduce pshmem_int8_max_reduce +#define shmem_int16_max_reduce pshmem_int16_max_reduce +#define shmem_int32_max_reduce pshmem_int32_max_reduce +#define shmem_int64_max_reduce pshmem_int64_max_reduce +#define shmem_uint8_max_reduce pshmem_uint8_max_reduce +#define shmem_uint16_max_reduce pshmem_uint16_max_reduce +#define shmem_uint32_max_reduce pshmem_uint32_max_reduce +#define shmem_uint64_max_reduce pshmem_uint64_max_reduce +#define shmem_size_max_reduce pshmem_size_max_reduce + + +/* Teams reduction: MIN */ +#define shmem_char_min_reduce pshmem_char_min_reduce +#define shmem_short_min_reduce pshmem_short_min_reduce +#define shmem_int_min_reduce pshmem_int_min_reduce +#define shmem_long_min_reduce pshmem_long_min_reduce +#define shmem_float_min_reduce pshmem_float_min_reduce +#define shmem_double_min_reduce pshmem_double_min_reduce +#define shmem_longlong_min_reduce pshmem_longlong_min_reduce +#define shmem_schar_min_reduce pshmem_schar_min_reduce +#define shmem_longdouble_min_reduce pshmem_longdouble_min_reduce +#define shmem_ptrdiff_min_reduce pshmem_ptrdiff_min_reduce +#define shmem_uchar_min_reduce pshmem_uchar_min_reduce +#define shmem_ushort_min_reduce pshmem_ushort_min_reduce +#define shmem_uint_min_reduce pshmem_uint_min_reduce +#define shmem_ulong_min_reduce pshmem_ulong_min_reduce +#define shmem_ulonglong_min_reduce pshmem_ulonglong_min_reduce +#define shmem_int8_min_reduce pshmem_int8_min_reduce +#define shmem_int16_min_reduce pshmem_int16_min_reduce +#define shmem_int32_min_reduce pshmem_int32_min_reduce +#define shmem_int64_min_reduce pshmem_int64_min_reduce +#define 
shmem_uint8_min_reduce pshmem_uint8_min_reduce +#define shmem_uint16_min_reduce pshmem_uint16_min_reduce +#define shmem_uint32_min_reduce pshmem_uint32_min_reduce +#define shmem_uint64_min_reduce pshmem_uint64_min_reduce +#define shmem_size_min_reduce pshmem_size_min_reduce + + +/* Teams reduction: SUM */ +#define shmem_char_sum_reduce pshmem_char_sum_reduce +#define shmem_short_sum_reduce pshmem_short_sum_reduce +#define shmem_int_sum_reduce pshmem_int_sum_reduce +#define shmem_long_sum_reduce pshmem_long_sum_reduce +#define shmem_float_sum_reduce pshmem_float_sum_reduce +#define shmem_double_sum_reduce pshmem_double_sum_reduce +#define shmem_longlong_sum_reduce pshmem_longlong_sum_reduce +#define shmem_schar_sum_reduce pshmem_schar_sum_reduce +#define shmem_longdouble_sum_reduce pshmem_longdouble_sum_reduce +#define shmem_ptrdiff_sum_reduce pshmem_ptrdiff_sum_reduce +#define shmem_uchar_sum_reduce pshmem_uchar_sum_reduce +#define shmem_ushort_sum_reduce pshmem_ushort_sum_reduce +#define shmem_uint_sum_reduce pshmem_uint_sum_reduce +#define shmem_ulong_sum_reduce pshmem_ulong_sum_reduce +#define shmem_ulonglong_sum_reduce pshmem_ulonglong_sum_reduce +#define shmem_int8_sum_reduce pshmem_int8_sum_reduce +#define shmem_int16_sum_reduce pshmem_int16_sum_reduce +#define shmem_int32_sum_reduce pshmem_int32_sum_reduce +#define shmem_int64_sum_reduce pshmem_int64_sum_reduce +#define shmem_uint8_sum_reduce pshmem_uint8_sum_reduce +#define shmem_uint16_sum_reduce pshmem_uint16_sum_reduce +#define shmem_uint32_sum_reduce pshmem_uint32_sum_reduce +#define shmem_uint64_sum_reduce pshmem_uint64_sum_reduce +#define shmem_size_sum_reduce pshmem_size_sum_reduce +#define shmem_complexd_sum_reduce pshmem_complexd_sum_reduce +#define shmem_complexf_sum_reduce pshmem_complexf_sum_reduce + + +/* Teams reduction: PROD */ +#define shmem_char_prod_reduce pshmem_char_prod_reduce +#define shmem_short_prod_reduce pshmem_short_prod_reduce +#define shmem_int_prod_reduce pshmem_int_prod_reduce 
+#define shmem_long_prod_reduce pshmem_long_prod_reduce +#define shmem_float_prod_reduce pshmem_float_prod_reduce +#define shmem_double_prod_reduce pshmem_double_prod_reduce +#define shmem_longlong_prod_reduce pshmem_longlong_prod_reduce +#define shmem_schar_prod_reduce pshmem_schar_prod_reduce +#define shmem_longdouble_prod_reduce pshmem_longdouble_prod_reduce +#define shmem_ptrdiff_prod_reduce pshmem_ptrdiff_prod_reduce +#define shmem_uchar_prod_reduce pshmem_uchar_prod_reduce +#define shmem_ushort_prod_reduce pshmem_ushort_prod_reduce +#define shmem_uint_prod_reduce pshmem_uint_prod_reduce +#define shmem_ulong_prod_reduce pshmem_ulong_prod_reduce +#define shmem_ulonglong_prod_reduce pshmem_ulonglong_prod_reduce +#define shmem_int8_prod_reduce pshmem_int8_prod_reduce +#define shmem_int16_prod_reduce pshmem_int16_prod_reduce +#define shmem_int32_prod_reduce pshmem_int32_prod_reduce +#define shmem_int64_prod_reduce pshmem_int64_prod_reduce +#define shmem_uint8_prod_reduce pshmem_uint8_prod_reduce +#define shmem_uint16_prod_reduce pshmem_uint16_prod_reduce +#define shmem_uint32_prod_reduce pshmem_uint32_prod_reduce +#define shmem_uint64_prod_reduce pshmem_uint64_prod_reduce +#define shmem_size_prod_reduce pshmem_size_prod_reduce +#define shmem_complexd_prod_reduce pshmem_complexd_prod_reduce +#define shmem_complexf_prod_reduce pshmem_complexf_prod_reduce + +/* + * Elemental put routines + */ +#define shmem_ctx_char_p pshmem_ctx_char_p +#define shmem_ctx_short_p pshmem_ctx_short_p +#define shmem_ctx_int_p pshmem_ctx_int_p +#define shmem_ctx_long_p pshmem_ctx_long_p +#define shmem_ctx_float_p pshmem_ctx_float_p +#define shmem_ctx_double_p pshmem_ctx_double_p +#define shmem_ctx_longlong_p pshmem_ctx_longlong_p +#define shmem_ctx_schar_p pshmem_ctx_schar_p +#define shmem_ctx_uchar_p pshmem_ctx_uchar_p +#define shmem_ctx_ushort_p pshmem_ctx_ushort_p +#define shmem_ctx_uint_p pshmem_ctx_uint_p +#define shmem_ctx_ulong_p pshmem_ctx_ulong_p +#define shmem_ctx_ulonglong_p 
pshmem_ctx_ulonglong_p +#define shmem_ctx_longdouble_p pshmem_ctx_longdouble_p +#define shmem_ctx_int8_p pshmem_ctx_int8_p +#define shmem_ctx_int16_p pshmem_ctx_int16_p +#define shmem_ctx_int32_p pshmem_ctx_int32_p +#define shmem_ctx_int64_p pshmem_ctx_int64_p +#define shmem_ctx_uint8_p pshmem_ctx_uint8_p +#define shmem_ctx_uint16_p pshmem_ctx_uint16_p +#define shmem_ctx_uint32_p pshmem_ctx_uint32_p +#define shmem_ctx_uint64_p pshmem_ctx_uint64_p +#define shmem_ctx_size_p pshmem_ctx_size_p +#define shmem_ctx_ptrdiff_p pshmem_ctx_ptrdiff_p + +#define shmem_char_p pshmem_char_p +#define shmem_short_p pshmem_short_p +#define shmem_int_p pshmem_int_p +#define shmem_long_p pshmem_long_p +#define shmem_float_p pshmem_float_p +#define shmem_double_p pshmem_double_p +#define shmem_longlong_p pshmem_longlong_p +#define shmem_schar_p pshmem_schar_p +#define shmem_uchar_p pshmem_uchar_p +#define shmem_ushort_p pshmem_ushort_p +#define shmem_uint_p pshmem_uint_p +#define shmem_ulong_p pshmem_ulong_p +#define shmem_ulonglong_p pshmem_ulonglong_p +#define shmem_longdouble_p pshmem_longdouble_p +#define shmem_int8_p pshmem_int8_p +#define shmem_int16_p pshmem_int16_p +#define shmem_int32_p pshmem_int32_p +#define shmem_int64_p pshmem_int64_p +#define shmem_uint8_p pshmem_uint8_p +#define shmem_uint16_p pshmem_uint16_p +#define shmem_uint32_p pshmem_uint32_p +#define shmem_uint64_p pshmem_uint64_p +#define shmem_size_p pshmem_size_p +#define shmem_ptrdiff_p pshmem_ptrdiff_p + +#define shmemx_int16_p pshmemx_int16_p +#define shmemx_int32_p pshmemx_int32_p +#define shmemx_int64_p pshmemx_int64_p + +/* + * Signaled put routines + */ +#define shmem_ctx_char_put_signal pshmem_ctx_char_put_signal +#define shmem_ctx_short_put_signal pshmem_ctx_short_put_signal +#define shmem_ctx_int_put_signal pshmem_ctx_int_put_signal +#define shmem_ctx_long_put_signal pshmem_ctx_long_put_signal +#define shmem_ctx_float_put_signal pshmem_ctx_float_put_signal +#define shmem_ctx_double_put_signal 
pshmem_ctx_double_put_signal +#define shmem_ctx_longlong_put_signal pshmem_ctx_longlong_put_signal +#define shmem_ctx_schar_put_signal pshmem_ctx_schar_put_signal +#define shmem_ctx_uchar_put_signal pshmem_ctx_uchar_put_signal +#define shmem_ctx_ushort_put_signal pshmem_ctx_ushort_put_signal +#define shmem_ctx_uint_put_signal pshmem_ctx_uint_put_signal +#define shmem_ctx_ulong_put_signal pshmem_ctx_ulong_put_signal +#define shmem_ctx_ulonglong_put_signal pshmem_ctx_ulonglong_put_signal +#define shmem_ctx_longdouble_put_signal pshmem_ctx_longdouble_put_signal +#define shmem_ctx_int8_put_signal pshmem_ctx_int8_put_signal +#define shmem_ctx_int16_put_signal pshmem_ctx_int16_put_signal +#define shmem_ctx_int32_put_signal pshmem_ctx_int32_put_signal +#define shmem_ctx_int64_put_signal pshmem_ctx_int64_put_signal +#define shmem_ctx_uint8_put_signal pshmem_ctx_uint8_put_signal +#define shmem_ctx_uint16_put_signal pshmem_ctx_uint16_put_signal +#define shmem_ctx_uint32_put_signal pshmem_ctx_uint32_put_signal +#define shmem_ctx_uint64_put_signal pshmem_ctx_uint64_put_signal +#define shmem_ctx_size_put_signal pshmem_ctx_size_put_signal +#define shmem_ctx_ptrdiff_put_signal pshmem_ctx_ptrdiff_put_signal + +#define shmem_char_put_signal pshmem_char_put_signal +#define shmem_short_put_signal pshmem_short_put_signal +#define shmem_int_put_signal pshmem_int_put_signal +#define shmem_long_put_signal pshmem_long_put_signal +#define shmem_float_put_signal pshmem_float_put_signal +#define shmem_double_put_signal pshmem_double_put_signal +#define shmem_longlong_put_signal pshmem_longlong_put_signal +#define shmem_schar_put_signal pshmem_schar_put_signal +#define shmem_uchar_put_signal pshmem_uchar_put_signal +#define shmem_ushort_put_signal pshmem_ushort_put_signal +#define shmem_uint_put_signal pshmem_uint_put_signal +#define shmem_ulong_put_signal pshmem_ulong_put_signal +#define shmem_ulonglong_put_signal pshmem_ulonglong_put_signal +#define shmem_longdouble_put_signal 
pshmem_longdouble_put_signal +#define shmem_int8_put_signal pshmem_int8_put_signal +#define shmem_int16_put_signal pshmem_int16_put_signal +#define shmem_int32_put_signal pshmem_int32_put_signal +#define shmem_int64_put_signal pshmem_int64_put_signal +#define shmem_uint8_put_signal pshmem_uint8_put_signal +#define shmem_uint16_put_signal pshmem_uint16_put_signal +#define shmem_uint32_put_signal pshmem_uint32_put_signal +#define shmem_uint64_put_signal pshmem_uint64_put_signal +#define shmem_size_put_signal pshmem_size_put_signal +#define shmem_ptrdiff_put_signal pshmem_ptrdiff_put_signal + +#define shmem_put8_signal pshmem_put8_signal +#define shmem_put16_signal pshmem_put16_signal +#define shmem_put32_signal pshmem_put32_signal +#define shmem_put64_signal pshmem_put64_signal +#define shmem_put128_signal pshmem_put128_signal + +#define shmem_ctx_put8_signal pshmem_ctx_put8_signal +#define shmem_ctx_put16_signal pshmem_ctx_put16_signal +#define shmem_ctx_put32_signal pshmem_ctx_put32_signal +#define shmem_ctx_put64_signal pshmem_ctx_put64_signal +#define shmem_ctx_put128_signal pshmem_ctx_put128_signal + +#define shmem_putmem_signal pshmem_putmem_signal +#define shmem_ctx_putmem_signal pshmem_ctx_putmem_signal + +/* + * Nonblocking signaled put routines + */ +#define shmem_ctx_char_put_signal_nbi pshmem_ctx_char_put_signal_nbi +#define shmem_ctx_short_put_signal_nbi pshmem_ctx_short_put_signal_nbi +#define shmem_ctx_int_put_signal_nbi pshmem_ctx_int_put_signal_nbi +#define shmem_ctx_long_put_signal_nbi pshmem_ctx_long_put_signal_nbi +#define shmem_ctx_float_put_signal_nbi pshmem_ctx_float_put_signal_nbi +#define shmem_ctx_double_put_signal_nbi pshmem_ctx_double_put_signal_nbi +#define shmem_ctx_longlong_put_signal_nbi pshmem_ctx_longlong_put_signal_nbi +#define shmem_ctx_schar_put_signal_nbi pshmem_ctx_schar_put_signal_nbi +#define shmem_ctx_uchar_put_signal_nbi pshmem_ctx_uchar_put_signal_nbi +#define shmem_ctx_ushort_put_signal_nbi pshmem_ctx_ushort_put_signal_nbi 
+#define shmem_ctx_uint_put_signal_nbi pshmem_ctx_uint_put_signal_nbi +#define shmem_ctx_ulong_put_signal_nbi pshmem_ctx_ulong_put_signal_nbi +#define shmem_ctx_ulonglong_put_signal_nbi pshmem_ctx_ulonglong_put_signal_nbi +#define shmem_ctx_longdouble_put_signal_nbi pshmem_ctx_longdouble_put_signal_nbi +#define shmem_ctx_int8_put_signal_nbi pshmem_ctx_int8_put_signal_nbi +#define shmem_ctx_int16_put_signal_nbi pshmem_ctx_int16_put_signal_nbi +#define shmem_ctx_int32_put_signal_nbi pshmem_ctx_int32_put_signal_nbi +#define shmem_ctx_int64_put_signal_nbi pshmem_ctx_int64_put_signal_nbi +#define shmem_ctx_uint8_put_signal_nbi pshmem_ctx_uint8_put_signal_nbi +#define shmem_ctx_uint16_put_signal_nbi pshmem_ctx_uint16_put_signal_nbi +#define shmem_ctx_uint32_put_signal_nbi pshmem_ctx_uint32_put_signal_nbi +#define shmem_ctx_uint64_put_signal_nbi pshmem_ctx_uint64_put_signal_nbi +#define shmem_ctx_size_put_signal_nbi pshmem_ctx_size_put_signal_nbi +#define shmem_ctx_ptrdiff_put_signal_nbi pshmem_ctx_ptrdiff_put_signal_nbi + +#define shmem_char_put_signal_nbi pshmem_char_put_signal_nbi +#define shmem_short_put_signal_nbi pshmem_short_put_signal_nbi +#define shmem_int_put_signal_nbi pshmem_int_put_signal_nbi +#define shmem_long_put_signal_nbi pshmem_long_put_signal_nbi +#define shmem_float_put_signal_nbi pshmem_float_put_signal_nbi +#define shmem_double_put_signal_nbi pshmem_double_put_signal_nbi +#define shmem_longlong_put_signal_nbi pshmem_longlong_put_signal_nbi +#define shmem_schar_put_signal_nbi pshmem_schar_put_signal_nbi +#define shmem_uchar_put_signal_nbi pshmem_uchar_put_signal_nbi +#define shmem_ushort_put_signal_nbi pshmem_ushort_put_signal_nbi +#define shmem_uint_put_signal_nbi pshmem_uint_put_signal_nbi +#define shmem_ulong_put_signal_nbi pshmem_ulong_put_signal_nbi +#define shmem_ulonglong_put_signal_nbi pshmem_ulonglong_put_signal_nbi +#define shmem_longdouble_put_signal_nbi pshmem_longdouble_put_signal_nbi +#define shmem_int8_put_signal_nbi 
pshmem_int8_put_signal_nbi +#define shmem_int16_put_signal_nbi pshmem_int16_put_signal_nbi +#define shmem_int32_put_signal_nbi pshmem_int32_put_signal_nbi +#define shmem_int64_put_signal_nbi pshmem_int64_put_signal_nbi +#define shmem_uint8_put_signal_nbi pshmem_uint8_put_signal_nbi +#define shmem_uint16_put_signal_nbi pshmem_uint16_put_signal_nbi +#define shmem_uint32_put_signal_nbi pshmem_uint32_put_signal_nbi +#define shmem_uint64_put_signal_nbi pshmem_uint64_put_signal_nbi +#define shmem_size_put_signal_nbi pshmem_size_put_signal_nbi +#define shmem_ptrdiff_put_signal_nbi pshmem_ptrdiff_put_signal_nbi + +#define shmem_put8_signal_nbi pshmem_put8_signal_nbi +#define shmem_put16_signal_nbi pshmem_put16_signal_nbi +#define shmem_put32_signal_nbi pshmem_put32_signal_nbi +#define shmem_put64_signal_nbi pshmem_put64_signal_nbi +#define shmem_put128_signal_nbi pshmem_put128_signal_nbi + +#define shmem_ctx_put8_signal_nbi pshmem_ctx_put8_signal_nbi +#define shmem_ctx_put16_signal_nbi pshmem_ctx_put16_signal_nbi +#define shmem_ctx_put32_signal_nbi pshmem_ctx_put32_signal_nbi +#define shmem_ctx_put64_signal_nbi pshmem_ctx_put64_signal_nbi +#define shmem_ctx_put128_signal_nbi pshmem_ctx_put128_signal_nbi + +#define shmem_putmem_signal_nbi pshmem_putmem_signal_nbi +#define shmem_ctx_putmem_signal_nbi pshmem_ctx_putmem_signal_nbi + +#define shmem_signal_fetch pshmem_signal_fetch + +/* + * Block data put routines + */ +#define shmem_ctx_char_put pshmem_ctx_char_put +#define shmem_ctx_short_put pshmem_ctx_short_put +#define shmem_ctx_int_put pshmem_ctx_int_put +#define shmem_ctx_long_put pshmem_ctx_long_put +#define shmem_ctx_float_put pshmem_ctx_float_put +#define shmem_ctx_double_put pshmem_ctx_double_put +#define shmem_ctx_longlong_put pshmem_ctx_longlong_put +#define shmem_ctx_schar_put pshmem_ctx_schar_put +#define shmem_ctx_uchar_put pshmem_ctx_uchar_put +#define shmem_ctx_ushort_put pshmem_ctx_ushort_put +#define shmem_ctx_uint_put pshmem_ctx_uint_put +#define 
shmem_ctx_ulong_put pshmem_ctx_ulong_put +#define shmem_ctx_ulonglong_put pshmem_ctx_ulonglong_put +#define shmem_ctx_longdouble_put pshmem_ctx_longdouble_put +#define shmem_ctx_int8_put pshmem_ctx_int8_put +#define shmem_ctx_int16_put pshmem_ctx_int16_put +#define shmem_ctx_int32_put pshmem_ctx_int32_put +#define shmem_ctx_int64_put pshmem_ctx_int64_put +#define shmem_ctx_uint8_put pshmem_ctx_uint8_put +#define shmem_ctx_uint16_put pshmem_ctx_uint16_put +#define shmem_ctx_uint32_put pshmem_ctx_uint32_put +#define shmem_ctx_uint64_put pshmem_ctx_uint64_put +#define shmem_ctx_size_put pshmem_ctx_size_put +#define shmem_ctx_ptrdiff_put pshmem_ctx_ptrdiff_put + +#define shmem_char_put pshmem_char_put /* shmem-compat.h */ +#define shmem_short_put pshmem_short_put +#define shmem_int_put pshmem_int_put +#define shmem_long_put pshmem_long_put +#define shmem_float_put pshmem_float_put +#define shmem_double_put pshmem_double_put +#define shmem_longlong_put pshmem_longlong_put +#define shmem_schar_put pshmem_schar_put +#define shmem_uchar_put pshmem_uchar_put +#define shmem_ushort_put pshmem_ushort_put +#define shmem_uint_put pshmem_uint_put +#define shmem_ulong_put pshmem_ulong_put +#define shmem_ulonglong_put pshmem_ulonglong_put +#define shmem_longdouble_put pshmem_longdouble_put +#define shmem_int8_put pshmem_int8_put +#define shmem_int16_put pshmem_int16_put +#define shmem_int32_put pshmem_int32_put +#define shmem_int64_put pshmem_int64_put +#define shmem_uint8_put pshmem_uint8_put +#define shmem_uint16_put pshmem_uint16_put +#define shmem_uint32_put pshmem_uint32_put +#define shmem_uint64_put pshmem_uint64_put +#define shmem_size_put pshmem_size_put +#define shmem_ptrdiff_put pshmem_ptrdiff_put + +#define shmem_ctx_put8 pshmem_ctx_put8 +#define shmem_ctx_put16 pshmem_ctx_put16 +#define shmem_ctx_put32 pshmem_ctx_put32 +#define shmem_ctx_put64 pshmem_ctx_put64 +#define shmem_ctx_put128 pshmem_ctx_put128 +#define shmem_ctx_putmem pshmem_ctx_putmem + +#define shmem_put8 
pshmem_put8 +#define shmem_put16 pshmem_put16 +#define shmem_put32 pshmem_put32 +#define shmem_put64 pshmem_put64 +#define shmem_put128 pshmem_put128 +#define shmem_putmem pshmem_putmem + + +/* + * Strided put routines + */ +#define shmem_ctx_char_iput pshmem_ctx_char_iput +#define shmem_ctx_short_iput pshmem_ctx_short_iput +#define shmem_ctx_int_iput pshmem_ctx_int_iput +#define shmem_ctx_long_iput pshmem_ctx_long_iput +#define shmem_ctx_float_iput pshmem_ctx_float_iput +#define shmem_ctx_double_iput pshmem_ctx_double_iput +#define shmem_ctx_longlong_iput pshmem_ctx_longlong_iput +#define shmem_ctx_schar_iput pshmem_ctx_schar_iput +#define shmem_ctx_uchar_iput pshmem_ctx_uchar_iput +#define shmem_ctx_ushort_iput pshmem_ctx_ushort_iput +#define shmem_ctx_uint_iput pshmem_ctx_uint_iput +#define shmem_ctx_ulong_iput pshmem_ctx_ulong_iput +#define shmem_ctx_ulonglong_iput pshmem_ctx_ulonglong_iput +#define shmem_ctx_longdouble_iput pshmem_ctx_longdouble_iput +#define shmem_ctx_int8_iput pshmem_ctx_int8_iput +#define shmem_ctx_int16_iput pshmem_ctx_int16_iput +#define shmem_ctx_int32_iput pshmem_ctx_int32_iput +#define shmem_ctx_int64_iput pshmem_ctx_int64_iput +#define shmem_ctx_uint8_iput pshmem_ctx_uint8_iput +#define shmem_ctx_uint16_iput pshmem_ctx_uint16_iput +#define shmem_ctx_uint32_iput pshmem_ctx_uint32_iput +#define shmem_ctx_uint64_iput pshmem_ctx_uint64_iput +#define shmem_ctx_size_iput pshmem_ctx_size_iput +#define shmem_ctx_ptrdiff_iput pshmem_ctx_ptrdiff_iput + +#define shmem_char_iput pshmem_char_iput +#define shmem_short_iput pshmem_short_iput +#define shmem_int_iput pshmem_int_iput +#define shmem_long_iput pshmem_long_iput +#define shmem_float_iput pshmem_float_iput +#define shmem_double_iput pshmem_double_iput +#define shmem_longlong_iput pshmem_longlong_iput +#define shmem_schar_iput pshmem_schar_iput +#define shmem_uchar_iput pshmem_uchar_iput +#define shmem_ushort_iput pshmem_ushort_iput +#define shmem_uint_iput pshmem_uint_iput +#define 
shmem_ulong_iput pshmem_ulong_iput +#define shmem_ulonglong_iput pshmem_ulonglong_iput +#define shmem_longdouble_iput pshmem_longdouble_iput +#define shmem_int8_iput pshmem_int8_iput +#define shmem_int16_iput pshmem_int16_iput +#define shmem_int32_iput pshmem_int32_iput +#define shmem_int64_iput pshmem_int64_iput +#define shmem_uint8_iput pshmem_uint8_iput +#define shmem_uint16_iput pshmem_uint16_iput +#define shmem_uint32_iput pshmem_uint32_iput +#define shmem_uint64_iput pshmem_uint64_iput +#define shmem_size_iput pshmem_size_iput +#define shmem_ptrdiff_iput pshmem_ptrdiff_iput + +#define shmem_ctx_iput8 pshmem_ctx_iput8 +#define shmem_ctx_iput16 pshmem_ctx_iput16 +#define shmem_ctx_iput32 pshmem_ctx_iput32 +#define shmem_ctx_iput64 pshmem_ctx_iput64 +#define shmem_ctx_iput128 pshmem_ctx_iput128 + +#define shmem_iput8 pshmem_iput8 +#define shmem_iput16 pshmem_iput16 +#define shmem_iput32 pshmem_iput32 +#define shmem_iput64 pshmem_iput64 +#define shmem_iput128 pshmem_iput128 + +/* + * Non-block data put routines + */ +#define shmem_ctx_char_put_nbi pshmem_ctx_char_put_nbi +#define shmem_ctx_short_put_nbi pshmem_ctx_short_put_nbi +#define shmem_ctx_int_put_nbi pshmem_ctx_int_put_nbi +#define shmem_ctx_long_put_nbi pshmem_ctx_long_put_nbi +#define shmem_ctx_float_put_nbi pshmem_ctx_float_put_nbi +#define shmem_ctx_double_put_nbi pshmem_ctx_double_put_nbi +#define shmem_ctx_longlong_put_nbi pshmem_ctx_longlong_put_nbi +#define shmem_ctx_schar_put_nbi pshmem_ctx_schar_put_nbi +#define shmem_ctx_uchar_put_nbi pshmem_ctx_uchar_put_nbi +#define shmem_ctx_ushort_put_nbi pshmem_ctx_ushort_put_nbi +#define shmem_ctx_uint_put_nbi pshmem_ctx_uint_put_nbi +#define shmem_ctx_ulong_put_nbi pshmem_ctx_ulong_put_nbi +#define shmem_ctx_ulonglong_put_nbi pshmem_ctx_ulonglong_put_nbi +#define shmem_ctx_longdouble_put_nbi pshmem_ctx_longdouble_put_nbi +#define shmem_ctx_int8_put_nbi pshmem_ctx_int8_put_nbi +#define shmem_ctx_int16_put_nbi pshmem_ctx_int16_put_nbi +#define 
shmem_ctx_int32_put_nbi pshmem_ctx_int32_put_nbi +#define shmem_ctx_int64_put_nbi pshmem_ctx_int64_put_nbi +#define shmem_ctx_uint8_put_nbi pshmem_ctx_uint8_put_nbi +#define shmem_ctx_uint16_put_nbi pshmem_ctx_uint16_put_nbi +#define shmem_ctx_uint32_put_nbi pshmem_ctx_uint32_put_nbi +#define shmem_ctx_uint64_put_nbi pshmem_ctx_uint64_put_nbi +#define shmem_ctx_size_put_nbi pshmem_ctx_size_put_nbi +#define shmem_ctx_ptrdiff_put_nbi pshmem_ctx_ptrdiff_put_nbi + +#define shmem_char_put_nbi pshmem_char_put_nbi +#define shmem_short_put_nbi pshmem_short_put_nbi +#define shmem_int_put_nbi pshmem_int_put_nbi +#define shmem_long_put_nbi pshmem_long_put_nbi +#define shmem_float_put_nbi pshmem_float_put_nbi +#define shmem_double_put_nbi pshmem_double_put_nbi +#define shmem_longlong_put_nbi pshmem_longlong_put_nbi +#define shmem_schar_put_nbi pshmem_schar_put_nbi +#define shmem_uchar_put_nbi pshmem_uchar_put_nbi +#define shmem_ushort_put_nbi pshmem_ushort_put_nbi +#define shmem_uint_put_nbi pshmem_uint_put_nbi +#define shmem_ulong_put_nbi pshmem_ulong_put_nbi +#define shmem_ulonglong_put_nbi pshmem_ulonglong_put_nbi +#define shmem_longdouble_put_nbi pshmem_longdouble_put_nbi +#define shmem_int8_put_nbi pshmem_int8_put_nbi +#define shmem_int16_put_nbi pshmem_int16_put_nbi +#define shmem_int32_put_nbi pshmem_int32_put_nbi +#define shmem_int64_put_nbi pshmem_int64_put_nbi +#define shmem_uint8_put_nbi pshmem_uint8_put_nbi +#define shmem_uint16_put_nbi pshmem_uint16_put_nbi +#define shmem_uint32_put_nbi pshmem_uint32_put_nbi +#define shmem_uint64_put_nbi pshmem_uint64_put_nbi +#define shmem_size_put_nbi pshmem_size_put_nbi +#define shmem_ptrdiff_put_nbi pshmem_ptrdiff_put_nbi + +#define shmem_ctx_put8_nbi pshmem_ctx_put8_nbi +#define shmem_ctx_put16_nbi pshmem_ctx_put16_nbi +#define shmem_ctx_put32_nbi pshmem_ctx_put32_nbi +#define shmem_ctx_put64_nbi pshmem_ctx_put64_nbi +#define shmem_ctx_put128_nbi pshmem_ctx_put128_nbi +#define shmem_ctx_putmem_nbi pshmem_ctx_putmem_nbi + 
+#define shmem_put8_nbi pshmem_put8_nbi +#define shmem_put16_nbi pshmem_put16_nbi +#define shmem_put32_nbi pshmem_put32_nbi +#define shmem_put64_nbi pshmem_put64_nbi +#define shmem_put128_nbi pshmem_put128_nbi +#define shmem_putmem_nbi pshmem_putmem_nbi + +/* + * Elemental get routines + */ +#define shmem_ctx_char_g pshmem_ctx_char_g +#define shmem_ctx_short_g pshmem_ctx_short_g +#define shmem_ctx_int_g pshmem_ctx_int_g +#define shmem_ctx_long_g pshmem_ctx_long_g +#define shmem_ctx_float_g pshmem_ctx_float_g +#define shmem_ctx_double_g pshmem_ctx_double_g +#define shmem_ctx_longlong_g pshmem_ctx_longlong_g +#define shmem_ctx_schar_g pshmem_ctx_schar_g +#define shmem_ctx_uchar_g pshmem_ctx_uchar_g +#define shmem_ctx_ushort_g pshmem_ctx_ushort_g +#define shmem_ctx_uint_g pshmem_ctx_uint_g +#define shmem_ctx_ulong_g pshmem_ctx_ulong_g +#define shmem_ctx_ulonglong_g pshmem_ctx_ulonglong_g +#define shmem_ctx_longdouble_g pshmem_ctx_longdouble_g +#define shmem_ctx_int8_g pshmem_ctx_int8_g +#define shmem_ctx_int16_g pshmem_ctx_int16_g +#define shmem_ctx_int32_g pshmem_ctx_int32_g +#define shmem_ctx_int64_g pshmem_ctx_int64_g +#define shmem_ctx_uint8_g pshmem_ctx_uint8_g +#define shmem_ctx_uint16_g pshmem_ctx_uint16_g +#define shmem_ctx_uint32_g pshmem_ctx_uint32_g +#define shmem_ctx_uint64_g pshmem_ctx_uint64_g +#define shmem_ctx_size_g pshmem_ctx_size_g +#define shmem_ctx_ptrdiff_g pshmem_ctx_ptrdiff_g + +#define shmem_char_g pshmem_char_g +#define shmem_short_g pshmem_short_g +#define shmem_int_g pshmem_int_g +#define shmem_long_g pshmem_long_g +#define shmem_float_g pshmem_float_g +#define shmem_double_g pshmem_double_g +#define shmem_longlong_g pshmem_longlong_g +#define shmem_schar_g pshmem_schar_g +#define shmem_uchar_g pshmem_uchar_g +#define shmem_ushort_g pshmem_ushort_g +#define shmem_uint_g pshmem_uint_g +#define shmem_ulong_g pshmem_ulong_g +#define shmem_ulonglong_g pshmem_ulonglong_g +#define shmem_longdouble_g pshmem_longdouble_g +#define shmem_int8_g 
pshmem_int8_g +#define shmem_int16_g pshmem_int16_g +#define shmem_int32_g pshmem_int32_g +#define shmem_int64_g pshmem_int64_g +#define shmem_uint8_g pshmem_uint8_g +#define shmem_uint16_g pshmem_uint16_g +#define shmem_uint32_g pshmem_uint32_g +#define shmem_uint64_g pshmem_uint64_g +#define shmem_size_g pshmem_size_g +#define shmem_ptrdiff_g pshmem_ptrdiff_g + +#define shmemx_int16_g pshmemx_int16_g +#define shmemx_int32_g pshmemx_int32_g +#define shmemx_int64_g pshmemx_int64_g + +/* + * Block data get routines + */ +#define shmem_ctx_char_get pshmem_ctx_char_get +#define shmem_ctx_short_get pshmem_ctx_short_get +#define shmem_ctx_int_get pshmem_ctx_int_get +#define shmem_ctx_long_get pshmem_ctx_long_get +#define shmem_ctx_float_get pshmem_ctx_float_get +#define shmem_ctx_double_get pshmem_ctx_double_get +#define shmem_ctx_longlong_get pshmem_ctx_longlong_get +#define shmem_ctx_schar_get pshmem_ctx_schar_get +#define shmem_ctx_uchar_get pshmem_ctx_uchar_get +#define shmem_ctx_ushort_get pshmem_ctx_ushort_get +#define shmem_ctx_uint_get pshmem_ctx_uint_get +#define shmem_ctx_ulong_get pshmem_ctx_ulong_get +#define shmem_ctx_ulonglong_get pshmem_ctx_ulonglong_get +#define shmem_ctx_longdouble_get pshmem_ctx_longdouble_get +#define shmem_ctx_int8_get pshmem_ctx_int8_get +#define shmem_ctx_int16_get pshmem_ctx_int16_get +#define shmem_ctx_int32_get pshmem_ctx_int32_get +#define shmem_ctx_int64_get pshmem_ctx_int64_get +#define shmem_ctx_uint8_get pshmem_ctx_uint8_get +#define shmem_ctx_uint16_get pshmem_ctx_uint16_get +#define shmem_ctx_uint32_get pshmem_ctx_uint32_get +#define shmem_ctx_uint64_get pshmem_ctx_uint64_get +#define shmem_ctx_size_get pshmem_ctx_size_get +#define shmem_ctx_ptrdiff_get pshmem_ctx_ptrdiff_get + +#define shmem_char_get pshmem_char_get /* shmem-compat.h */ +#define shmem_short_get pshmem_short_get +#define shmem_int_get pshmem_int_get +#define shmem_long_get pshmem_long_get +#define shmem_float_get pshmem_float_get +#define shmem_double_get 
pshmem_double_get +#define shmem_longlong_get pshmem_longlong_get +#define shmem_schar_get pshmem_schar_get +#define shmem_uchar_get pshmem_uchar_get +#define shmem_ushort_get pshmem_ushort_get +#define shmem_uint_get pshmem_uint_get +#define shmem_ulong_get pshmem_ulong_get +#define shmem_ulonglong_get pshmem_ulonglong_get +#define shmem_longdouble_get pshmem_longdouble_get +#define shmem_int8_get pshmem_int8_get +#define shmem_int16_get pshmem_int16_get +#define shmem_int32_get pshmem_int32_get +#define shmem_int64_get pshmem_int64_get +#define shmem_uint8_get pshmem_uint8_get +#define shmem_uint16_get pshmem_uint16_get +#define shmem_uint32_get pshmem_uint32_get +#define shmem_uint64_get pshmem_uint64_get +#define shmem_size_get pshmem_size_get +#define shmem_ptrdiff_get pshmem_ptrdiff_get + +#define shmem_ctx_get8 pshmem_ctx_get8 +#define shmem_ctx_get16 pshmem_ctx_get16 +#define shmem_ctx_get32 pshmem_ctx_get32 +#define shmem_ctx_get64 pshmem_ctx_get64 +#define shmem_ctx_get128 pshmem_ctx_get128 +#define shmem_ctx_getmem pshmem_ctx_getmem + +#define shmem_get8 pshmem_get8 +#define shmem_get16 pshmem_get16 +#define shmem_get32 pshmem_get32 +#define shmem_get64 pshmem_get64 +#define shmem_get128 pshmem_get128 +#define shmem_getmem pshmem_getmem + +/* + * Strided get routines + */ +#define shmem_ctx_char_iget pshmem_ctx_char_iget +#define shmem_ctx_short_iget pshmem_ctx_short_iget +#define shmem_ctx_int_iget pshmem_ctx_int_iget +#define shmem_ctx_long_iget pshmem_ctx_long_iget +#define shmem_ctx_float_iget pshmem_ctx_float_iget +#define shmem_ctx_double_iget pshmem_ctx_double_iget +#define shmem_ctx_longlong_iget pshmem_ctx_longlong_iget +#define shmem_ctx_schar_iget pshmem_ctx_schar_iget +#define shmem_ctx_uchar_iget pshmem_ctx_uchar_iget +#define shmem_ctx_ushort_iget pshmem_ctx_ushort_iget +#define shmem_ctx_uint_iget pshmem_ctx_uint_iget +#define shmem_ctx_ulong_iget pshmem_ctx_ulong_iget +#define shmem_ctx_ulonglong_iget pshmem_ctx_ulonglong_iget +#define 
shmem_ctx_longdouble_iget pshmem_ctx_longdouble_iget +#define shmem_ctx_int8_iget pshmem_ctx_int8_iget +#define shmem_ctx_int16_iget pshmem_ctx_int16_iget +#define shmem_ctx_int32_iget pshmem_ctx_int32_iget +#define shmem_ctx_int64_iget pshmem_ctx_int64_iget +#define shmem_ctx_uint8_iget pshmem_ctx_uint8_iget +#define shmem_ctx_uint16_iget pshmem_ctx_uint16_iget +#define shmem_ctx_uint32_iget pshmem_ctx_uint32_iget +#define shmem_ctx_uint64_iget pshmem_ctx_uint64_iget +#define shmem_ctx_size_iget pshmem_ctx_size_iget +#define shmem_ctx_ptrdiff_iget pshmem_ctx_ptrdiff_iget + +#define shmem_char_iget pshmem_char_iget +#define shmem_short_iget pshmem_short_iget +#define shmem_int_iget pshmem_int_iget +#define shmem_long_iget pshmem_long_iget +#define shmem_float_iget pshmem_float_iget +#define shmem_double_iget pshmem_double_iget +#define shmem_longlong_iget pshmem_longlong_iget +#define shmem_schar_iget pshmem_schar_iget +#define shmem_uchar_iget pshmem_uchar_iget +#define shmem_ushort_iget pshmem_ushort_iget +#define shmem_uint_iget pshmem_uint_iget +#define shmem_ulong_iget pshmem_ulong_iget +#define shmem_ulonglong_iget pshmem_ulonglong_iget +#define shmem_longdouble_iget pshmem_longdouble_iget +#define shmem_int8_iget pshmem_int8_iget +#define shmem_int16_iget pshmem_int16_iget +#define shmem_int32_iget pshmem_int32_iget +#define shmem_int64_iget pshmem_int64_iget +#define shmem_uint8_iget pshmem_uint8_iget +#define shmem_uint16_iget pshmem_uint16_iget +#define shmem_uint32_iget pshmem_uint32_iget +#define shmem_uint64_iget pshmem_uint64_iget +#define shmem_size_iget pshmem_size_iget +#define shmem_ptrdiff_iget pshmem_ptrdiff_iget + +#define shmem_ctx_iget8 pshmem_ctx_iget8 +#define shmem_ctx_iget16 pshmem_ctx_iget16 +#define shmem_ctx_iget32 pshmem_ctx_iget32 +#define shmem_ctx_iget64 pshmem_ctx_iget64 +#define shmem_ctx_iget128 pshmem_ctx_iget128 + +#define shmem_iget8 pshmem_iget8 +#define shmem_iget16 pshmem_iget16 +#define shmem_iget32 pshmem_iget32 +#define 
shmem_iget64 pshmem_iget64 +#define shmem_iget128 pshmem_iget128 + +/* + * Non-block data get routines + */ +#define shmem_ctx_char_get_nbi pshmem_ctx_char_get_nbi +#define shmem_ctx_short_get_nbi pshmem_ctx_short_get_nbi +#define shmem_ctx_int_get_nbi pshmem_ctx_int_get_nbi +#define shmem_ctx_long_get_nbi pshmem_ctx_long_get_nbi +#define shmem_ctx_float_get_nbi pshmem_ctx_float_get_nbi +#define shmem_ctx_double_get_nbi pshmem_ctx_double_get_nbi +#define shmem_ctx_longlong_get_nbi pshmem_ctx_longlong_get_nbi +#define shmem_ctx_schar_get_nbi pshmem_ctx_schar_get_nbi +#define shmem_ctx_uchar_get_nbi pshmem_ctx_uchar_get_nbi +#define shmem_ctx_ushort_get_nbi pshmem_ctx_ushort_get_nbi +#define shmem_ctx_uint_get_nbi pshmem_ctx_uint_get_nbi +#define shmem_ctx_ulong_get_nbi pshmem_ctx_ulong_get_nbi +#define shmem_ctx_ulonglong_get_nbi pshmem_ctx_ulonglong_get_nbi +#define shmem_ctx_longdouble_get_nbi pshmem_ctx_longdouble_get_nbi +#define shmem_ctx_int8_get_nbi pshmem_ctx_int8_get_nbi +#define shmem_ctx_int16_get_nbi pshmem_ctx_int16_get_nbi +#define shmem_ctx_int32_get_nbi pshmem_ctx_int32_get_nbi +#define shmem_ctx_int64_get_nbi pshmem_ctx_int64_get_nbi +#define shmem_ctx_uint8_get_nbi pshmem_ctx_uint8_get_nbi +#define shmem_ctx_uint16_get_nbi pshmem_ctx_uint16_get_nbi +#define shmem_ctx_uint32_get_nbi pshmem_ctx_uint32_get_nbi +#define shmem_ctx_uint64_get_nbi pshmem_ctx_uint64_get_nbi +#define shmem_ctx_size_get_nbi pshmem_ctx_size_get_nbi +#define shmem_ctx_ptrdiff_get_nbi pshmem_ctx_ptrdiff_get_nbi + +#define shmem_char_get_nbi pshmem_char_get_nbi +#define shmem_short_get_nbi pshmem_short_get_nbi +#define shmem_int_get_nbi pshmem_int_get_nbi +#define shmem_long_get_nbi pshmem_long_get_nbi +#define shmem_float_get_nbi pshmem_float_get_nbi +#define shmem_double_get_nbi pshmem_double_get_nbi +#define shmem_longlong_get_nbi pshmem_longlong_get_nbi +#define shmem_schar_get_nbi pshmem_schar_get_nbi +#define shmem_uchar_get_nbi pshmem_uchar_get_nbi +#define 
shmem_ushort_get_nbi pshmem_ushort_get_nbi +#define shmem_uint_get_nbi pshmem_uint_get_nbi +#define shmem_ulong_get_nbi pshmem_ulong_get_nbi +#define shmem_ulonglong_get_nbi pshmem_ulonglong_get_nbi +#define shmem_longdouble_get_nbi pshmem_longdouble_get_nbi +#define shmem_int8_get_nbi pshmem_int8_get_nbi +#define shmem_int16_get_nbi pshmem_int16_get_nbi +#define shmem_int32_get_nbi pshmem_int32_get_nbi +#define shmem_int64_get_nbi pshmem_int64_get_nbi +#define shmem_uint8_get_nbi pshmem_uint8_get_nbi +#define shmem_uint16_get_nbi pshmem_uint16_get_nbi +#define shmem_uint32_get_nbi pshmem_uint32_get_nbi +#define shmem_uint64_get_nbi pshmem_uint64_get_nbi +#define shmem_size_get_nbi pshmem_size_get_nbi +#define shmem_ptrdiff_get_nbi pshmem_ptrdiff_get_nbi + +#define shmem_ctx_get8_nbi pshmem_ctx_get8_nbi +#define shmem_ctx_get16_nbi pshmem_ctx_get16_nbi +#define shmem_ctx_get32_nbi pshmem_ctx_get32_nbi +#define shmem_ctx_get64_nbi pshmem_ctx_get64_nbi +#define shmem_ctx_get128_nbi pshmem_ctx_get128_nbi +#define shmem_ctx_getmem_nbi pshmem_ctx_getmem_nbi + +#define shmem_get8_nbi pshmem_get8_nbi +#define shmem_get16_nbi pshmem_get16_nbi +#define shmem_get32_nbi pshmem_get32_nbi +#define shmem_get64_nbi pshmem_get64_nbi +#define shmem_get128_nbi pshmem_get128_nbi +#define shmem_getmem_nbi pshmem_getmem_nbi + +/* + * Atomic operations + */ +/* Atomic swap */ +#define shmem_ctx_double_atomic_swap pshmem_ctx_double_atomic_swap +#define shmem_ctx_float_atomic_swap pshmem_ctx_float_atomic_swap +#define shmem_ctx_int_atomic_swap pshmem_ctx_int_atomic_swap +#define shmem_ctx_long_atomic_swap pshmem_ctx_long_atomic_swap +#define shmem_ctx_longlong_atomic_swap pshmem_ctx_longlong_atomic_swap +#define shmem_ctx_uint_atomic_swap pshmem_ctx_uint_atomic_swap +#define shmem_ctx_ulong_atomic_swap pshmem_ctx_ulong_atomic_swap +#define shmem_ctx_ulonglong_atomic_swap pshmem_ctx_ulonglong_atomic_swap +#define shmem_ctx_int32_atomic_swap pshmem_ctx_int32_atomic_swap +#define 
shmem_ctx_int64_atomic_swap pshmem_ctx_int64_atomic_swap +#define shmem_ctx_uint32_atomic_swap pshmem_ctx_uint32_atomic_swap +#define shmem_ctx_uint64_atomic_swap pshmem_ctx_uint64_atomic_swap +#define shmem_ctx_size_atomic_swap pshmem_ctx_size_atomic_swap +#define shmem_ctx_ptrdiff_atomic_swap pshmem_ctx_ptrdiff_atomic_swap + +#define shmem_double_atomic_swap pshmem_double_atomic_swap +#define shmem_float_atomic_swap pshmem_float_atomic_swap +#define shmem_int_atomic_swap pshmem_int_atomic_swap +#define shmem_long_atomic_swap pshmem_long_atomic_swap +#define shmem_longlong_atomic_swap pshmem_longlong_atomic_swap +#define shmem_uint_atomic_swap pshmem_uint_atomic_swap +#define shmem_ulong_atomic_swap pshmem_ulong_atomic_swap +#define shmem_ulonglong_atomic_swap pshmem_ulonglong_atomic_swap +#define shmem_int32_atomic_swap pshmem_int32_atomic_swap +#define shmem_int64_atomic_swap pshmem_int64_atomic_swap +#define shmem_uint32_atomic_swap pshmem_uint32_atomic_swap +#define shmem_uint64_atomic_swap pshmem_uint64_atomic_swap +#define shmem_size_atomic_swap pshmem_size_atomic_swap +#define shmem_ptrdiff_atomic_swap pshmem_ptrdiff_atomic_swap + +#define shmem_double_swap pshmem_double_swap +#define shmem_float_swap pshmem_float_swap +#define shmem_int_swap pshmem_int_swap +#define shmem_long_swap pshmem_long_swap +#define shmem_longlong_swap pshmem_longlong_swap + +#define shmemx_int32_swap pshmemx_int32_swap +#define shmemx_int64_swap pshmemx_int64_swap + +/* Atomic set */ +#define shmem_ctx_double_atomic_set pshmem_ctx_double_atomic_set +#define shmem_ctx_float_atomic_set pshmem_ctx_float_atomic_set +#define shmem_ctx_int_atomic_set pshmem_ctx_int_atomic_set +#define shmem_ctx_long_atomic_set pshmem_ctx_long_atomic_set +#define shmem_ctx_longlong_atomic_set pshmem_ctx_longlong_atomic_set +#define shmem_ctx_uint_atomic_set pshmem_ctx_uint_atomic_set +#define shmem_ctx_ulong_atomic_set pshmem_ctx_ulong_atomic_set +#define shmem_ctx_ulonglong_atomic_set 
pshmem_ctx_ulonglong_atomic_set +#define shmem_ctx_int32_atomic_set pshmem_ctx_int32_atomic_set +#define shmem_ctx_int64_atomic_set pshmem_ctx_int64_atomic_set +#define shmem_ctx_uint32_atomic_set pshmem_ctx_uint32_atomic_set +#define shmem_ctx_uint64_atomic_set pshmem_ctx_uint64_atomic_set +#define shmem_ctx_size_atomic_set pshmem_ctx_size_atomic_set +#define shmem_ctx_ptrdiff_atomic_set pshmem_ctx_ptrdiff_atomic_set + +#define shmem_double_atomic_set pshmem_double_atomic_set +#define shmem_float_atomic_set pshmem_float_atomic_set +#define shmem_int_atomic_set pshmem_int_atomic_set +#define shmem_long_atomic_set pshmem_long_atomic_set +#define shmem_longlong_atomic_set pshmem_longlong_atomic_set +#define shmem_uint_atomic_set pshmem_uint_atomic_set +#define shmem_ulong_atomic_set pshmem_ulong_atomic_set +#define shmem_ulonglong_atomic_set pshmem_ulonglong_atomic_set +#define shmem_int32_atomic_set pshmem_int32_atomic_set +#define shmem_int64_atomic_set pshmem_int64_atomic_set +#define shmem_uint32_atomic_set pshmem_uint32_atomic_set +#define shmem_uint64_atomic_set pshmem_uint64_atomic_set +#define shmem_size_atomic_set pshmem_size_atomic_set +#define shmem_ptrdiff_atomic_set pshmem_ptrdiff_atomic_set + +#define shmem_double_set pshmem_double_set +#define shmem_float_set pshmem_float_set +#define shmem_int_set pshmem_int_set +#define shmem_long_set pshmem_long_set +#define shmem_longlong_set pshmem_longlong_set + +#define shmemx_int32_set pshmemx_int32_set +#define shmemx_int64_set pshmemx_int64_set + +/* Atomic conditional swap */ +#define shmem_ctx_int_atomic_compare_swap pshmem_ctx_int_atomic_compare_swap +#define shmem_ctx_long_atomic_compare_swap pshmem_ctx_long_atomic_compare_swap +#define shmem_ctx_longlong_atomic_compare_swap pshmem_ctx_longlong_atomic_compare_swap +#define shmem_ctx_uint_atomic_compare_swap pshmem_ctx_uint_atomic_compare_swap +#define shmem_ctx_ulong_atomic_compare_swap pshmem_ctx_ulong_atomic_compare_swap +#define 
shmem_ctx_ulonglong_atomic_compare_swap pshmem_ctx_ulonglong_atomic_compare_swap +#define shmem_ctx_int32_atomic_compare_swap pshmem_ctx_int32_atomic_compare_swap +#define shmem_ctx_int64_atomic_compare_swap pshmem_ctx_int64_atomic_compare_swap +#define shmem_ctx_uint32_atomic_compare_swap pshmem_ctx_uint32_atomic_compare_swap +#define shmem_ctx_uint64_atomic_compare_swap pshmem_ctx_uint64_atomic_compare_swap +#define shmem_ctx_size_atomic_compare_swap pshmem_ctx_size_atomic_compare_swap +#define shmem_ctx_ptrdiff_atomic_compare_swap pshmem_ctx_ptrdiff_atomic_compare_swap + +#define shmem_int_atomic_compare_swap pshmem_int_atomic_compare_swap +#define shmem_long_atomic_compare_swap pshmem_long_atomic_compare_swap +#define shmem_longlong_atomic_compare_swap pshmem_longlong_atomic_compare_swap +#define shmem_uint_atomic_compare_swap pshmem_uint_atomic_compare_swap +#define shmem_ulong_atomic_compare_swap pshmem_ulong_atomic_compare_swap +#define shmem_ulonglong_atomic_compare_swap pshmem_ulonglong_atomic_compare_swap +#define shmem_int32_atomic_compare_swap pshmem_int32_atomic_compare_swap +#define shmem_int64_atomic_compare_swap pshmem_int64_atomic_compare_swap +#define shmem_uint32_atomic_compare_swap pshmem_uint32_atomic_compare_swap +#define shmem_uint64_atomic_compare_swap pshmem_uint64_atomic_compare_swap +#define shmem_size_atomic_compare_swap pshmem_size_atomic_compare_swap +#define shmem_ptrdiff_atomic_compare_swap pshmem_ptrdiff_atomic_compare_swap + +#define shmem_int_cswap pshmem_int_cswap +#define shmem_long_cswap pshmem_long_cswap +#define shmem_longlong_cswap pshmem_longlong_cswap + +#define shmemx_int32_cswap pshmemx_int32_cswap +#define shmemx_int64_cswap pshmemx_int64_cswap + +/* Atomic Fetch&Add */ +#define shmem_ctx_int_atomic_fetch_add pshmem_ctx_int_atomic_fetch_add +#define shmem_ctx_long_atomic_fetch_add pshmem_ctx_long_atomic_fetch_add +#define shmem_ctx_longlong_atomic_fetch_add pshmem_ctx_longlong_atomic_fetch_add +#define 
shmem_ctx_uint_atomic_fetch_add pshmem_ctx_uint_atomic_fetch_add +#define shmem_ctx_ulong_atomic_fetch_add pshmem_ctx_ulong_atomic_fetch_add +#define shmem_ctx_ulonglong_atomic_fetch_add pshmem_ctx_ulonglong_atomic_fetch_add +#define shmem_ctx_int32_atomic_fetch_add pshmem_ctx_int32_atomic_fetch_add +#define shmem_ctx_int64_atomic_fetch_add pshmem_ctx_int64_atomic_fetch_add +#define shmem_ctx_uint32_atomic_fetch_add pshmem_ctx_uint32_atomic_fetch_add +#define shmem_ctx_uint64_atomic_fetch_add pshmem_ctx_uint64_atomic_fetch_add +#define shmem_ctx_size_atomic_fetch_add pshmem_ctx_size_atomic_fetch_add +#define shmem_ctx_ptrdiff_atomic_fetch_add pshmem_ctx_ptrdiff_atomic_fetch_add + +#define shmem_int_atomic_fetch_add pshmem_int_atomic_fetch_add +#define shmem_long_atomic_fetch_add pshmem_long_atomic_fetch_add +#define shmem_longlong_atomic_fetch_add pshmem_longlong_atomic_fetch_add +#define shmem_uint_atomic_fetch_add pshmem_uint_atomic_fetch_add +#define shmem_ulong_atomic_fetch_add pshmem_ulong_atomic_fetch_add +#define shmem_ulonglong_atomic_fetch_add pshmem_ulonglong_atomic_fetch_add +#define shmem_int32_atomic_fetch_add pshmem_int32_atomic_fetch_add +#define shmem_int64_atomic_fetch_add pshmem_int64_atomic_fetch_add +#define shmem_uint32_atomic_fetch_add pshmem_uint32_atomic_fetch_add +#define shmem_uint64_atomic_fetch_add pshmem_uint64_atomic_fetch_add +#define shmem_size_atomic_fetch_add pshmem_size_atomic_fetch_add +#define shmem_ptrdiff_atomic_fetch_add pshmem_ptrdiff_atomic_fetch_add + +#define shmem_int_fadd pshmem_int_fadd +#define shmem_long_fadd pshmem_long_fadd +#define shmem_longlong_fadd pshmem_longlong_fadd + +#define shmemx_int32_fadd pshmemx_int32_fadd +#define shmemx_int64_fadd pshmemx_int64_fadd + +/* Atomic Fetch&And */ +#define shmem_int_atomic_fetch_and pshmem_int_atomic_fetch_and +#define shmem_long_atomic_fetch_and pshmem_long_atomic_fetch_and +#define shmem_longlong_atomic_fetch_and pshmem_longlong_atomic_fetch_and +#define 
shmem_uint_atomic_fetch_and pshmem_uint_atomic_fetch_and +#define shmem_ulong_atomic_fetch_and pshmem_ulong_atomic_fetch_and +#define shmem_ulonglong_atomic_fetch_and pshmem_ulonglong_atomic_fetch_and +#define shmem_int32_atomic_fetch_and pshmem_int32_atomic_fetch_and +#define shmem_int64_atomic_fetch_and pshmem_int64_atomic_fetch_and +#define shmem_uint32_atomic_fetch_and pshmem_uint32_atomic_fetch_and +#define shmem_uint64_atomic_fetch_and pshmem_uint64_atomic_fetch_and + +#define shmem_ctx_int_atomic_fetch_and pshmem_ctx_int_atomic_fetch_and +#define shmem_ctx_long_atomic_fetch_and pshmem_ctx_long_atomic_fetch_and +#define shmem_ctx_longlong_atomic_fetch_and pshmem_ctx_longlong_atomic_fetch_and +#define shmem_ctx_uint_atomic_fetch_and pshmem_ctx_uint_atomic_fetch_and +#define shmem_ctx_ulong_atomic_fetch_and pshmem_ctx_ulong_atomic_fetch_and +#define shmem_ctx_ulonglong_atomic_fetch_and pshmem_ctx_ulonglong_atomic_fetch_and +#define shmem_ctx_int32_atomic_fetch_and pshmem_ctx_int32_atomic_fetch_and +#define shmem_ctx_int64_atomic_fetch_and pshmem_ctx_int64_atomic_fetch_and +#define shmem_ctx_uint32_atomic_fetch_and pshmem_ctx_uint32_atomic_fetch_and +#define shmem_ctx_uint64_atomic_fetch_and pshmem_ctx_uint64_atomic_fetch_and + +#define shmemx_int32_atomic_fetch_and pshmemx_int32_atomic_fetch_and +#define shmemx_int64_atomic_fetch_and pshmemx_int64_atomic_fetch_and +#define shmemx_uint32_atomic_fetch_and pshmemx_uint32_atomic_fetch_and +#define shmemx_uint64_atomic_fetch_and pshmemx_uint64_atomic_fetch_and + +/* Atomic Fetch&Or */ +#define shmem_int_atomic_fetch_or pshmem_int_atomic_fetch_or +#define shmem_long_atomic_fetch_or pshmem_long_atomic_fetch_or +#define shmem_longlong_atomic_fetch_or pshmem_longlong_atomic_fetch_or +#define shmem_uint_atomic_fetch_or pshmem_uint_atomic_fetch_or +#define shmem_ulong_atomic_fetch_or pshmem_ulong_atomic_fetch_or +#define shmem_ulonglong_atomic_fetch_or pshmem_ulonglong_atomic_fetch_or +#define shmem_int32_atomic_fetch_or 
pshmem_int32_atomic_fetch_or +#define shmem_int64_atomic_fetch_or pshmem_int64_atomic_fetch_or +#define shmem_uint32_atomic_fetch_or pshmem_uint32_atomic_fetch_or +#define shmem_uint64_atomic_fetch_or pshmem_uint64_atomic_fetch_or + +#define shmem_ctx_int_atomic_fetch_or pshmem_ctx_int_atomic_fetch_or +#define shmem_ctx_long_atomic_fetch_or pshmem_ctx_long_atomic_fetch_or +#define shmem_ctx_longlong_atomic_fetch_or pshmem_ctx_longlong_atomic_fetch_or +#define shmem_ctx_uint_atomic_fetch_or pshmem_ctx_uint_atomic_fetch_or +#define shmem_ctx_ulong_atomic_fetch_or pshmem_ctx_ulong_atomic_fetch_or +#define shmem_ctx_ulonglong_atomic_fetch_or pshmem_ctx_ulonglong_atomic_fetch_or +#define shmem_ctx_int32_atomic_fetch_or pshmem_ctx_int32_atomic_fetch_or +#define shmem_ctx_int64_atomic_fetch_or pshmem_ctx_int64_atomic_fetch_or +#define shmem_ctx_uint32_atomic_fetch_or pshmem_ctx_uint32_atomic_fetch_or +#define shmem_ctx_uint64_atomic_fetch_or pshmem_ctx_uint64_atomic_fetch_or + +#define shmemx_int32_atomic_fetch_or pshmemx_int32_atomic_fetch_or +#define shmemx_int64_atomic_fetch_or pshmemx_int64_atomic_fetch_or +#define shmemx_uint32_atomic_fetch_or pshmemx_uint32_atomic_fetch_or +#define shmemx_uint64_atomic_fetch_or pshmemx_uint64_atomic_fetch_or + +/* Atomic Fetch&Xor */ +#define shmem_int_atomic_fetch_xor pshmem_int_atomic_fetch_xor +#define shmem_long_atomic_fetch_xor pshmem_long_atomic_fetch_xor +#define shmem_longlong_atomic_fetch_xor pshmem_longlong_atomic_fetch_xor +#define shmem_uint_atomic_fetch_xor pshmem_uint_atomic_fetch_xor +#define shmem_ulong_atomic_fetch_xor pshmem_ulong_atomic_fetch_xor +#define shmem_ulonglong_atomic_fetch_xor pshmem_ulonglong_atomic_fetch_xor +#define shmem_int32_atomic_fetch_xor pshmem_int32_atomic_fetch_xor +#define shmem_int64_atomic_fetch_xor pshmem_int64_atomic_fetch_xor +#define shmem_uint32_atomic_fetch_xor pshmem_uint32_atomic_fetch_xor +#define shmem_uint64_atomic_fetch_xor pshmem_uint64_atomic_fetch_xor + +#define 
shmem_ctx_int_atomic_fetch_xor pshmem_ctx_int_atomic_fetch_xor +#define shmem_ctx_long_atomic_fetch_xor pshmem_ctx_long_atomic_fetch_xor +#define shmem_ctx_longlong_atomic_fetch_xor pshmem_ctx_longlong_atomic_fetch_xor +#define shmem_ctx_uint_atomic_fetch_xor pshmem_ctx_uint_atomic_fetch_xor +#define shmem_ctx_ulong_atomic_fetch_xor pshmem_ctx_ulong_atomic_fetch_xor +#define shmem_ctx_ulonglong_atomic_fetch_xor pshmem_ctx_ulonglong_atomic_fetch_xor +#define shmem_ctx_int32_atomic_fetch_xor pshmem_ctx_int32_atomic_fetch_xor +#define shmem_ctx_int64_atomic_fetch_xor pshmem_ctx_int64_atomic_fetch_xor +#define shmem_ctx_uint32_atomic_fetch_xor pshmem_ctx_uint32_atomic_fetch_xor +#define shmem_ctx_uint64_atomic_fetch_xor pshmem_ctx_uint64_atomic_fetch_xor + +#define shmemx_int32_atomic_fetch_xor pshmemx_int32_atomic_fetch_xor +#define shmemx_int64_atomic_fetch_xor pshmemx_int64_atomic_fetch_xor +#define shmemx_uint32_atomic_fetch_xor pshmemx_uint32_atomic_fetch_xor +#define shmemx_uint64_atomic_fetch_xor pshmemx_uint64_atomic_fetch_xor + +/* Atomic Fetch */ +#define shmem_ctx_double_atomic_fetch pshmem_ctx_double_atomic_fetch +#define shmem_ctx_float_atomic_fetch pshmem_ctx_float_atomic_fetch +#define shmem_ctx_int_atomic_fetch pshmem_ctx_int_atomic_fetch +#define shmem_ctx_long_atomic_fetch pshmem_ctx_long_atomic_fetch +#define shmem_ctx_longlong_atomic_fetch pshmem_ctx_longlong_atomic_fetch +#define shmem_ctx_uint_atomic_fetch pshmem_ctx_uint_atomic_fetch +#define shmem_ctx_ulong_atomic_fetch pshmem_ctx_ulong_atomic_fetch +#define shmem_ctx_ulonglong_atomic_fetch pshmem_ctx_ulonglong_atomic_fetch +#define shmem_ctx_int32_atomic_fetch pshmem_ctx_int32_atomic_fetch +#define shmem_ctx_int64_atomic_fetch pshmem_ctx_int64_atomic_fetch +#define shmem_ctx_uint32_atomic_fetch pshmem_ctx_uint32_atomic_fetch +#define shmem_ctx_uint64_atomic_fetch pshmem_ctx_uint64_atomic_fetch +#define shmem_ctx_size_atomic_fetch pshmem_ctx_size_atomic_fetch +#define 
shmem_ctx_ptrdiff_atomic_fetch pshmem_ctx_ptrdiff_atomic_fetch + +#define shmem_double_atomic_fetch pshmem_double_atomic_fetch +#define shmem_float_atomic_fetch pshmem_float_atomic_fetch +#define shmem_int_atomic_fetch pshmem_int_atomic_fetch +#define shmem_long_atomic_fetch pshmem_long_atomic_fetch +#define shmem_longlong_atomic_fetch pshmem_longlong_atomic_fetch +#define shmem_uint_atomic_fetch pshmem_uint_atomic_fetch +#define shmem_ulong_atomic_fetch pshmem_ulong_atomic_fetch +#define shmem_ulonglong_atomic_fetch pshmem_ulonglong_atomic_fetch +#define shmem_int32_atomic_fetch pshmem_int32_atomic_fetch +#define shmem_int64_atomic_fetch pshmem_int64_atomic_fetch +#define shmem_uint32_atomic_fetch pshmem_uint32_atomic_fetch +#define shmem_uint64_atomic_fetch pshmem_uint64_atomic_fetch +#define shmem_size_atomic_fetch pshmem_size_atomic_fetch +#define shmem_ptrdiff_atomic_fetch pshmem_ptrdiff_atomic_fetch + +#define shmem_double_fetch pshmem_double_fetch +#define shmem_float_fetch pshmem_float_fetch +#define shmem_int_fetch pshmem_int_fetch +#define shmem_long_fetch pshmem_long_fetch +#define shmem_longlong_fetch pshmem_longlong_fetch + +#define shmemx_int32_fetch pshmemx_int32_fetch +#define shmemx_int64_fetch pshmemx_int64_fetch + +/* Atomic Fetch&Inc */ +#define shmem_ctx_int_atomic_fetch_inc pshmem_ctx_int_atomic_fetch_inc +#define shmem_ctx_long_atomic_fetch_inc pshmem_ctx_long_atomic_fetch_inc +#define shmem_ctx_longlong_atomic_fetch_inc pshmem_ctx_longlong_atomic_fetch_inc +#define shmem_ctx_uint_atomic_fetch_inc pshmem_ctx_uint_atomic_fetch_inc +#define shmem_ctx_ulong_atomic_fetch_inc pshmem_ctx_ulong_atomic_fetch_inc +#define shmem_ctx_ulonglong_atomic_fetch_inc pshmem_ctx_ulonglong_atomic_fetch_inc +#define shmem_ctx_int32_atomic_fetch_inc pshmem_ctx_int32_atomic_fetch_inc +#define shmem_ctx_int64_atomic_fetch_inc pshmem_ctx_int64_atomic_fetch_inc +#define shmem_ctx_uint32_atomic_fetch_inc pshmem_ctx_uint32_atomic_fetch_inc +#define 
shmem_ctx_uint64_atomic_fetch_inc pshmem_ctx_uint64_atomic_fetch_inc +#define shmem_ctx_size_atomic_fetch_inc pshmem_ctx_size_atomic_fetch_inc +#define shmem_ctx_ptrdiff_atomic_fetch_inc pshmem_ctx_ptrdiff_atomic_fetch_inc + +#define shmem_uint_atomic_fetch_inc pshmem_uint_atomic_fetch_inc +#define shmem_ulong_atomic_fetch_inc pshmem_ulong_atomic_fetch_inc +#define shmem_ulonglong_atomic_fetch_inc pshmem_ulonglong_atomic_fetch_inc +#define shmem_int_atomic_fetch_inc pshmem_int_atomic_fetch_inc +#define shmem_long_atomic_fetch_inc pshmem_long_atomic_fetch_inc +#define shmem_longlong_atomic_fetch_inc pshmem_longlong_atomic_fetch_inc +#define shmem_int32_atomic_fetch_inc pshmem_int32_atomic_fetch_inc +#define shmem_int64_atomic_fetch_inc pshmem_int64_atomic_fetch_inc +#define shmem_uint32_atomic_fetch_inc pshmem_uint32_atomic_fetch_inc +#define shmem_uint64_atomic_fetch_inc pshmem_uint64_atomic_fetch_inc +#define shmem_size_atomic_fetch_inc pshmem_size_atomic_fetch_inc +#define shmem_ptrdiff_atomic_fetch_inc pshmem_ptrdiff_atomic_fetch_inc + + + +#define shmem_int_finc pshmem_int_finc +#define shmem_long_finc pshmem_long_finc +#define shmem_longlong_finc pshmem_longlong_finc + +#define shmemx_int32_finc pshmemx_int32_finc +#define shmemx_int64_finc pshmemx_int64_finc + +/* Atomic Add */ +#define shmem_ctx_int_atomic_add pshmem_ctx_int_atomic_add +#define shmem_ctx_long_atomic_add pshmem_ctx_long_atomic_add +#define shmem_ctx_longlong_atomic_add pshmem_ctx_longlong_atomic_add +#define shmem_ctx_uint_atomic_add pshmem_ctx_uint_atomic_add +#define shmem_ctx_ulong_atomic_add pshmem_ctx_ulong_atomic_add +#define shmem_ctx_ulonglong_atomic_add pshmem_ctx_ulonglong_atomic_add +#define shmem_ctx_int32_atomic_add pshmem_ctx_int32_atomic_add +#define shmem_ctx_int64_atomic_add pshmem_ctx_int64_atomic_add +#define shmem_ctx_uint32_atomic_add pshmem_ctx_uint32_atomic_add +#define shmem_ctx_uint64_atomic_add pshmem_ctx_uint64_atomic_add +#define shmem_ctx_size_atomic_add 
pshmem_ctx_size_atomic_add +#define shmem_ctx_ptrdiff_atomic_add pshmem_ctx_ptrdiff_atomic_add + +#define shmem_int_atomic_add pshmem_int_atomic_add +#define shmem_long_atomic_add pshmem_long_atomic_add +#define shmem_longlong_atomic_add pshmem_longlong_atomic_add +#define shmem_uint_atomic_add pshmem_uint_atomic_add +#define shmem_ulong_atomic_add pshmem_ulong_atomic_add +#define shmem_ulonglong_atomic_add pshmem_ulonglong_atomic_add +#define shmem_int32_atomic_add pshmem_int32_atomic_add +#define shmem_int64_atomic_add pshmem_int64_atomic_add +#define shmem_uint32_atomic_add pshmem_uint32_atomic_add +#define shmem_uint64_atomic_add pshmem_uint64_atomic_add +#define shmem_size_atomic_add pshmem_size_atomic_add +#define shmem_ptrdiff_atomic_add pshmem_ptrdiff_atomic_add + +#define shmem_int_add pshmem_int_add +#define shmem_long_add pshmem_long_add +#define shmem_longlong_add pshmem_longlong_add + +#define shmemx_int32_add pshmemx_int32_add +#define shmemx_int64_add pshmemx_int64_add + +/* Atomic And */ +#define shmem_int_atomic_and pshmem_int_atomic_and +#define shmem_long_atomic_and pshmem_long_atomic_and +#define shmem_longlong_atomic_and pshmem_longlong_atomic_and +#define shmem_uint_atomic_and pshmem_uint_atomic_and +#define shmem_ulong_atomic_and pshmem_ulong_atomic_and +#define shmem_ulonglong_atomic_and pshmem_ulonglong_atomic_and +#define shmem_int32_atomic_and pshmem_int32_atomic_and +#define shmem_int64_atomic_and pshmem_int64_atomic_and +#define shmem_uint32_atomic_and pshmem_uint32_atomic_and +#define shmem_uint64_atomic_and pshmem_uint64_atomic_and + +#define shmem_ctx_int_atomic_and pshmem_ctx_int_atomic_and +#define shmem_ctx_long_atomic_and pshmem_ctx_long_atomic_and +#define shmem_ctx_longlong_atomic_and pshmem_ctx_longlong_atomic_and +#define shmem_ctx_uint_atomic_and pshmem_ctx_uint_atomic_and +#define shmem_ctx_ulong_atomic_and pshmem_ctx_ulong_atomic_and +#define shmem_ctx_ulonglong_atomic_and pshmem_ctx_ulonglong_atomic_and +#define 
shmem_ctx_int32_atomic_and pshmem_ctx_int32_atomic_and +#define shmem_ctx_int64_atomic_and pshmem_ctx_int64_atomic_and +#define shmem_ctx_uint32_atomic_and pshmem_ctx_uint32_atomic_and +#define shmem_ctx_uint64_atomic_and pshmem_ctx_uint64_atomic_and + +#define shmemx_int32_atomic_and pshmemx_int32_atomic_and +#define shmemx_int64_atomic_and pshmemx_int64_atomic_and + +#define shmemx_uint32_atomic_and pshmemx_uint32_atomic_and +#define shmemx_uint64_atomic_and pshmemx_uint64_atomic_and + +/* Atomic Or */ +#define shmem_int_atomic_or pshmem_int_atomic_or +#define shmem_long_atomic_or pshmem_long_atomic_or +#define shmem_longlong_atomic_or pshmem_longlong_atomic_or +#define shmem_uint_atomic_or pshmem_uint_atomic_or +#define shmem_ulong_atomic_or pshmem_ulong_atomic_or +#define shmem_ulonglong_atomic_or pshmem_ulonglong_atomic_or +#define shmem_int32_atomic_or pshmem_int32_atomic_or +#define shmem_int64_atomic_or pshmem_int64_atomic_or +#define shmem_uint32_atomic_or pshmem_uint32_atomic_or +#define shmem_uint64_atomic_or pshmem_uint64_atomic_or + +#define shmem_ctx_int_atomic_or pshmem_ctx_int_atomic_or +#define shmem_ctx_long_atomic_or pshmem_ctx_long_atomic_or +#define shmem_ctx_longlong_atomic_or pshmem_ctx_longlong_atomic_or +#define shmem_ctx_uint_atomic_or pshmem_ctx_uint_atomic_or +#define shmem_ctx_ulong_atomic_or pshmem_ctx_ulong_atomic_or +#define shmem_ctx_ulonglong_atomic_or pshmem_ctx_ulonglong_atomic_or +#define shmem_ctx_int32_atomic_or pshmem_ctx_int32_atomic_or +#define shmem_ctx_int64_atomic_or pshmem_ctx_int64_atomic_or +#define shmem_ctx_uint32_atomic_or pshmem_ctx_uint32_atomic_or +#define shmem_ctx_uint64_atomic_or pshmem_ctx_uint64_atomic_or + +#define shmemx_int32_atomic_or pshmemx_int32_atomic_or +#define shmemx_int64_atomic_or pshmemx_int64_atomic_or + +#define shmemx_uint32_atomic_or pshmemx_uint32_atomic_or +#define shmemx_uint64_atomic_or pshmemx_uint64_atomic_or + +/* Atomic Xor */ +#define shmem_int_atomic_xor pshmem_int_atomic_xor 
+#define shmem_long_atomic_xor pshmem_long_atomic_xor +#define shmem_longlong_atomic_xor pshmem_longlong_atomic_xor +#define shmem_uint_atomic_xor pshmem_uint_atomic_xor +#define shmem_ulong_atomic_xor pshmem_ulong_atomic_xor +#define shmem_ulonglong_atomic_xor pshmem_ulonglong_atomic_xor +#define shmem_int32_atomic_xor pshmem_int32_atomic_xor +#define shmem_int64_atomic_xor pshmem_int64_atomic_xor +#define shmem_uint32_atomic_xor pshmem_uint32_atomic_xor +#define shmem_uint64_atomic_xor pshmem_uint64_atomic_xor + +#define shmem_ctx_int_atomic_xor pshmem_ctx_int_atomic_xor +#define shmem_ctx_long_atomic_xor pshmem_ctx_long_atomic_xor +#define shmem_ctx_longlong_atomic_xor pshmem_ctx_longlong_atomic_xor +#define shmem_ctx_uint_atomic_xor pshmem_ctx_uint_atomic_xor +#define shmem_ctx_ulong_atomic_xor pshmem_ctx_ulong_atomic_xor +#define shmem_ctx_ulonglong_atomic_xor pshmem_ctx_ulonglong_atomic_xor +#define shmem_ctx_int32_atomic_xor pshmem_ctx_int32_atomic_xor +#define shmem_ctx_int64_atomic_xor pshmem_ctx_int64_atomic_xor +#define shmem_ctx_uint32_atomic_xor pshmem_ctx_uint32_atomic_xor +#define shmem_ctx_uint64_atomic_xor pshmem_ctx_uint64_atomic_xor + +#define shmemx_int32_atomic_xor pshmemx_int32_atomic_xor +#define shmemx_int64_atomic_xor pshmemx_int64_atomic_xor + +#define shmemx_uint32_atomic_xor pshmemx_uint32_atomic_xor +#define shmemx_uint64_atomic_xor pshmemx_uint64_atomic_xor + +/* Atomic Inc */ +#define shmem_ctx_int_atomic_inc pshmem_ctx_int_atomic_inc +#define shmem_ctx_long_atomic_inc pshmem_ctx_long_atomic_inc +#define shmem_ctx_longlong_atomic_inc pshmem_ctx_longlong_atomic_inc +#define shmem_ctx_uint_atomic_inc pshmem_ctx_uint_atomic_inc +#define shmem_ctx_ulong_atomic_inc pshmem_ctx_ulong_atomic_inc +#define shmem_ctx_ulonglong_atomic_inc pshmem_ctx_ulonglong_atomic_inc +#define shmem_ctx_int32_atomic_inc pshmem_ctx_int32_atomic_inc +#define shmem_ctx_int64_atomic_inc pshmem_ctx_int64_atomic_inc +#define shmem_ctx_uint32_atomic_inc 
pshmem_ctx_uint32_atomic_inc +#define shmem_ctx_uint64_atomic_inc pshmem_ctx_uint64_atomic_inc +#define shmem_ctx_size_atomic_inc pshmem_ctx_size_atomic_inc +#define shmem_ctx_ptrdiff_atomic_inc pshmem_ctx_ptrdiff_atomic_inc + +#define shmem_int_atomic_inc pshmem_int_atomic_inc +#define shmem_long_atomic_inc pshmem_long_atomic_inc +#define shmem_longlong_atomic_inc pshmem_longlong_atomic_inc +#define shmem_uint_atomic_inc pshmem_uint_atomic_inc +#define shmem_ulong_atomic_inc pshmem_ulong_atomic_inc +#define shmem_ulonglong_atomic_inc pshmem_ulonglong_atomic_inc +#define shmem_int32_atomic_inc pshmem_int32_atomic_inc +#define shmem_int64_atomic_inc pshmem_int64_atomic_inc +#define shmem_uint32_atomic_inc pshmem_uint32_atomic_inc +#define shmem_uint64_atomic_inc pshmem_uint64_atomic_inc +#define shmem_size_atomic_inc pshmem_size_atomic_inc +#define shmem_ptrdiff_atomic_inc pshmem_ptrdiff_atomic_inc + +#define shmem_int_inc pshmem_int_inc +#define shmem_long_inc pshmem_long_inc +#define shmem_longlong_inc pshmem_longlong_inc + +#define shmemx_int32_inc pshmemx_int32_inc +#define shmemx_int64_inc pshmemx_int64_inc + +/* Nonblocking Atomic Fetch */ +#define shmem_ctx_double_atomic_fetch_nbi pshmem_ctx_double_atomic_fetch_nbi +#define shmem_ctx_float_atomic_fetch_nbi pshmem_ctx_float_atomic_fetch_nbi +#define shmem_ctx_int_atomic_fetch_nbi pshmem_ctx_int_atomic_fetch_nbi +#define shmem_ctx_long_atomic_fetch_nbi pshmem_ctx_long_atomic_fetch_nbi +#define shmem_ctx_longlong_atomic_fetch_nbi pshmem_ctx_longlong_atomic_fetch_nbi +#define shmem_ctx_uint_atomic_fetch_nbi pshmem_ctx_uint_atomic_fetch_nbi +#define shmem_ctx_ulong_atomic_fetch_nbi pshmem_ctx_ulong_atomic_fetch_nbi +#define shmem_ctx_ulonglong_atomic_fetch_nbi pshmem_ctx_ulonglong_atomic_fetch_nbi +#define shmem_ctx_int32_atomic_fetch_nbi pshmem_ctx_int32_atomic_fetch_nbi +#define shmem_ctx_int64_atomic_fetch_nbi pshmem_ctx_int64_atomic_fetch_nbi +#define shmem_ctx_uint32_atomic_fetch_nbi 
pshmem_ctx_uint32_atomic_fetch_nbi +#define shmem_ctx_uint64_atomic_fetch_nbi pshmem_ctx_uint64_atomic_fetch_nbi +#define shmem_ctx_size_atomic_fetch_nbi pshmem_ctx_size_atomic_fetch_nbi +#define shmem_ctx_ptrdiff_atomic_fetch_nbi pshmem_ctx_ptrdiff_atomic_fetch_nbi + +#define shmem_double_atomic_fetch_nbi pshmem_double_atomic_fetch_nbi +#define shmem_float_atomic_fetch_nbi pshmem_float_atomic_fetch_nbi +#define shmem_int_atomic_fetch_nbi pshmem_int_atomic_fetch_nbi +#define shmem_long_atomic_fetch_nbi pshmem_long_atomic_fetch_nbi +#define shmem_longlong_atomic_fetch_nbi pshmem_longlong_atomic_fetch_nbi +#define shmem_uint_atomic_fetch_nbi pshmem_uint_atomic_fetch_nbi +#define shmem_ulong_atomic_fetch_nbi pshmem_ulong_atomic_fetch_nbi +#define shmem_ulonglong_atomic_fetch_nbi pshmem_ulonglong_atomic_fetch_nbi +#define shmem_int32_atomic_fetch_nbi pshmem_int32_atomic_fetch_nbi +#define shmem_int64_atomic_fetch_nbi pshmem_int64_atomic_fetch_nbi +#define shmem_uint32_atomic_fetch_nbi pshmem_uint32_atomic_fetch_nbi +#define shmem_uint64_atomic_fetch_nbi pshmem_uint64_atomic_fetch_nbi +#define shmem_size_atomic_fetch_nbi pshmem_size_atomic_fetch_nbi +#define shmem_ptrdiff_atomic_fetch_nbi pshmem_ptrdiff_atomic_fetch_nbi + + +/* Nonblocking Atomic Compare Swap */ +#define shmem_ctx_int_atomic_compare_swap_nbi pshmem_ctx_int_atomic_compare_swap_nbi +#define shmem_ctx_long_atomic_compare_swap_nbi pshmem_ctx_long_atomic_compare_swap_nbi +#define shmem_ctx_longlong_atomic_compare_swap_nbi pshmem_ctx_longlong_atomic_compare_swap_nbi +#define shmem_ctx_uint_atomic_compare_swap_nbi pshmem_ctx_uint_atomic_compare_swap_nbi +#define shmem_ctx_ulong_atomic_compare_swap_nbi pshmem_ctx_ulong_atomic_compare_swap_nbi +#define shmem_ctx_ulonglong_atomic_compare_swap_nbi pshmem_ctx_ulonglong_atomic_compare_swap_nbi +#define shmem_ctx_int32_atomic_compare_swap_nbi pshmem_ctx_int32_atomic_compare_swap_nbi +#define shmem_ctx_int64_atomic_compare_swap_nbi 
pshmem_ctx_int64_atomic_compare_swap_nbi +#define shmem_ctx_uint32_atomic_compare_swap_nbi pshmem_ctx_uint32_atomic_compare_swap_nbi +#define shmem_ctx_uint64_atomic_compare_swap_nbi pshmem_ctx_uint64_atomic_compare_swap_nbi +#define shmem_ctx_size_atomic_compare_swap_nbi pshmem_ctx_size_atomic_compare_swap_nbi +#define shmem_ctx_ptrdiff_atomic_compare_swap_nbi pshmem_ctx_ptrdiff_atomic_compare_swap_nbi + +#define shmem_int_atomic_compare_swap_nbi pshmem_int_atomic_compare_swap_nbi +#define shmem_long_atomic_compare_swap_nbi pshmem_long_atomic_compare_swap_nbi +#define shmem_longlong_atomic_compare_swap_nbi pshmem_longlong_atomic_compare_swap_nbi +#define shmem_uint_atomic_compare_swap_nbi pshmem_uint_atomic_compare_swap_nbi +#define shmem_ulong_atomic_compare_swap_nbi pshmem_ulong_atomic_compare_swap_nbi +#define shmem_ulonglong_atomic_compare_swap_nbi pshmem_ulonglong_atomic_compare_swap_nbi +#define shmem_int32_atomic_compare_swap_nbi pshmem_int32_atomic_compare_swap_nbi +#define shmem_int64_atomic_compare_swap_nbi pshmem_int64_atomic_compare_swap_nbi +#define shmem_uint32_atomic_compare_swap_nbi pshmem_uint32_atomic_compare_swap_nbi +#define shmem_uint64_atomic_compare_swap_nbi pshmem_uint64_atomic_compare_swap_nbi +#define shmem_size_atomic_compare_swap_nbi pshmem_size_atomic_compare_swap_nbi +#define shmem_ptrdiff_atomic_compare_swap_nbi pshmem_ptrdiff_atomic_compare_swap_nbi + + +/* Nonblocking Atomic Swap */ +#define shmem_ctx_double_atomic_swap_nbi pshmem_ctx_double_atomic_swap_nbi +#define shmem_ctx_float_atomic_swap_nbi pshmem_ctx_float_atomic_swap_nbi +#define shmem_ctx_int_atomic_swap_nbi pshmem_ctx_int_atomic_swap_nbi +#define shmem_ctx_long_atomic_swap_nbi pshmem_ctx_long_atomic_swap_nbi +#define shmem_ctx_longlong_atomic_swap_nbi pshmem_ctx_longlong_atomic_swap_nbi +#define shmem_ctx_uint_atomic_swap_nbi pshmem_ctx_uint_atomic_swap_nbi +#define shmem_ctx_ulong_atomic_swap_nbi pshmem_ctx_ulong_atomic_swap_nbi +#define 
shmem_ctx_ulonglong_atomic_swap_nbi pshmem_ctx_ulonglong_atomic_swap_nbi +#define shmem_ctx_int32_atomic_swap_nbi pshmem_ctx_int32_atomic_swap_nbi +#define shmem_ctx_int64_atomic_swap_nbi pshmem_ctx_int64_atomic_swap_nbi +#define shmem_ctx_uint32_atomic_swap_nbi pshmem_ctx_uint32_atomic_swap_nbi +#define shmem_ctx_uint64_atomic_swap_nbi pshmem_ctx_uint64_atomic_swap_nbi +#define shmem_ctx_size_atomic_swap_nbi pshmem_ctx_size_atomic_swap_nbi +#define shmem_ctx_ptrdiff_atomic_swap_nbi pshmem_ctx_ptrdiff_atomic_swap_nbi + +#define shmem_double_atomic_swap_nbi pshmem_double_atomic_swap_nbi +#define shmem_float_atomic_swap_nbi pshmem_float_atomic_swap_nbi +#define shmem_int_atomic_swap_nbi pshmem_int_atomic_swap_nbi +#define shmem_long_atomic_swap_nbi pshmem_long_atomic_swap_nbi +#define shmem_longlong_atomic_swap_nbi pshmem_longlong_atomic_swap_nbi +#define shmem_uint_atomic_swap_nbi pshmem_uint_atomic_swap_nbi +#define shmem_ulong_atomic_swap_nbi pshmem_ulong_atomic_swap_nbi +#define shmem_ulonglong_atomic_swap_nbi pshmem_ulonglong_atomic_swap_nbi +#define shmem_int32_atomic_swap_nbi pshmem_int32_atomic_swap_nbi +#define shmem_int64_atomic_swap_nbi pshmem_int64_atomic_swap_nbi +#define shmem_uint32_atomic_swap_nbi pshmem_uint32_atomic_swap_nbi +#define shmem_uint64_atomic_swap_nbi pshmem_uint64_atomic_swap_nbi +#define shmem_size_atomic_swap_nbi pshmem_size_atomic_swap_nbi +#define shmem_ptrdiff_atomic_swap_nbi pshmem_ptrdiff_atomic_swap_nbi + + +/* Nonblocking Atomic Fetch and Increment */ +#define shmem_ctx_int_atomic_fetch_inc_nbi pshmem_ctx_int_atomic_fetch_inc_nbi +#define shmem_ctx_long_atomic_fetch_inc_nbi pshmem_ctx_long_atomic_fetch_inc_nbi +#define shmem_ctx_longlong_atomic_fetch_inc_nbi pshmem_ctx_longlong_atomic_fetch_inc_nbi +#define shmem_ctx_uint_atomic_fetch_inc_nbi pshmem_ctx_uint_atomic_fetch_inc_nbi +#define shmem_ctx_ulong_atomic_fetch_inc_nbi pshmem_ctx_ulong_atomic_fetch_inc_nbi +#define shmem_ctx_ulonglong_atomic_fetch_inc_nbi 
pshmem_ctx_ulonglong_atomic_fetch_inc_nbi +#define shmem_ctx_int32_atomic_fetch_inc_nbi pshmem_ctx_int32_atomic_fetch_inc_nbi +#define shmem_ctx_int64_atomic_fetch_inc_nbi pshmem_ctx_int64_atomic_fetch_inc_nbi +#define shmem_ctx_uint32_atomic_fetch_inc_nbi pshmem_ctx_uint32_atomic_fetch_inc_nbi +#define shmem_ctx_uint64_atomic_fetch_inc_nbi pshmem_ctx_uint64_atomic_fetch_inc_nbi +#define shmem_ctx_size_atomic_fetch_inc_nbi pshmem_ctx_size_atomic_fetch_inc_nbi +#define shmem_ctx_ptrdiff_atomic_fetch_inc_nbi pshmem_ctx_ptrdiff_atomic_fetch_inc_nbi + +#define shmem_int_atomic_fetch_inc_nbi pshmem_int_atomic_fetch_inc_nbi +#define shmem_long_atomic_fetch_inc_nbi pshmem_long_atomic_fetch_inc_nbi +#define shmem_longlong_atomic_fetch_inc_nbi pshmem_longlong_atomic_fetch_inc_nbi +#define shmem_uint_atomic_fetch_inc_nbi pshmem_uint_atomic_fetch_inc_nbi +#define shmem_ulong_atomic_fetch_inc_nbi pshmem_ulong_atomic_fetch_inc_nbi +#define shmem_ulonglong_atomic_fetch_inc_nbi pshmem_ulonglong_atomic_fetch_inc_nbi +#define shmem_int32_atomic_fetch_inc_nbi pshmem_int32_atomic_fetch_inc_nbi +#define shmem_int64_atomic_fetch_inc_nbi pshmem_int64_atomic_fetch_inc_nbi +#define shmem_uint32_atomic_fetch_inc_nbi pshmem_uint32_atomic_fetch_inc_nbi +#define shmem_uint64_atomic_fetch_inc_nbi pshmem_uint64_atomic_fetch_inc_nbi +#define shmem_size_atomic_fetch_inc_nbi pshmem_size_atomic_fetch_inc_nbi +#define shmem_ptrdiff_atomic_fetch_inc_nbi pshmem_ptrdiff_atomic_fetch_inc_nbi + + +/* Nonblocking Atomic Fetch and Add */ +#define shmem_ctx_int_atomic_fetch_add_nbi pshmem_ctx_int_atomic_fetch_add_nbi +#define shmem_ctx_long_atomic_fetch_add_nbi pshmem_ctx_long_atomic_fetch_add_nbi +#define shmem_ctx_longlong_atomic_fetch_add_nbi pshmem_ctx_longlong_atomic_fetch_add_nbi +#define shmem_ctx_uint_atomic_fetch_add_nbi pshmem_ctx_uint_atomic_fetch_add_nbi +#define shmem_ctx_ulong_atomic_fetch_add_nbi pshmem_ctx_ulong_atomic_fetch_add_nbi +#define shmem_ctx_ulonglong_atomic_fetch_add_nbi 
pshmem_ctx_ulonglong_atomic_fetch_add_nbi +#define shmem_ctx_int32_atomic_fetch_add_nbi pshmem_ctx_int32_atomic_fetch_add_nbi +#define shmem_ctx_int64_atomic_fetch_add_nbi pshmem_ctx_int64_atomic_fetch_add_nbi +#define shmem_ctx_uint32_atomic_fetch_add_nbi pshmem_ctx_uint32_atomic_fetch_add_nbi +#define shmem_ctx_uint64_atomic_fetch_add_nbi pshmem_ctx_uint64_atomic_fetch_add_nbi +#define shmem_ctx_size_atomic_fetch_add_nbi pshmem_ctx_size_atomic_fetch_add_nbi +#define shmem_ctx_ptrdiff_atomic_fetch_add_nbi pshmem_ctx_ptrdiff_atomic_fetch_add_nbi + +#define shmem_int_atomic_fetch_add_nbi pshmem_int_atomic_fetch_add_nbi +#define shmem_long_atomic_fetch_add_nbi pshmem_long_atomic_fetch_add_nbi +#define shmem_longlong_atomic_fetch_add_nbi pshmem_longlong_atomic_fetch_add_nbi +#define shmem_uint_atomic_fetch_add_nbi pshmem_uint_atomic_fetch_add_nbi +#define shmem_ulong_atomic_fetch_add_nbi pshmem_ulong_atomic_fetch_add_nbi +#define shmem_ulonglong_atomic_fetch_add_nbi pshmem_ulonglong_atomic_fetch_add_nbi +#define shmem_int32_atomic_fetch_add_nbi pshmem_int32_atomic_fetch_add_nbi +#define shmem_int64_atomic_fetch_add_nbi pshmem_int64_atomic_fetch_add_nbi +#define shmem_uint32_atomic_fetch_add_nbi pshmem_uint32_atomic_fetch_add_nbi +#define shmem_uint64_atomic_fetch_add_nbi pshmem_uint64_atomic_fetch_add_nbi +#define shmem_size_atomic_fetch_add_nbi pshmem_size_atomic_fetch_add_nbi +#define shmem_ptrdiff_atomic_fetch_add_nbi pshmem_ptrdiff_atomic_fetch_add_nbi + + +/* Nonblocking Atomic Fetch and And */ +#define shmem_ctx_uint_atomic_fetch_and_nbi pshmem_ctx_uint_atomic_fetch_and_nbi +#define shmem_ctx_ulong_atomic_fetch_and_nbi pshmem_ctx_ulong_atomic_fetch_and_nbi +#define shmem_ctx_ulonglong_atomic_fetch_and_nbi pshmem_ctx_ulonglong_atomic_fetch_and_nbi +#define shmem_ctx_int32_atomic_fetch_and_nbi pshmem_ctx_int32_atomic_fetch_and_nbi +#define shmem_ctx_int64_atomic_fetch_and_nbi pshmem_ctx_int64_atomic_fetch_and_nbi +#define shmem_ctx_uint32_atomic_fetch_and_nbi 
pshmem_ctx_uint32_atomic_fetch_and_nbi +#define shmem_ctx_uint64_atomic_fetch_and_nbi pshmem_ctx_uint64_atomic_fetch_and_nbi + +#define shmem_uint_atomic_fetch_and_nbi pshmem_uint_atomic_fetch_and_nbi +#define shmem_ulong_atomic_fetch_and_nbi pshmem_ulong_atomic_fetch_and_nbi +#define shmem_ulonglong_atomic_fetch_and_nbi pshmem_ulonglong_atomic_fetch_and_nbi +#define shmem_int32_atomic_fetch_and_nbi pshmem_int32_atomic_fetch_and_nbi +#define shmem_int64_atomic_fetch_and_nbi pshmem_int64_atomic_fetch_and_nbi +#define shmem_uint32_atomic_fetch_and_nbi pshmem_uint32_atomic_fetch_and_nbi +#define shmem_uint64_atomic_fetch_and_nbi pshmem_uint64_atomic_fetch_and_nbi + + +/* Nonblocking Atomic Fetch and OR */ +#define shmem_ctx_uint_atomic_fetch_or_nbi pshmem_ctx_uint_atomic_fetch_or_nbi +#define shmem_ctx_ulong_atomic_fetch_or_nbi pshmem_ctx_ulong_atomic_fetch_or_nbi +#define shmem_ctx_ulonglong_atomic_fetch_or_nbi pshmem_ctx_ulonglong_atomic_fetch_or_nbi +#define shmem_ctx_int32_atomic_fetch_or_nbi pshmem_ctx_int32_atomic_fetch_or_nbi +#define shmem_ctx_int64_atomic_fetch_or_nbi pshmem_ctx_int64_atomic_fetch_or_nbi +#define shmem_ctx_uint32_atomic_fetch_or_nbi pshmem_ctx_uint32_atomic_fetch_or_nbi +#define shmem_ctx_uint64_atomic_fetch_or_nbi pshmem_ctx_uint64_atomic_fetch_or_nbi + +#define shmem_uint_atomic_fetch_or_nbi pshmem_uint_atomic_fetch_or_nbi +#define shmem_ulong_atomic_fetch_or_nbi pshmem_ulong_atomic_fetch_or_nbi +#define shmem_ulonglong_atomic_fetch_or_nbi pshmem_ulonglong_atomic_fetch_or_nbi +#define shmem_int32_atomic_fetch_or_nbi pshmem_int32_atomic_fetch_or_nbi +#define shmem_int64_atomic_fetch_or_nbi pshmem_int64_atomic_fetch_or_nbi +#define shmem_uint32_atomic_fetch_or_nbi pshmem_uint32_atomic_fetch_or_nbi +#define shmem_uint64_atomic_fetch_or_nbi pshmem_uint64_atomic_fetch_or_nbi + + +/* Nonblocking Atomic Fetch and XOR */ +#define shmem_ctx_uint_atomic_fetch_xor_nbi pshmem_ctx_uint_atomic_fetch_xor_nbi +#define shmem_ctx_ulong_atomic_fetch_xor_nbi 
pshmem_ctx_ulong_atomic_fetch_xor_nbi +#define shmem_ctx_ulonglong_atomic_fetch_xor_nbi pshmem_ctx_ulonglong_atomic_fetch_xor_nbi +#define shmem_ctx_int32_atomic_fetch_xor_nbi pshmem_ctx_int32_atomic_fetch_xor_nbi +#define shmem_ctx_int64_atomic_fetch_xor_nbi pshmem_ctx_int64_atomic_fetch_xor_nbi +#define shmem_ctx_uint32_atomic_fetch_xor_nbi pshmem_ctx_uint32_atomic_fetch_xor_nbi +#define shmem_ctx_uint64_atomic_fetch_xor_nbi pshmem_ctx_uint64_atomic_fetch_xor_nbi + +#define shmem_uint_atomic_fetch_xor_nbi pshmem_uint_atomic_fetch_xor_nbi +#define shmem_ulong_atomic_fetch_xor_nbi pshmem_ulong_atomic_fetch_xor_nbi +#define shmem_ulonglong_atomic_fetch_xor_nbi pshmem_ulonglong_atomic_fetch_xor_nbi +#define shmem_int32_atomic_fetch_xor_nbi pshmem_int32_atomic_fetch_xor_nbi +#define shmem_int64_atomic_fetch_xor_nbi pshmem_int64_atomic_fetch_xor_nbi +#define shmem_uint32_atomic_fetch_xor_nbi pshmem_uint32_atomic_fetch_xor_nbi +#define shmem_uint64_atomic_fetch_xor_nbi pshmem_uint64_atomic_fetch_xor_nbi + +/* + * Control of profile + */ + +#define shmem_pcontrol pshmem_pcontrol + +/* + * Lock functions + */ +#define shmem_set_lock pshmem_set_lock +#define shmem_clear_lock pshmem_clear_lock +#define shmem_test_lock pshmem_test_lock + +/* + * P2P sync routines + */ +#define shmem_short_wait pshmem_short_wait +#define shmem_int_wait pshmem_int_wait +#define shmem_long_wait pshmem_long_wait +#define shmem_longlong_wait pshmem_longlong_wait +#define shmem_wait pshmem_wait +#define shmemx_int32_wait pshmemx_int32_wait +#define shmemx_int64_wait pshmemx_int64_wait + +#define shmem_short_wait_until pshmem_short_wait_until +#define shmem_int_wait_until pshmem_int_wait_until +#define shmem_long_wait_until pshmem_long_wait_until +#define shmem_longlong_wait_until pshmem_longlong_wait_until +#define shmem_ushort_wait_until pshmem_ushort_wait_until +#define shmem_uint_wait_until pshmem_uint_wait_until +#define shmem_ulong_wait_until pshmem_ulong_wait_until +#define 
shmem_ulonglong_wait_until pshmem_ulonglong_wait_until +#define shmem_int32_wait_until pshmem_int32_wait_until +#define shmem_int64_wait_until pshmem_int64_wait_until +#define shmem_uint32_wait_until pshmem_uint32_wait_until +#define shmem_uint64_wait_until pshmem_uint64_wait_until +#define shmem_size_wait_until pshmem_size_wait_until +#define shmem_ptrdiff_wait_until pshmem_ptrdiff_wait_until + +#define shmemx_int32_wait_until pshmemx_int32_wait_until +#define shmemx_int64_wait_until pshmemx_int64_wait_until + +#define shmem_short_wait_until_all pshmem_short_wait_until_all +#define shmem_ushort_wait_until_all pshmem_ushort_wait_until_all +#define shmem_int_wait_until_all pshmem_int_wait_until_all +#define shmem_long_wait_until_all pshmem_long_wait_until_all +#define shmem_longlong_wait_until_all pshmem_longlong_wait_until_all +#define shmem_uint_wait_until_all pshmem_uint_wait_until_all +#define shmem_ulong_wait_until_all pshmem_ulong_wait_until_all +#define shmem_ulonglong_wait_until_all pshmem_ulonglong_wait_until_all +#define shmem_int32_wait_until_all pshmem_int32_wait_until_all +#define shmem_int64_wait_until_all pshmem_int64_wait_until_all +#define shmem_uint32_wait_until_all pshmem_uint32_wait_until_all +#define shmem_uint64_wait_until_all pshmem_uint64_wait_until_all +#define shmem_size_wait_until_all pshmem_size_wait_until_all +#define shmem_ptrdiff_wait_until_all pshmem_ptrdiff_wait_until_all + + +#define shmem_short_wait_until_any pshmem_short_wait_until_any +#define shmem_ushort_wait_until_any pshmem_ushort_wait_until_any +#define shmem_int_wait_until_any pshmem_int_wait_until_any +#define shmem_long_wait_until_any pshmem_long_wait_until_any +#define shmem_longlong_wait_until_any pshmem_longlong_wait_until_any +#define shmem_uint_wait_until_any pshmem_uint_wait_until_any +#define shmem_ulong_wait_until_any pshmem_ulong_wait_until_any +#define shmem_ulonglong_wait_until_any pshmem_ulonglong_wait_until_any +#define shmem_int32_wait_until_any 
pshmem_int32_wait_until_any +#define shmem_int64_wait_until_any pshmem_int64_wait_until_any +#define shmem_uint32_wait_until_any pshmem_uint32_wait_until_any +#define shmem_uint64_wait_until_any pshmem_uint64_wait_until_any +#define shmem_size_wait_until_any pshmem_size_wait_until_any +#define shmem_ptrdiff_wait_until_any pshmem_ptrdiff_wait_until_any + + +#define shmem_short_wait_until_some pshmem_short_wait_until_some +#define shmem_ushort_wait_until_some pshmem_ushort_wait_until_some +#define shmem_int_wait_until_some pshmem_int_wait_until_some +#define shmem_long_wait_until_some pshmem_long_wait_until_some +#define shmem_longlong_wait_until_some pshmem_longlong_wait_until_some +#define shmem_uint_wait_until_some pshmem_uint_wait_until_some +#define shmem_ulong_wait_until_some pshmem_ulong_wait_until_some +#define shmem_ulonglong_wait_until_some pshmem_ulonglong_wait_until_some +#define shmem_int32_wait_until_some pshmem_int32_wait_until_some +#define shmem_int64_wait_until_some pshmem_int64_wait_until_some +#define shmem_uint32_wait_until_some pshmem_uint32_wait_until_some +#define shmem_uint64_wait_until_some pshmem_uint64_wait_until_some +#define shmem_size_wait_until_some pshmem_size_wait_until_some +#define shmem_ptrdiff_wait_until_some pshmem_ptrdiff_wait_until_some + + +#define shmem_short_wait_until_all_vector pshmem_short_wait_until_all_vector +#define shmem_ushort_wait_until_all_vector pshmem_ushort_wait_until_all_vector +#define shmem_int_wait_until_all_vector pshmem_int_wait_until_all_vector +#define shmem_long_wait_until_all_vector pshmem_long_wait_until_all_vector +#define shmem_longlong_wait_until_all_vector pshmem_longlong_wait_until_all_vector +#define shmem_uint_wait_until_all_vector pshmem_uint_wait_until_all_vector +#define shmem_ulong_wait_until_all_vector pshmem_ulong_wait_until_all_vector +#define shmem_ulonglong_wait_until_all_vector pshmem_ulonglong_wait_until_all_vector +#define shmem_int32_wait_until_all_vector 
pshmem_int32_wait_until_all_vector +#define shmem_int64_wait_until_all_vector pshmem_int64_wait_until_all_vector +#define shmem_uint32_wait_until_all_vector pshmem_uint32_wait_until_all_vector +#define shmem_uint64_wait_until_all_vector pshmem_uint64_wait_until_all_vector +#define shmem_size_wait_until_all_vector pshmem_size_wait_until_all_vector +#define shmem_ptrdiff_wait_until_all_vector pshmem_ptrdiff_wait_until_all_vector + + +#define shmem_short_wait_until_any_vector pshmem_short_wait_until_any_vector +#define shmem_ushort_wait_until_any_vector pshmem_ushort_wait_until_any_vector +#define shmem_int_wait_until_any_vector pshmem_int_wait_until_any_vector +#define shmem_long_wait_until_any_vector pshmem_long_wait_until_any_vector +#define shmem_longlong_wait_until_any_vector pshmem_longlong_wait_until_any_vector +#define shmem_uint_wait_until_any_vector pshmem_uint_wait_until_any_vector +#define shmem_ulong_wait_until_any_vector pshmem_ulong_wait_until_any_vector +#define shmem_ulonglong_wait_until_any_vector pshmem_ulonglong_wait_until_any_vector +#define shmem_int32_wait_until_any_vector pshmem_int32_wait_until_any_vector +#define shmem_int64_wait_until_any_vector pshmem_int64_wait_until_any_vector +#define shmem_uint32_wait_until_any_vector pshmem_uint32_wait_until_any_vector +#define shmem_uint64_wait_until_any_vector pshmem_uint64_wait_until_any_vector +#define shmem_size_wait_until_any_vector pshmem_size_wait_until_any_vector +#define shmem_ptrdiff_wait_until_any_vector pshmem_ptrdiff_wait_until_any_vector + + +#define shmem_short_wait_until_some_vector pshmem_short_wait_until_some_vector +#define shmem_ushort_wait_until_some_vector pshmem_ushort_wait_until_some_vector +#define shmem_int_wait_until_some_vector pshmem_int_wait_until_some_vector +#define shmem_long_wait_until_some_vector pshmem_long_wait_until_some_vector +#define shmem_longlong_wait_until_some_vector pshmem_longlong_wait_until_some_vector +#define shmem_uint_wait_until_some_vector 
pshmem_uint_wait_until_some_vector +#define shmem_ulong_wait_until_some_vector pshmem_ulong_wait_until_some_vector +#define shmem_ulonglong_wait_until_some_vector pshmem_ulonglong_wait_until_some_vector +#define shmem_int32_wait_until_some_vector pshmem_int32_wait_until_some_vector +#define shmem_int64_wait_until_some_vector pshmem_int64_wait_until_some_vector +#define shmem_uint32_wait_until_some_vector pshmem_uint32_wait_until_some_vector +#define shmem_uint64_wait_until_some_vector pshmem_uint64_wait_until_some_vector +#define shmem_size_wait_until_some_vector pshmem_size_wait_until_some_vector +#define shmem_ptrdiff_wait_until_some_vector pshmem_ptrdiff_wait_until_some_vector + + +#define shmem_short_test pshmem_short_test +#define shmem_int_test pshmem_int_test +#define shmem_long_test pshmem_long_test +#define shmem_longlong_test pshmem_longlong_test +#define shmem_ushort_test pshmem_ushort_test +#define shmem_uint_test pshmem_uint_test +#define shmem_ulong_test pshmem_ulong_test +#define shmem_ulonglong_test pshmem_ulonglong_test +#define shmem_int32_test pshmem_int32_test +#define shmem_int64_test pshmem_int64_test +#define shmem_uint32_test pshmem_uint32_test +#define shmem_uint64_test pshmem_uint64_test +#define shmem_size_test pshmem_size_test +#define shmem_ptrdiff_test pshmem_ptrdiff_test + + +#define shmem_short_test_all pshmem_short_test_all +#define shmem_ushort_test_all pshmem_ushort_test_all +#define shmem_int_test_all pshmem_int_test_all +#define shmem_long_test_all pshmem_long_test_all +#define shmem_longlong_test_all pshmem_longlong_test_all +#define shmem_uint_test_all pshmem_uint_test_all +#define shmem_ulong_test_all pshmem_ulong_test_all +#define shmem_ulonglong_test_all pshmem_ulonglong_test_all +#define shmem_int32_test_all pshmem_int32_test_all +#define shmem_int64_test_all pshmem_int64_test_all +#define shmem_uint32_test_all pshmem_uint32_test_all +#define shmem_uint64_test_all pshmem_uint64_test_all +#define shmem_size_test_all 
pshmem_size_test_all +#define shmem_ptrdiff_test_all pshmem_ptrdiff_test_all + + +#define shmem_short_test_any pshmem_short_test_any +#define shmem_ushort_test_any pshmem_ushort_test_any +#define shmem_int_test_any pshmem_int_test_any +#define shmem_long_test_any pshmem_long_test_any +#define shmem_longlong_test_any pshmem_longlong_test_any +#define shmem_uint_test_any pshmem_uint_test_any +#define shmem_ulong_test_any pshmem_ulong_test_any +#define shmem_ulonglong_test_any pshmem_ulonglong_test_any +#define shmem_int32_test_any pshmem_int32_test_any +#define shmem_int64_test_any pshmem_int64_test_any +#define shmem_uint32_test_any pshmem_uint32_test_any +#define shmem_uint64_test_any pshmem_uint64_test_any +#define shmem_size_test_any pshmem_size_test_any +#define shmem_ptrdiff_test_any pshmem_ptrdiff_test_any + + + +#define shmem_short_test_some pshmem_short_test_some +#define shmem_ushort_test_some pshmem_ushort_test_some +#define shmem_int_test_some pshmem_int_test_some +#define shmem_long_test_some pshmem_long_test_some +#define shmem_longlong_test_some pshmem_longlong_test_some +#define shmem_uint_test_some pshmem_uint_test_some +#define shmem_ulong_test_some pshmem_ulong_test_some +#define shmem_ulonglong_test_some pshmem_ulonglong_test_some +#define shmem_int32_test_some pshmem_int32_test_some +#define shmem_int64_test_some pshmem_int64_test_some +#define shmem_uint32_test_some pshmem_uint32_test_some +#define shmem_uint64_test_some pshmem_uint64_test_some +#define shmem_size_test_some pshmem_size_test_some +#define shmem_ptrdiff_test_some pshmem_ptrdiff_test_some + + +#define shmem_short_test_all_vector pshmem_short_test_all_vector +#define shmem_ushort_test_all_vector pshmem_ushort_test_all_vector +#define shmem_int_test_all_vector pshmem_int_test_all_vector +#define shmem_long_test_all_vector pshmem_long_test_all_vector +#define shmem_longlong_test_all_vector pshmem_longlong_test_all_vector +#define shmem_uint_test_all_vector pshmem_uint_test_all_vector 
+#define shmem_ulong_test_all_vector pshmem_ulong_test_all_vector +#define shmem_ulonglong_test_all_vector pshmem_ulonglong_test_all_vector +#define shmem_int32_test_all_vector pshmem_int32_test_all_vector +#define shmem_int64_test_all_vector pshmem_int64_test_all_vector +#define shmem_uint32_test_all_vector pshmem_uint32_test_all_vector +#define shmem_uint64_test_all_vector pshmem_uint64_test_all_vector +#define shmem_size_test_all_vector pshmem_size_test_all_vector +#define shmem_ptrdiff_test_all_vector pshmem_ptrdiff_test_all_vector + + +#define shmem_short_test_any_vector pshmem_short_test_any_vector +#define shmem_ushort_test_any_vector pshmem_ushort_test_any_vector +#define shmem_int_test_any_vector pshmem_int_test_any_vector +#define shmem_long_test_any_vector pshmem_long_test_any_vector +#define shmem_longlong_test_any_vector pshmem_longlong_test_any_vector +#define shmem_uint_test_any_vector pshmem_uint_test_any_vector +#define shmem_ulong_test_any_vector pshmem_ulong_test_any_vector +#define shmem_ulonglong_test_any_vector pshmem_ulonglong_test_any_vector +#define shmem_int32_test_any_vector pshmem_int32_test_any_vector +#define shmem_int64_test_any_vector pshmem_int64_test_any_vector +#define shmem_uint32_test_any_vector pshmem_uint32_test_any_vector +#define shmem_uint64_test_any_vector pshmem_uint64_test_any_vector +#define shmem_size_test_any_vector pshmem_size_test_any_vector +#define shmem_ptrdiff_test_any_vector pshmem_ptrdiff_test_any_vector + + +#define shmem_short_test_some_vector pshmem_short_test_some_vector +#define shmem_ushort_test_some_vector pshmem_ushort_test_some_vector +#define shmem_int_test_some_vector pshmem_int_test_some_vector +#define shmem_long_test_some_vector pshmem_long_test_some_vector +#define shmem_longlong_test_some_vector pshmem_longlong_test_some_vector +#define shmem_uint_test_some_vector pshmem_uint_test_some_vector +#define shmem_ulong_test_some_vector pshmem_ulong_test_some_vector +#define 
shmem_ulonglong_test_some_vector pshmem_ulonglong_test_some_vector +#define shmem_int32_test_some_vector pshmem_int32_test_some_vector +#define shmem_int64_test_some_vector pshmem_int64_test_some_vector +#define shmem_uint32_test_some_vector pshmem_uint32_test_some_vector +#define shmem_uint64_test_some_vector pshmem_uint64_test_some_vector +#define shmem_size_test_some_vector pshmem_size_test_some_vector +#define shmem_ptrdiff_test_some_vector pshmem_ptrdiff_test_some_vector + +/* + * Barrier sync routines + */ +#define shmem_barrier pshmem_barrier +#define shmem_barrier_all pshmem_barrier_all +#define shmem_sync_all pshmem_sync_all +#define shmem_sync_deprecated pshmem_sync_deprecated +#define shmem_fence pshmem_fence +#define shmem_ctx_fence pshmem_ctx_fence +#define shmem_quiet pshmem_quiet +#define shmem_ctx_quiet pshmem_ctx_quiet + +/* + * Collective routines + */ +#define shmem_broadcast32 pshmem_broadcast32 +#define shmem_broadcast64 pshmem_broadcast64 +#define shmem_collect32 pshmem_collect32 +#define shmem_collect64 pshmem_collect64 +#define shmem_fcollect32 pshmem_fcollect32 +#define shmem_fcollect64 pshmem_fcollect64 + +/* + * Reduction routines + */ +#define shmem_short_and_to_all pshmem_short_and_to_all +#define shmem_int_and_to_all pshmem_int_and_to_all +#define shmem_long_and_to_all pshmem_long_and_to_all +#define shmem_longlong_and_to_all pshmem_longlong_and_to_all +#define shmemx_int16_and_to_all pshmemx_int16_and_to_all +#define shmemx_int32_and_to_all pshmemx_int32_and_to_all +#define shmemx_int64_and_to_all pshmemx_int64_and_to_all + +#define shmem_short_or_to_all pshmem_short_or_to_all +#define shmem_int_or_to_all pshmem_int_or_to_all +#define shmem_long_or_to_all pshmem_long_or_to_all +#define shmem_longlong_or_to_all pshmem_longlong_or_to_all +#define shmemx_int16_or_to_all pshmemx_int16_or_to_all +#define shmemx_int32_or_to_all pshmemx_int32_or_to_all +#define shmemx_int64_or_to_all pshmemx_int64_or_to_all + +#define shmem_short_xor_to_all 
pshmem_short_xor_to_all +#define shmem_int_xor_to_all pshmem_int_xor_to_all +#define shmem_long_xor_to_all pshmem_long_xor_to_all +#define shmem_longlong_xor_to_all pshmem_longlong_xor_to_all +#define shmemx_int16_xor_to_all pshmemx_int16_xor_to_all +#define shmemx_int32_xor_to_all pshmemx_int32_xor_to_all +#define shmemx_int64_xor_to_all pshmemx_int64_xor_to_all + +#define shmem_short_max_to_all pshmem_short_max_to_all +#define shmem_int_max_to_all pshmem_int_max_to_all +#define shmem_long_max_to_all pshmem_long_max_to_all +#define shmem_longlong_max_to_all pshmem_longlong_max_to_all +#define shmem_float_max_to_all pshmem_float_max_to_all +#define shmem_double_max_to_all pshmem_double_max_to_all +#define shmem_longdouble_max_to_all pshmem_longdouble_max_to_all +#define shmemx_int16_max_to_all pshmemx_int16_max_to_all +#define shmemx_int32_max_to_all pshmemx_int32_max_to_all +#define shmemx_int64_max_to_all pshmemx_int64_max_to_all + +#define shmem_short_min_to_all pshmem_short_min_to_all +#define shmem_int_min_to_all pshmem_int_min_to_all +#define shmem_long_min_to_all pshmem_long_min_to_all +#define shmem_longlong_min_to_all pshmem_longlong_min_to_all +#define shmem_float_min_to_all pshmem_float_min_to_all +#define shmem_double_min_to_all pshmem_double_min_to_all +#define shmem_longdouble_min_to_all pshmem_longdouble_min_to_all +#define shmemx_int16_min_to_all pshmemx_int16_min_to_all +#define shmemx_int32_min_to_all pshmemx_int32_min_to_all +#define shmemx_int64_min_to_all pshmemx_int64_min_to_all + +#define shmem_short_sum_to_all pshmem_short_sum_to_all +#define shmem_int_sum_to_all pshmem_int_sum_to_all +#define shmem_long_sum_to_all pshmem_long_sum_to_all +#define shmem_longlong_sum_to_all pshmem_longlong_sum_to_all +#define shmem_float_sum_to_all pshmem_float_sum_to_all +#define shmem_double_sum_to_all pshmem_double_sum_to_all +#define shmem_longdouble_sum_to_all pshmem_longdouble_sum_to_all +#define shmem_complexf_sum_to_all pshmem_complexf_sum_to_all 
+#define shmem_complexd_sum_to_all pshmem_complexd_sum_to_all +#define shmemx_int16_sum_to_all pshmemx_int16_sum_to_all +#define shmemx_int32_sum_to_all pshmemx_int32_sum_to_all +#define shmemx_int64_sum_to_all pshmemx_int64_sum_to_all + +#define shmem_short_prod_to_all pshmem_short_prod_to_all +#define shmem_int_prod_to_all pshmem_int_prod_to_all +#define shmem_long_prod_to_all pshmem_long_prod_to_all +#define shmem_longlong_prod_to_all pshmem_longlong_prod_to_all +#define shmem_float_prod_to_all pshmem_float_prod_to_all +#define shmem_double_prod_to_all pshmem_double_prod_to_all +#define shmem_longdouble_prod_to_all pshmem_longdouble_prod_to_all +#define shmem_complexf_prod_to_all pshmem_complexf_prod_to_all +#define shmem_complexd_prod_to_all pshmem_complexd_prod_to_all +#define shmemx_int16_prod_to_all pshmemx_int16_prod_to_all +#define shmemx_int32_prod_to_all pshmemx_int32_prod_to_all +#define shmemx_int64_prod_to_all pshmemx_int64_prod_to_all + +/* + * Alltoall routines + */ +#define shmem_alltoall32 pshmem_alltoall32 +#define shmem_alltoall64 pshmem_alltoall64 +#define shmem_alltoalls32 pshmem_alltoalls32 +#define shmem_alltoalls64 pshmem_alltoalls64 + +/* + * Platform specific cache management routines + */ +#define shmem_udcflush pshmem_udcflush +#define shmem_udcflush_line pshmem_udcflush_line +#define shmem_set_cache_inv pshmem_set_cache_inv +#define shmem_set_cache_line_inv pshmem_set_cache_line_inv +#define shmem_clear_cache_inv pshmem_clear_cache_inv +#define shmem_clear_cache_line_inv pshmem_clear_cache_line_inv + +#endif /* OSHMEM_C_PROFILE_DEFINES_H */ diff --git a/oshmem/shmem/c/profile/Makefile.am b/oshmem/shmem/c/profile/Makefile.am deleted file mode 100644 index 717d9fbf2ef..00000000000 --- a/oshmem/shmem/c/profile/Makefile.am +++ /dev/null @@ -1,116 +0,0 @@ -# -# Copyright (c) 2013-2016 Mellanox Technologies, Inc. -# All rights reserved -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -# -# OSHMEM_PROFILING flag is enabled when we want our shmem_* symbols -# to be replaced by pshmem_*. In other words, this flag decides -# whether "profile/defines.h" is included or not. "profile/defines.h" -# replaces all shmem_* symbols with pshmem_* symbols. In this directory -# we definately need it to be 1. -# -AM_CPPFLAGS = -DOSHMEM_PROFILING=1 - -noinst_LTLIBRARIES = -if PROJECT_OSHMEM -# Only build if we're building OSHMEM -noinst_LTLIBRARIES += liboshmem_c_pshmem.la -endif - -headers = defines.h - - -OSHMEM_API_SOURCES = \ - pshmem_init.c \ - pshmem_finalize.c \ - pshmem_free.c \ - pshmem_alloc.c \ - pshmem_realloc.c \ - pshmem_align.c \ - pshmem_query.c \ - pshmem_p.c \ - pshmem_context.c \ - pshmem_put.c \ - pshmem_g.c \ - pshmem_get.c \ - pshmem_alltoall.c \ - pshmem_broadcast.c \ - pshmem_collect.c \ - pshmem_ptr.c \ - pshmem_pe_accessible.c \ - pshmem_addr_accessible.c \ - pshmem_barrier.c \ - pshmem_sync.c \ - pshmem_fence.c \ - pshmem_quiet.c \ - pshmem_wait.c \ - pshmem_iget.c \ - pshmem_iput.c \ - pshmem_get_nb.c \ - pshmem_put_nb.c \ - pshmem_udcflush.c \ - pshmem_udcflush_line.c \ - pshmem_set_cache_inv.c \ - pshmem_set_cache_line_inv.c \ - pshmem_clear_cache_inv.c \ - pshmem_clear_cache_line_inv.c \ - pshmem_reduce.c \ - pshmem_swap.c \ - pshmem_set.c \ - pshmem_cswap.c \ - pshmem_fadd.c \ - pshmem_fand.c \ - pshmem_for.c \ - pshmem_fxor.c \ - pshmem_fetch.c \ - pshmem_finc.c \ - pshmem_add.c \ - pshmem_and.c \ - pshmem_or.c \ - pshmem_xor.c \ - pshmem_inc.c \ - pshmem_clear_lock.c \ - pshmem_set_lock.c \ - pshmem_test_lock.c \ - pshmem_global_exit.c \ - pshmem_info.c - -nodist_liboshmem_c_pshmem_la_SOURCES = \ - $(OSHMEM_API_SOURCES) - -# -# Sym link in the sources from the real OSHMEM directory -# -$(nodist_liboshmem_c_pshmem_la_SOURCES): - $(OMPI_V_LN_S) if test ! 
-r $@ ; then \ - pname=`echo $@ | cut -b '2-'` ; \ - $(LN_S) $(top_srcdir)/oshmem/shmem/c/$$pname $@ ; \ - fi - -if PROJECT_OSHMEM -if WANT_INSTALL_HEADERS -oshmemdir = $(oshmemincludedir)/$(subdir) -oshmem_HEADERS = $(headers) -endif -endif - -# These files were created by targets above - -MAINTAINERCLEANFILES = $(nodist_liboshmem_c_pshmem_la_SOURCES) - -# Don't want these targets in here - -tags-recursive: -tags: -TAGS: -GTAGS: -ID: diff --git a/oshmem/shmem/c/profile/defines.h b/oshmem/shmem/c/profile/defines.h deleted file mode 100644 index fa30d783778..00000000000 --- a/oshmem/shmem/c/profile/defines.h +++ /dev/null @@ -1,1120 +0,0 @@ -/* - * Copyright (c) 2013-2017 Mellanox Technologies, Inc. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OSHMEM_C_PROFILE_DEFINES_H -#define OSHMEM_C_PROFILE_DEFINES_H -/* - * This file is included in the top directory only if - * profiling is required. Once profiling is required, - * this file will replace all shmem_* symbols with - * pshmem_* symbols - */ - -/* - * Initialization routines - */ -#define shmem_init pshmem_init -#define shmem_init_thread pshmem_init_thread -#define start_pes pstart_pes /* shmem-compat.h */ - -/* - * Finalization routines - */ -#define shmem_finalize pshmem_finalize -#define shmem_global_exit pshmem_global_exit - -/* - * Query routines - */ -#define shmem_n_pes pshmem_n_pes -#define shmem_query_thread pshmem_query_thread -#define shmem_my_pe pshmem_my_pe -#define _num_pes p_num_pes /* shmem-compat.h */ -#define _my_pe p_my_pe /* shmem-compat.h */ - -/* - * Accessability routines - */ -#define shmem_pe_accessible pshmem_pe_accessible -#define shmem_addr_accessible pshmem_addr_accessible - -/* - * Symmetric heap routines - */ -#define shmem_malloc pshmem_malloc -#define shmem_calloc pshmem_calloc -#define shmem_align pshmem_align -#define shmem_realloc pshmem_realloc -#define shmem_free pshmem_free -#define shmalloc pshmalloc /* 
shmem-compat.h */ -#define shmemalign pshmemalign /* shmem-compat.h */ -#define shrealloc pshrealloc /* shmem-compat.h */ -#define shfree pshfree /* shmem-compat.h */ - -#define shmemx_malloc_with_hint pshmemx_malloc_with_hint - -/* - * Remote pointer operations - */ -#define shmem_ptr pshmem_ptr - -/* - * Communication context operations - */ -#define shmem_ctx_create pshmem_ctx_create -#define shmem_ctx_destroy pshmem_ctx_destroy - -/* - * Elemental put routines - */ -#define shmem_ctx_char_p pshmem_ctx_char_p -#define shmem_ctx_short_p pshmem_ctx_short_p -#define shmem_ctx_int_p pshmem_ctx_int_p -#define shmem_ctx_long_p pshmem_ctx_long_p -#define shmem_ctx_float_p pshmem_ctx_float_p -#define shmem_ctx_double_p pshmem_ctx_double_p -#define shmem_ctx_longlong_p pshmem_ctx_longlong_p -#define shmem_ctx_schar_p pshmem_ctx_schar_p -#define shmem_ctx_uchar_p pshmem_ctx_uchar_p -#define shmem_ctx_ushort_p pshmem_ctx_ushort_p -#define shmem_ctx_uint_p pshmem_ctx_uint_p -#define shmem_ctx_ulong_p pshmem_ctx_ulong_p -#define shmem_ctx_ulonglong_p pshmem_ctx_ulonglong_p -#define shmem_ctx_longdouble_p pshmem_ctx_longdouble_p -#define shmem_ctx_int8_p pshmem_ctx_int8_p -#define shmem_ctx_int16_p pshmem_ctx_int16_p -#define shmem_ctx_int32_p pshmem_ctx_int32_p -#define shmem_ctx_int64_p pshmem_ctx_int64_p -#define shmem_ctx_uint8_p pshmem_ctx_uint8_p -#define shmem_ctx_uint16_p pshmem_ctx_uint16_p -#define shmem_ctx_uint32_p pshmem_ctx_uint32_p -#define shmem_ctx_uint64_p pshmem_ctx_uint64_p -#define shmem_ctx_size_p pshmem_ctx_size_p -#define shmem_ctx_ptrdiff_p pshmem_ctx_ptrdiff_p - -#define shmem_char_p pshmem_char_p -#define shmem_short_p pshmem_short_p -#define shmem_int_p pshmem_int_p -#define shmem_long_p pshmem_long_p -#define shmem_float_p pshmem_float_p -#define shmem_double_p pshmem_double_p -#define shmem_longlong_p pshmem_longlong_p -#define shmem_schar_p pshmem_schar_p -#define shmem_uchar_p pshmem_uchar_p -#define shmem_ushort_p pshmem_ushort_p -#define 
shmem_uint_p pshmem_uint_p -#define shmem_ulong_p pshmem_ulong_p -#define shmem_ulonglong_p pshmem_ulonglong_p -#define shmem_longdouble_p pshmem_longdouble_p -#define shmem_int8_p pshmem_int8_p -#define shmem_int16_p pshmem_int16_p -#define shmem_int32_p pshmem_int32_p -#define shmem_int64_p pshmem_int64_p -#define shmem_uint8_p pshmem_uint8_p -#define shmem_uint16_p pshmem_uint16_p -#define shmem_uint32_p pshmem_uint32_p -#define shmem_uint64_p pshmem_uint64_p -#define shmem_size_p pshmem_size_p -#define shmem_ptrdiff_p pshmem_ptrdiff_p - -#define shmemx_int16_p pshmemx_int16_p -#define shmemx_int32_p pshmemx_int32_p -#define shmemx_int64_p pshmemx_int64_p - -/* - * Block data put routines - */ -#define shmem_ctx_char_put pshmem_ctx_char_put -#define shmem_ctx_short_put pshmem_ctx_short_put -#define shmem_ctx_int_put pshmem_ctx_int_put -#define shmem_ctx_long_put pshmem_ctx_long_put -#define shmem_ctx_float_put pshmem_ctx_float_put -#define shmem_ctx_double_put pshmem_ctx_double_put -#define shmem_ctx_longlong_put pshmem_ctx_longlong_put -#define shmem_ctx_schar_put pshmem_ctx_schar_put -#define shmem_ctx_uchar_put pshmem_ctx_uchar_put -#define shmem_ctx_ushort_put pshmem_ctx_ushort_put -#define shmem_ctx_uint_put pshmem_ctx_uint_put -#define shmem_ctx_ulong_put pshmem_ctx_ulong_put -#define shmem_ctx_ulonglong_put pshmem_ctx_ulonglong_put -#define shmem_ctx_longdouble_put pshmem_ctx_longdouble_put -#define shmem_ctx_int8_put pshmem_ctx_int8_put -#define shmem_ctx_int16_put pshmem_ctx_int16_put -#define shmem_ctx_int32_put pshmem_ctx_int32_put -#define shmem_ctx_int64_put pshmem_ctx_int64_put -#define shmem_ctx_uint8_put pshmem_ctx_uint8_put -#define shmem_ctx_uint16_put pshmem_ctx_uint16_put -#define shmem_ctx_uint32_put pshmem_ctx_uint32_put -#define shmem_ctx_uint64_put pshmem_ctx_uint64_put -#define shmem_ctx_size_put pshmem_ctx_size_put -#define shmem_ctx_ptrdiff_put pshmem_ctx_ptrdiff_put - -#define shmem_char_put pshmem_char_put /* shmem-compat.h */ 
-#define shmem_short_put pshmem_short_put -#define shmem_int_put pshmem_int_put -#define shmem_long_put pshmem_long_put -#define shmem_float_put pshmem_float_put -#define shmem_double_put pshmem_double_put -#define shmem_longlong_put pshmem_longlong_put -#define shmem_schar_put pshmem_schar_put -#define shmem_uchar_put pshmem_uchar_put -#define shmem_ushort_put pshmem_ushort_put -#define shmem_uint_put pshmem_uint_put -#define shmem_ulong_put pshmem_ulong_put -#define shmem_ulonglong_put pshmem_ulonglong_put -#define shmem_longdouble_put pshmem_longdouble_put -#define shmem_int8_put pshmem_int8_put -#define shmem_int16_put pshmem_int16_put -#define shmem_int32_put pshmem_int32_put -#define shmem_int64_put pshmem_int64_put -#define shmem_uint8_put pshmem_uint8_put -#define shmem_uint16_put pshmem_uint16_put -#define shmem_uint32_put pshmem_uint32_put -#define shmem_uint64_put pshmem_uint64_put -#define shmem_size_put pshmem_size_put -#define shmem_ptrdiff_put pshmem_ptrdiff_put - -#define shmem_ctx_put8 pshmem_ctx_put8 -#define shmem_ctx_put16 pshmem_ctx_put16 -#define shmem_ctx_put32 pshmem_ctx_put32 -#define shmem_ctx_put64 pshmem_ctx_put64 -#define shmem_ctx_put128 pshmem_ctx_put128 -#define shmem_ctx_putmem pshmem_ctx_putmem - -#define shmem_put8 pshmem_put8 -#define shmem_put16 pshmem_put16 -#define shmem_put32 pshmem_put32 -#define shmem_put64 pshmem_put64 -#define shmem_put128 pshmem_put128 -#define shmem_putmem pshmem_putmem - -/* - * Strided put routines - */ -#define shmem_ctx_char_iput pshmem_ctx_char_iput -#define shmem_ctx_short_iput pshmem_ctx_short_iput -#define shmem_ctx_int_iput pshmem_ctx_int_iput -#define shmem_ctx_long_iput pshmem_ctx_long_iput -#define shmem_ctx_float_iput pshmem_ctx_float_iput -#define shmem_ctx_double_iput pshmem_ctx_double_iput -#define shmem_ctx_longlong_iput pshmem_ctx_longlong_iput -#define shmem_ctx_schar_iput pshmem_ctx_schar_iput -#define shmem_ctx_uchar_iput pshmem_ctx_uchar_iput -#define shmem_ctx_ushort_iput 
pshmem_ctx_ushort_iput -#define shmem_ctx_uint_iput pshmem_ctx_uint_iput -#define shmem_ctx_ulong_iput pshmem_ctx_ulong_iput -#define shmem_ctx_ulonglong_iput pshmem_ctx_ulonglong_iput -#define shmem_ctx_longdouble_iput pshmem_ctx_longdouble_iput -#define shmem_ctx_int8_iput pshmem_ctx_int8_iput -#define shmem_ctx_int16_iput pshmem_ctx_int16_iput -#define shmem_ctx_int32_iput pshmem_ctx_int32_iput -#define shmem_ctx_int64_iput pshmem_ctx_int64_iput -#define shmem_ctx_uint8_iput pshmem_ctx_uint8_iput -#define shmem_ctx_uint16_iput pshmem_ctx_uint16_iput -#define shmem_ctx_uint32_iput pshmem_ctx_uint32_iput -#define shmem_ctx_uint64_iput pshmem_ctx_uint64_iput -#define shmem_ctx_size_iput pshmem_ctx_size_iput -#define shmem_ctx_ptrdiff_iput pshmem_ctx_ptrdiff_iput - -#define shmem_char_iput pshmem_char_iput -#define shmem_short_iput pshmem_short_iput -#define shmem_int_iput pshmem_int_iput -#define shmem_long_iput pshmem_long_iput -#define shmem_float_iput pshmem_float_iput -#define shmem_double_iput pshmem_double_iput -#define shmem_longlong_iput pshmem_longlong_iput -#define shmem_schar_iput pshmem_schar_iput -#define shmem_uchar_iput pshmem_uchar_iput -#define shmem_ushort_iput pshmem_ushort_iput -#define shmem_uint_iput pshmem_uint_iput -#define shmem_ulong_iput pshmem_ulong_iput -#define shmem_ulonglong_iput pshmem_ulonglong_iput -#define shmem_longdouble_iput pshmem_longdouble_iput -#define shmem_int8_iput pshmem_int8_iput -#define shmem_int16_iput pshmem_int16_iput -#define shmem_int32_iput pshmem_int32_iput -#define shmem_int64_iput pshmem_int64_iput -#define shmem_uint8_iput pshmem_uint8_iput -#define shmem_uint16_iput pshmem_uint16_iput -#define shmem_uint32_iput pshmem_uint32_iput -#define shmem_uint64_iput pshmem_uint64_iput -#define shmem_size_iput pshmem_size_iput -#define shmem_ptrdiff_iput pshmem_ptrdiff_iput - -#define shmem_ctx_iput8 pshmem_ctx_iput8 -#define shmem_ctx_iput16 pshmem_ctx_iput16 -#define shmem_ctx_iput32 pshmem_ctx_iput32 -#define 
shmem_ctx_iput64 pshmem_ctx_iput64 -#define shmem_ctx_iput128 pshmem_ctx_iput128 - -#define shmem_iput8 pshmem_iput8 -#define shmem_iput16 pshmem_iput16 -#define shmem_iput32 pshmem_iput32 -#define shmem_iput64 pshmem_iput64 -#define shmem_iput128 pshmem_iput128 - -/* - * Non-block data put routines - */ -#define shmem_ctx_char_put_nbi pshmem_ctx_char_put_nbi -#define shmem_ctx_short_put_nbi pshmem_ctx_short_put_nbi -#define shmem_ctx_int_put_nbi pshmem_ctx_int_put_nbi -#define shmem_ctx_long_put_nbi pshmem_ctx_long_put_nbi -#define shmem_ctx_float_put_nbi pshmem_ctx_float_put_nbi -#define shmem_ctx_double_put_nbi pshmem_ctx_double_put_nbi -#define shmem_ctx_longlong_put_nbi pshmem_ctx_longlong_put_nbi -#define shmem_ctx_schar_put_nbi pshmem_ctx_schar_put_nbi -#define shmem_ctx_uchar_put_nbi pshmem_ctx_uchar_put_nbi -#define shmem_ctx_ushort_put_nbi pshmem_ctx_ushort_put_nbi -#define shmem_ctx_uint_put_nbi pshmem_ctx_uint_put_nbi -#define shmem_ctx_ulong_put_nbi pshmem_ctx_ulong_put_nbi -#define shmem_ctx_ulonglong_put_nbi pshmem_ctx_ulonglong_put_nbi -#define shmem_ctx_longdouble_put_nbi pshmem_ctx_longdouble_put_nbi -#define shmem_ctx_int8_put_nbi pshmem_ctx_int8_put_nbi -#define shmem_ctx_int16_put_nbi pshmem_ctx_int16_put_nbi -#define shmem_ctx_int32_put_nbi pshmem_ctx_int32_put_nbi -#define shmem_ctx_int64_put_nbi pshmem_ctx_int64_put_nbi -#define shmem_ctx_uint8_put_nbi pshmem_ctx_uint8_put_nbi -#define shmem_ctx_uint16_put_nbi pshmem_ctx_uint16_put_nbi -#define shmem_ctx_uint32_put_nbi pshmem_ctx_uint32_put_nbi -#define shmem_ctx_uint64_put_nbi pshmem_ctx_uint64_put_nbi -#define shmem_ctx_size_put_nbi pshmem_ctx_size_put_nbi -#define shmem_ctx_ptrdiff_put_nbi pshmem_ctx_ptrdiff_put_nbi - -#define shmem_char_put_nbi pshmem_char_put_nbi -#define shmem_short_put_nbi pshmem_short_put_nbi -#define shmem_int_put_nbi pshmem_int_put_nbi -#define shmem_long_put_nbi pshmem_long_put_nbi -#define shmem_float_put_nbi pshmem_float_put_nbi -#define shmem_double_put_nbi 
pshmem_double_put_nbi -#define shmem_longlong_put_nbi pshmem_longlong_put_nbi -#define shmem_schar_put_nbi pshmem_schar_put_nbi -#define shmem_uchar_put_nbi pshmem_uchar_put_nbi -#define shmem_ushort_put_nbi pshmem_ushort_put_nbi -#define shmem_uint_put_nbi pshmem_uint_put_nbi -#define shmem_ulong_put_nbi pshmem_ulong_put_nbi -#define shmem_ulonglong_put_nbi pshmem_ulonglong_put_nbi -#define shmem_longdouble_put_nbi pshmem_longdouble_put_nbi -#define shmem_int8_put_nbi pshmem_int8_put_nbi -#define shmem_int16_put_nbi pshmem_int16_put_nbi -#define shmem_int32_put_nbi pshmem_int32_put_nbi -#define shmem_int64_put_nbi pshmem_int64_put_nbi -#define shmem_uint8_put_nbi pshmem_uint8_put_nbi -#define shmem_uint16_put_nbi pshmem_uint16_put_nbi -#define shmem_uint32_put_nbi pshmem_uint32_put_nbi -#define shmem_uint64_put_nbi pshmem_uint64_put_nbi -#define shmem_size_put_nbi pshmem_size_put_nbi -#define shmem_ptrdiff_put_nbi pshmem_ptrdiff_put_nbi - -#define shmem_ctx_put8_nbi pshmem_ctx_put8_nbi -#define shmem_ctx_put16_nbi pshmem_ctx_put16_nbi -#define shmem_ctx_put32_nbi pshmem_ctx_put32_nbi -#define shmem_ctx_put64_nbi pshmem_ctx_put64_nbi -#define shmem_ctx_put128_nbi pshmem_ctx_put128_nbi -#define shmem_ctx_putmem_nbi pshmem_ctx_putmem_nbi - -#define shmem_put8_nbi pshmem_put8_nbi -#define shmem_put16_nbi pshmem_put16_nbi -#define shmem_put32_nbi pshmem_put32_nbi -#define shmem_put64_nbi pshmem_put64_nbi -#define shmem_put128_nbi pshmem_put128_nbi -#define shmem_putmem_nbi pshmem_putmem_nbi - -/* - * Elemental get routines - */ -#define shmem_ctx_char_g pshmem_ctx_char_g -#define shmem_ctx_short_g pshmem_ctx_short_g -#define shmem_ctx_int_g pshmem_ctx_int_g -#define shmem_ctx_long_g pshmem_ctx_long_g -#define shmem_ctx_float_g pshmem_ctx_float_g -#define shmem_ctx_double_g pshmem_ctx_double_g -#define shmem_ctx_longlong_g pshmem_ctx_longlong_g -#define shmem_ctx_schar_g pshmem_ctx_schar_g -#define shmem_ctx_uchar_g pshmem_ctx_uchar_g -#define shmem_ctx_ushort_g 
pshmem_ctx_ushort_g -#define shmem_ctx_uint_g pshmem_ctx_uint_g -#define shmem_ctx_ulong_g pshmem_ctx_ulong_g -#define shmem_ctx_ulonglong_g pshmem_ctx_ulonglong_g -#define shmem_ctx_longdouble_g pshmem_ctx_longdouble_g -#define shmem_ctx_int8_g pshmem_ctx_int8_g -#define shmem_ctx_int16_g pshmem_ctx_int16_g -#define shmem_ctx_int32_g pshmem_ctx_int32_g -#define shmem_ctx_int64_g pshmem_ctx_int64_g -#define shmem_ctx_uint8_g pshmem_ctx_uint8_g -#define shmem_ctx_uint16_g pshmem_ctx_uint16_g -#define shmem_ctx_uint32_g pshmem_ctx_uint32_g -#define shmem_ctx_uint64_g pshmem_ctx_uint64_g -#define shmem_ctx_size_g pshmem_ctx_size_g -#define shmem_ctx_ptrdiff_g pshmem_ctx_ptrdiff_g - -#define shmem_char_g pshmem_char_g -#define shmem_short_g pshmem_short_g -#define shmem_int_g pshmem_int_g -#define shmem_long_g pshmem_long_g -#define shmem_float_g pshmem_float_g -#define shmem_double_g pshmem_double_g -#define shmem_longlong_g pshmem_longlong_g -#define shmem_schar_g pshmem_schar_g -#define shmem_uchar_g pshmem_uchar_g -#define shmem_ushort_g pshmem_ushort_g -#define shmem_uint_g pshmem_uint_g -#define shmem_ulong_g pshmem_ulong_g -#define shmem_ulonglong_g pshmem_ulonglong_g -#define shmem_longdouble_g pshmem_longdouble_g -#define shmem_int8_g pshmem_int8_g -#define shmem_int16_g pshmem_int16_g -#define shmem_int32_g pshmem_int32_g -#define shmem_int64_g pshmem_int64_g -#define shmem_uint8_g pshmem_uint8_g -#define shmem_uint16_g pshmem_uint16_g -#define shmem_uint32_g pshmem_uint32_g -#define shmem_uint64_g pshmem_uint64_g -#define shmem_size_g pshmem_size_g -#define shmem_ptrdiff_g pshmem_ptrdiff_g - -#define shmemx_int16_g pshmemx_int16_g -#define shmemx_int32_g pshmemx_int32_g -#define shmemx_int64_g pshmemx_int64_g - -/* - * Block data get routines - */ -#define shmem_ctx_char_get pshmem_ctx_char_get -#define shmem_ctx_short_get pshmem_ctx_short_get -#define shmem_ctx_int_get pshmem_ctx_int_get -#define shmem_ctx_long_get pshmem_ctx_long_get -#define 
shmem_ctx_float_get pshmem_ctx_float_get -#define shmem_ctx_double_get pshmem_ctx_double_get -#define shmem_ctx_longlong_get pshmem_ctx_longlong_get -#define shmem_ctx_schar_get pshmem_ctx_schar_get -#define shmem_ctx_uchar_get pshmem_ctx_uchar_get -#define shmem_ctx_ushort_get pshmem_ctx_ushort_get -#define shmem_ctx_uint_get pshmem_ctx_uint_get -#define shmem_ctx_ulong_get pshmem_ctx_ulong_get -#define shmem_ctx_ulonglong_get pshmem_ctx_ulonglong_get -#define shmem_ctx_longdouble_get pshmem_ctx_longdouble_get -#define shmem_ctx_int8_get pshmem_ctx_int8_get -#define shmem_ctx_int16_get pshmem_ctx_int16_get -#define shmem_ctx_int32_get pshmem_ctx_int32_get -#define shmem_ctx_int64_get pshmem_ctx_int64_get -#define shmem_ctx_uint8_get pshmem_ctx_uint8_get -#define shmem_ctx_uint16_get pshmem_ctx_uint16_get -#define shmem_ctx_uint32_get pshmem_ctx_uint32_get -#define shmem_ctx_uint64_get pshmem_ctx_uint64_get -#define shmem_ctx_size_get pshmem_ctx_size_get -#define shmem_ctx_ptrdiff_get pshmem_ctx_ptrdiff_get - -#define shmem_char_get pshmem_char_get /* shmem-compat.h */ -#define shmem_short_get pshmem_short_get -#define shmem_int_get pshmem_int_get -#define shmem_long_get pshmem_long_get -#define shmem_float_get pshmem_float_get -#define shmem_double_get pshmem_double_get -#define shmem_longlong_get pshmem_longlong_get -#define shmem_schar_get pshmem_schar_get -#define shmem_uchar_get pshmem_uchar_get -#define shmem_ushort_get pshmem_ushort_get -#define shmem_uint_get pshmem_uint_get -#define shmem_ulong_get pshmem_ulong_get -#define shmem_ulonglong_get pshmem_ulonglong_get -#define shmem_longdouble_get pshmem_longdouble_get -#define shmem_int8_get pshmem_int8_get -#define shmem_int16_get pshmem_int16_get -#define shmem_int32_get pshmem_int32_get -#define shmem_int64_get pshmem_int64_get -#define shmem_uint8_get pshmem_uint8_get -#define shmem_uint16_get pshmem_uint16_get -#define shmem_uint32_get pshmem_uint32_get -#define shmem_uint64_get pshmem_uint64_get 
-#define shmem_size_get pshmem_size_get -#define shmem_ptrdiff_get pshmem_ptrdiff_get - -#define shmem_ctx_get8 pshmem_ctx_get8 -#define shmem_ctx_get16 pshmem_ctx_get16 -#define shmem_ctx_get32 pshmem_ctx_get32 -#define shmem_ctx_get64 pshmem_ctx_get64 -#define shmem_ctx_get128 pshmem_ctx_get128 -#define shmem_ctx_getmem pshmem_ctx_getmem - -#define shmem_get8 pshmem_get8 -#define shmem_get16 pshmem_get16 -#define shmem_get32 pshmem_get32 -#define shmem_get64 pshmem_get64 -#define shmem_get128 pshmem_get128 -#define shmem_getmem pshmem_getmem - -/* - * Strided get routines - */ -#define shmem_ctx_char_iget pshmem_ctx_char_iget -#define shmem_ctx_short_iget pshmem_ctx_short_iget -#define shmem_ctx_int_iget pshmem_ctx_int_iget -#define shmem_ctx_long_iget pshmem_ctx_long_iget -#define shmem_ctx_float_iget pshmem_ctx_float_iget -#define shmem_ctx_double_iget pshmem_ctx_double_iget -#define shmem_ctx_longlong_iget pshmem_ctx_longlong_iget -#define shmem_ctx_schar_iget pshmem_ctx_schar_iget -#define shmem_ctx_uchar_iget pshmem_ctx_uchar_iget -#define shmem_ctx_ushort_iget pshmem_ctx_ushort_iget -#define shmem_ctx_uint_iget pshmem_ctx_uint_iget -#define shmem_ctx_ulong_iget pshmem_ctx_ulong_iget -#define shmem_ctx_ulonglong_iget pshmem_ctx_ulonglong_iget -#define shmem_ctx_longdouble_iget pshmem_ctx_longdouble_iget -#define shmem_ctx_int8_iget pshmem_ctx_int8_iget -#define shmem_ctx_int16_iget pshmem_ctx_int16_iget -#define shmem_ctx_int32_iget pshmem_ctx_int32_iget -#define shmem_ctx_int64_iget pshmem_ctx_int64_iget -#define shmem_ctx_uint8_iget pshmem_ctx_uint8_iget -#define shmem_ctx_uint16_iget pshmem_ctx_uint16_iget -#define shmem_ctx_uint32_iget pshmem_ctx_uint32_iget -#define shmem_ctx_uint64_iget pshmem_ctx_uint64_iget -#define shmem_ctx_size_iget pshmem_ctx_size_iget -#define shmem_ctx_ptrdiff_iget pshmem_ctx_ptrdiff_iget - -#define shmem_char_iget pshmem_char_iget -#define shmem_short_iget pshmem_short_iget -#define shmem_int_iget pshmem_int_iget -#define 
shmem_long_iget pshmem_long_iget -#define shmem_float_iget pshmem_float_iget -#define shmem_double_iget pshmem_double_iget -#define shmem_longlong_iget pshmem_longlong_iget -#define shmem_schar_iget pshmem_schar_iget -#define shmem_uchar_iget pshmem_uchar_iget -#define shmem_ushort_iget pshmem_ushort_iget -#define shmem_uint_iget pshmem_uint_iget -#define shmem_ulong_iget pshmem_ulong_iget -#define shmem_ulonglong_iget pshmem_ulonglong_iget -#define shmem_longdouble_iget pshmem_longdouble_iget -#define shmem_int8_iget pshmem_int8_iget -#define shmem_int16_iget pshmem_int16_iget -#define shmem_int32_iget pshmem_int32_iget -#define shmem_int64_iget pshmem_int64_iget -#define shmem_uint8_iget pshmem_uint8_iget -#define shmem_uint16_iget pshmem_uint16_iget -#define shmem_uint32_iget pshmem_uint32_iget -#define shmem_uint64_iget pshmem_uint64_iget -#define shmem_size_iget pshmem_size_iget -#define shmem_ptrdiff_iget pshmem_ptrdiff_iget - -#define shmem_ctx_iget8 pshmem_ctx_iget8 -#define shmem_ctx_iget16 pshmem_ctx_iget16 -#define shmem_ctx_iget32 pshmem_ctx_iget32 -#define shmem_ctx_iget64 pshmem_ctx_iget64 -#define shmem_ctx_iget128 pshmem_ctx_iget128 - -#define shmem_iget8 pshmem_iget8 -#define shmem_iget16 pshmem_iget16 -#define shmem_iget32 pshmem_iget32 -#define shmem_iget64 pshmem_iget64 -#define shmem_iget128 pshmem_iget128 - -/* - * Non-block data get routines - */ -#define shmem_ctx_char_get_nbi pshmem_ctx_char_get_nbi -#define shmem_ctx_short_get_nbi pshmem_ctx_short_get_nbi -#define shmem_ctx_int_get_nbi pshmem_ctx_int_get_nbi -#define shmem_ctx_long_get_nbi pshmem_ctx_long_get_nbi -#define shmem_ctx_float_get_nbi pshmem_ctx_float_get_nbi -#define shmem_ctx_double_get_nbi pshmem_ctx_double_get_nbi -#define shmem_ctx_longlong_get_nbi pshmem_ctx_longlong_get_nbi -#define shmem_ctx_schar_get_nbi pshmem_ctx_schar_get_nbi -#define shmem_ctx_uchar_get_nbi pshmem_ctx_uchar_get_nbi -#define shmem_ctx_ushort_get_nbi pshmem_ctx_ushort_get_nbi -#define 
shmem_ctx_uint_get_nbi pshmem_ctx_uint_get_nbi -#define shmem_ctx_ulong_get_nbi pshmem_ctx_ulong_get_nbi -#define shmem_ctx_ulonglong_get_nbi pshmem_ctx_ulonglong_get_nbi -#define shmem_ctx_longdouble_get_nbi pshmem_ctx_longdouble_get_nbi -#define shmem_ctx_int8_get_nbi pshmem_ctx_int8_get_nbi -#define shmem_ctx_int16_get_nbi pshmem_ctx_int16_get_nbi -#define shmem_ctx_int32_get_nbi pshmem_ctx_int32_get_nbi -#define shmem_ctx_int64_get_nbi pshmem_ctx_int64_get_nbi -#define shmem_ctx_uint8_get_nbi pshmem_ctx_uint8_get_nbi -#define shmem_ctx_uint16_get_nbi pshmem_ctx_uint16_get_nbi -#define shmem_ctx_uint32_get_nbi pshmem_ctx_uint32_get_nbi -#define shmem_ctx_uint64_get_nbi pshmem_ctx_uint64_get_nbi -#define shmem_ctx_size_get_nbi pshmem_ctx_size_get_nbi -#define shmem_ctx_ptrdiff_get_nbi pshmem_ctx_ptrdiff_get_nbi - -#define shmem_char_get_nbi pshmem_char_get_nbi -#define shmem_short_get_nbi pshmem_short_get_nbi -#define shmem_int_get_nbi pshmem_int_get_nbi -#define shmem_long_get_nbi pshmem_long_get_nbi -#define shmem_float_get_nbi pshmem_float_get_nbi -#define shmem_double_get_nbi pshmem_double_get_nbi -#define shmem_longlong_get_nbi pshmem_longlong_get_nbi -#define shmem_schar_get_nbi pshmem_schar_get_nbi -#define shmem_uchar_get_nbi pshmem_uchar_get_nbi -#define shmem_ushort_get_nbi pshmem_ushort_get_nbi -#define shmem_uint_get_nbi pshmem_uint_get_nbi -#define shmem_ulong_get_nbi pshmem_ulong_get_nbi -#define shmem_ulonglong_get_nbi pshmem_ulonglong_get_nbi -#define shmem_longdouble_get_nbi pshmem_longdouble_get_nbi -#define shmem_int8_get_nbi pshmem_int8_get_nbi -#define shmem_int16_get_nbi pshmem_int16_get_nbi -#define shmem_int32_get_nbi pshmem_int32_get_nbi -#define shmem_int64_get_nbi pshmem_int64_get_nbi -#define shmem_uint8_get_nbi pshmem_uint8_get_nbi -#define shmem_uint16_get_nbi pshmem_uint16_get_nbi -#define shmem_uint32_get_nbi pshmem_uint32_get_nbi -#define shmem_uint64_get_nbi pshmem_uint64_get_nbi -#define shmem_size_get_nbi pshmem_size_get_nbi 
-#define shmem_ptrdiff_get_nbi pshmem_ptrdiff_get_nbi - -#define shmem_ctx_get8_nbi pshmem_ctx_get8_nbi -#define shmem_ctx_get16_nbi pshmem_ctx_get16_nbi -#define shmem_ctx_get32_nbi pshmem_ctx_get32_nbi -#define shmem_ctx_get64_nbi pshmem_ctx_get64_nbi -#define shmem_ctx_get128_nbi pshmem_ctx_get128_nbi -#define shmem_ctx_getmem_nbi pshmem_ctx_getmem_nbi - -#define shmem_get8_nbi pshmem_get8_nbi -#define shmem_get16_nbi pshmem_get16_nbi -#define shmem_get32_nbi pshmem_get32_nbi -#define shmem_get64_nbi pshmem_get64_nbi -#define shmem_get128_nbi pshmem_get128_nbi -#define shmem_getmem_nbi pshmem_getmem_nbi - -/* - * Atomic operations - */ -/* Atomic swap */ -#define shmem_ctx_double_atomic_swap pshmem_ctx_double_atomic_swap -#define shmem_ctx_float_atomic_swap pshmem_ctx_float_atomic_swap -#define shmem_ctx_int_atomic_swap pshmem_ctx_int_atomic_swap -#define shmem_ctx_long_atomic_swap pshmem_ctx_long_atomic_swap -#define shmem_ctx_longlong_atomic_swap pshmem_ctx_longlong_atomic_swap -#define shmem_ctx_uint_atomic_swap pshmem_ctx_uint_atomic_swap -#define shmem_ctx_ulong_atomic_swap pshmem_ctx_ulong_atomic_swap -#define shmem_ctx_ulonglong_atomic_swap pshmem_ctx_ulonglong_atomic_swap - -#define shmem_double_atomic_swap pshmem_double_atomic_swap -#define shmem_float_atomic_swap pshmem_float_atomic_swap -#define shmem_int_atomic_swap pshmem_int_atomic_swap -#define shmem_long_atomic_swap pshmem_long_atomic_swap -#define shmem_longlong_atomic_swap pshmem_longlong_atomic_swap -#define shmem_uint_atomic_swap pshmem_uint_atomic_swap -#define shmem_ulong_atomic_swap pshmem_ulong_atomic_swap -#define shmem_ulonglong_atomic_swap pshmem_ulonglong_atomic_swap - -#define shmem_double_swap pshmem_double_swap -#define shmem_float_swap pshmem_float_swap -#define shmem_int_swap pshmem_int_swap -#define shmem_long_swap pshmem_long_swap -#define shmem_longlong_swap pshmem_longlong_swap - -#define shmemx_int32_swap pshmemx_int32_swap -#define shmemx_int64_swap pshmemx_int64_swap - -/* 
Atomic set */ -#define shmem_ctx_double_atomic_set pshmem_ctx_double_atomic_set -#define shmem_ctx_float_atomic_set pshmem_ctx_float_atomic_set -#define shmem_ctx_int_atomic_set pshmem_ctx_int_atomic_set -#define shmem_ctx_long_atomic_set pshmem_ctx_long_atomic_set -#define shmem_ctx_longlong_atomic_set pshmem_ctx_longlong_atomic_set -#define shmem_ctx_uint_atomic_set pshmem_ctx_uint_atomic_set -#define shmem_ctx_ulong_atomic_set pshmem_ctx_ulong_atomic_set -#define shmem_ctx_ulonglong_atomic_set pshmem_ctx_ulonglong_atomic_set - -#define shmem_double_atomic_set pshmem_double_atomic_set -#define shmem_float_atomic_set pshmem_float_atomic_set -#define shmem_int_atomic_set pshmem_int_atomic_set -#define shmem_long_atomic_set pshmem_long_atomic_set -#define shmem_longlong_atomic_set pshmem_longlong_atomic_set -#define shmem_uint_atomic_set pshmem_uint_atomic_set -#define shmem_ulong_atomic_set pshmem_ulong_atomic_set -#define shmem_ulonglong_atomic_set pshmem_ulonglong_atomic_set - -#define shmem_double_set pshmem_double_set -#define shmem_float_set pshmem_float_set -#define shmem_int_set pshmem_int_set -#define shmem_long_set pshmem_long_set -#define shmem_longlong_set pshmem_longlong_set - -#define shmemx_int32_set pshmemx_int32_set -#define shmemx_int64_set pshmemx_int64_set - -/* Atomic conditional swap */ -#define shmem_ctx_int_atomic_compare_swap pshmem_ctx_int_atomic_compare_swap -#define shmem_ctx_long_atomic_compare_swap pshmem_ctx_long_atomic_compare_swap -#define shmem_ctx_longlong_atomic_compare_swap pshmem_ctx_longlong_atomic_compare_swap -#define shmem_ctx_uint_atomic_compare_swap pshmem_ctx_uint_atomic_compare_swap -#define shmem_ctx_ulong_atomic_compare_swap pshmem_ctx_ulong_atomic_compare_swap -#define shmem_ctx_ulonglong_atomic_compare_swap pshmem_ctx_ulonglong_atomic_compare_swap - -#define shmem_int_atomic_compare_swap pshmem_int_atomic_compare_swap -#define shmem_long_atomic_compare_swap pshmem_long_atomic_compare_swap -#define 
shmem_longlong_atomic_compare_swap pshmem_longlong_atomic_compare_swap -#define shmem_uint_atomic_compare_swap pshmem_uint_atomic_compare_swap -#define shmem_ulong_atomic_compare_swap pshmem_ulong_atomic_compare_swap -#define shmem_ulonglong_atomic_compare_swap pshmem_ulonglong_atomic_compare_swap - -#define shmem_int_cswap pshmem_int_cswap -#define shmem_long_cswap pshmem_long_cswap -#define shmem_longlong_cswap pshmem_longlong_cswap - -#define shmemx_int32_cswap pshmemx_int32_cswap -#define shmemx_int64_cswap pshmemx_int64_cswap - -/* Atomic Fetch&Add */ -#define shmem_ctx_int_atomic_fetch_add pshmem_ctx_int_atomic_fetch_add -#define shmem_ctx_long_atomic_fetch_add pshmem_ctx_long_atomic_fetch_add -#define shmem_ctx_longlong_atomic_fetch_add pshmem_ctx_longlong_atomic_fetch_add -#define shmem_ctx_uint_atomic_fetch_add pshmem_ctx_uint_atomic_fetch_add -#define shmem_ctx_ulong_atomic_fetch_add pshmem_ctx_ulong_atomic_fetch_add -#define shmem_ctx_ulonglong_atomic_fetch_add pshmem_ctx_ulonglong_atomic_fetch_add - -#define shmem_int_atomic_fetch_add pshmem_int_atomic_fetch_add -#define shmem_long_atomic_fetch_add pshmem_long_atomic_fetch_add -#define shmem_longlong_atomic_fetch_add pshmem_longlong_atomic_fetch_add -#define shmem_uint_atomic_fetch_add pshmem_uint_atomic_fetch_add -#define shmem_ulong_atomic_fetch_add pshmem_ulong_atomic_fetch_add -#define shmem_ulonglong_atomic_fetch_add pshmem_ulonglong_atomic_fetch_add - -#define shmem_int_fadd pshmem_int_fadd -#define shmem_long_fadd pshmem_long_fadd -#define shmem_longlong_fadd pshmem_longlong_fadd - -#define shmemx_int32_fadd pshmemx_int32_fadd -#define shmemx_int64_fadd pshmemx_int64_fadd - -/* Atomic Fetch&And */ -#define shmem_int_atomic_fetch_and pshmem_int_atomic_fetch_and -#define shmem_long_atomic_fetch_and pshmem_long_atomic_fetch_and -#define shmem_longlong_atomic_fetch_and pshmem_longlong_atomic_fetch_and -#define shmem_uint_atomic_fetch_and pshmem_uint_atomic_fetch_and -#define 
shmem_ulong_atomic_fetch_and pshmem_ulong_atomic_fetch_and -#define shmem_ulonglong_atomic_fetch_and pshmem_ulonglong_atomic_fetch_and -#define shmem_int32_atomic_fetch_and pshmem_int32_atomic_fetch_and -#define shmem_int64_atomic_fetch_and pshmem_int64_atomic_fetch_and -#define shmem_uint32_atomic_fetch_and pshmem_uint32_atomic_fetch_and -#define shmem_uint64_atomic_fetch_and pshmem_uint64_atomic_fetch_and - -#define shmem_ctx_int_atomic_fetch_and pshmem_ctx_int_atomic_fetch_and -#define shmem_ctx_long_atomic_fetch_and pshmem_ctx_long_atomic_fetch_and -#define shmem_ctx_longlong_atomic_fetch_and pshmem_ctx_longlong_atomic_fetch_and -#define shmem_ctx_uint_atomic_fetch_and pshmem_ctx_uint_atomic_fetch_and -#define shmem_ctx_ulong_atomic_fetch_and pshmem_ctx_ulong_atomic_fetch_and -#define shmem_ctx_ulonglong_atomic_fetch_and pshmem_ctx_ulonglong_atomic_fetch_and -#define shmem_ctx_int32_atomic_fetch_and pshmem_ctx_int32_atomic_fetch_and -#define shmem_ctx_int64_atomic_fetch_and pshmem_ctx_int64_atomic_fetch_and -#define shmem_ctx_uint32_atomic_fetch_and pshmem_ctx_uint32_atomic_fetch_and -#define shmem_ctx_uint64_atomic_fetch_and pshmem_ctx_uint64_atomic_fetch_and - -#define shmemx_int32_atomic_fetch_and pshmemx_int32_atomic_fetch_and -#define shmemx_int64_atomic_fetch_and pshmemx_int64_atomic_fetch_and -#define shmemx_uint32_atomic_fetch_and pshmemx_uint32_atomic_fetch_and -#define shmemx_uint64_atomic_fetch_and pshmemx_uint64_atomic_fetch_and - -/* Atomic Fetch&Or */ -#define shmem_int_atomic_fetch_or pshmem_int_atomic_fetch_or -#define shmem_long_atomic_fetch_or pshmem_long_atomic_fetch_or -#define shmem_longlong_atomic_fetch_or pshmem_longlong_atomic_fetch_or -#define shmem_uint_atomic_fetch_or pshmem_uint_atomic_fetch_or -#define shmem_ulong_atomic_fetch_or pshmem_ulong_atomic_fetch_or -#define shmem_ulonglong_atomic_fetch_or pshmem_ulonglong_atomic_fetch_or -#define shmem_int32_atomic_fetch_or pshmem_int32_atomic_fetch_or -#define shmem_int64_atomic_fetch_or 
pshmem_int64_atomic_fetch_or -#define shmem_uint32_atomic_fetch_or pshmem_uint32_atomic_fetch_or -#define shmem_uint64_atomic_fetch_or pshmem_uint64_atomic_fetch_or - -#define shmem_ctx_int_atomic_fetch_or pshmem_ctx_int_atomic_fetch_or -#define shmem_ctx_long_atomic_fetch_or pshmem_ctx_long_atomic_fetch_or -#define shmem_ctx_longlong_atomic_fetch_or pshmem_ctx_longlong_atomic_fetch_or -#define shmem_ctx_uint_atomic_fetch_or pshmem_ctx_uint_atomic_fetch_or -#define shmem_ctx_ulong_atomic_fetch_or pshmem_ctx_ulong_atomic_fetch_or -#define shmem_ctx_ulonglong_atomic_fetch_or pshmem_ctx_ulonglong_atomic_fetch_or -#define shmem_ctx_int32_atomic_fetch_or pshmem_ctx_int32_atomic_fetch_or -#define shmem_ctx_int64_atomic_fetch_or pshmem_ctx_int64_atomic_fetch_or -#define shmem_ctx_uint32_atomic_fetch_or pshmem_ctx_uint32_atomic_fetch_or -#define shmem_ctx_uint64_atomic_fetch_or pshmem_ctx_uint64_atomic_fetch_or - -#define shmemx_int32_atomic_fetch_or pshmemx_int32_atomic_fetch_or -#define shmemx_int64_atomic_fetch_or pshmemx_int64_atomic_fetch_or -#define shmemx_uint32_atomic_fetch_or pshmemx_uint32_atomic_fetch_or -#define shmemx_uint64_atomic_fetch_or pshmemx_uint64_atomic_fetch_or - -/* Atomic Fetch&Xor */ -#define shmem_int_atomic_fetch_xor pshmem_int_atomic_fetch_xor -#define shmem_long_atomic_fetch_xor pshmem_long_atomic_fetch_xor -#define shmem_longlong_atomic_fetch_xor pshmem_longlong_atomic_fetch_xor -#define shmem_uint_atomic_fetch_xor pshmem_uint_atomic_fetch_xor -#define shmem_ulong_atomic_fetch_xor pshmem_ulong_atomic_fetch_xor -#define shmem_ulonglong_atomic_fetch_xor pshmem_ulonglong_atomic_fetch_xor -#define shmem_int32_atomic_fetch_xor pshmem_int32_atomic_fetch_xor -#define shmem_int64_atomic_fetch_xor pshmem_int64_atomic_fetch_xor -#define shmem_uint32_atomic_fetch_xor pshmem_uint32_atomic_fetch_xor -#define shmem_uint64_atomic_fetch_xor pshmem_uint64_atomic_fetch_xor - -#define shmem_ctx_int_atomic_fetch_xor pshmem_ctx_int_atomic_fetch_xor -#define 
shmem_ctx_long_atomic_fetch_xor pshmem_ctx_long_atomic_fetch_xor -#define shmem_ctx_longlong_atomic_fetch_xor pshmem_ctx_longlong_atomic_fetch_xor -#define shmem_ctx_uint_atomic_fetch_xor pshmem_ctx_uint_atomic_fetch_xor -#define shmem_ctx_ulong_atomic_fetch_xor pshmem_ctx_ulong_atomic_fetch_xor -#define shmem_ctx_ulonglong_atomic_fetch_xor pshmem_ctx_ulonglong_atomic_fetch_xor -#define shmem_ctx_int32_atomic_fetch_xor pshmem_ctx_int32_atomic_fetch_xor -#define shmem_ctx_int64_atomic_fetch_xor pshmem_ctx_int64_atomic_fetch_xor -#define shmem_ctx_uint32_atomic_fetch_xor pshmem_ctx_uint32_atomic_fetch_xor -#define shmem_ctx_uint64_atomic_fetch_xor pshmem_ctx_uint64_atomic_fetch_xor - -#define shmemx_int32_atomic_fetch_xor pshmemx_int32_atomic_fetch_xor -#define shmemx_int64_atomic_fetch_xor pshmemx_int64_atomic_fetch_xor -#define shmemx_uint32_atomic_fetch_xor pshmemx_uint32_atomic_fetch_xor -#define shmemx_uint64_atomic_fetch_xor pshmemx_uint64_atomic_fetch_xor - -/* Atomic Fetch */ -#define shmem_ctx_double_atomic_fetch pshmem_ctx_double_atomic_fetch -#define shmem_ctx_float_atomic_fetch pshmem_ctx_float_atomic_fetch -#define shmem_ctx_int_atomic_fetch pshmem_ctx_int_atomic_fetch -#define shmem_ctx_long_atomic_fetch pshmem_ctx_long_atomic_fetch -#define shmem_ctx_longlong_atomic_fetch pshmem_ctx_longlong_atomic_fetch -#define shmem_ctx_uint_atomic_fetch pshmem_ctx_uint_atomic_fetch -#define shmem_ctx_ulong_atomic_fetch pshmem_ctx_ulong_atomic_fetch -#define shmem_ctx_ulonglong_atomic_fetch pshmem_ctx_ulonglong_atomic_fetch - -#define shmem_double_atomic_fetch pshmem_double_atomic_fetch -#define shmem_float_atomic_fetch pshmem_float_atomic_fetch -#define shmem_int_atomic_fetch pshmem_int_atomic_fetch -#define shmem_long_atomic_fetch pshmem_long_atomic_fetch -#define shmem_longlong_atomic_fetch pshmem_longlong_atomic_fetch -#define shmem_uint_atomic_fetch pshmem_uint_atomic_fetch -#define shmem_ulong_atomic_fetch pshmem_ulong_atomic_fetch -#define 
shmem_ulonglong_atomic_fetch pshmem_ulonglong_atomic_fetch - -#define shmem_double_fetch pshmem_double_fetch -#define shmem_float_fetch pshmem_float_fetch -#define shmem_int_fetch pshmem_int_fetch -#define shmem_long_fetch pshmem_long_fetch -#define shmem_longlong_fetch pshmem_longlong_fetch - -#define shmemx_int32_fetch pshmemx_int32_fetch -#define shmemx_int64_fetch pshmemx_int64_fetch - -/* Atomic Fetch&Inc */ -#define shmem_ctx_int_atomic_fetch_inc pshmem_ctx_int_atomic_fetch_inc -#define shmem_ctx_long_atomic_fetch_inc pshmem_ctx_long_atomic_fetch_inc -#define shmem_ctx_longlong_atomic_fetch_inc pshmem_ctx_longlong_atomic_fetch_inc -#define shmem_ctx_uint_atomic_fetch_inc pshmem_ctx_uint_atomic_fetch_inc -#define shmem_ctx_ulong_atomic_fetch_inc pshmem_ctx_ulong_atomic_fetch_inc -#define shmem_ctx_ulonglong_atomic_fetch_inc pshmem_ctx_ulonglong_atomic_fetch_inc - -#define shmem_uint_atomic_fetch_inc pshmem_uint_atomic_fetch_inc -#define shmem_ulong_atomic_fetch_inc pshmem_ulong_atomic_fetch_inc -#define shmem_ulonglong_atomic_fetch_inc pshmem_ulonglong_atomic_fetch_inc -#define shmem_int_atomic_fetch_inc pshmem_int_atomic_fetch_inc -#define shmem_long_atomic_fetch_inc pshmem_long_atomic_fetch_inc -#define shmem_longlong_atomic_fetch_inc pshmem_longlong_atomic_fetch_inc - -#define shmem_int_finc pshmem_int_finc -#define shmem_long_finc pshmem_long_finc -#define shmem_longlong_finc pshmem_longlong_finc - -#define shmemx_int32_finc pshmemx_int32_finc -#define shmemx_int64_finc pshmemx_int64_finc - -/* Atomic Add */ -#define shmem_ctx_int_atomic_add pshmem_ctx_int_atomic_add -#define shmem_ctx_long_atomic_add pshmem_ctx_long_atomic_add -#define shmem_ctx_longlong_atomic_add pshmem_ctx_longlong_atomic_add -#define shmem_ctx_uint_atomic_add pshmem_ctx_uint_atomic_add -#define shmem_ctx_ulong_atomic_add pshmem_ctx_ulong_atomic_add -#define shmem_ctx_ulonglong_atomic_add pshmem_ctx_ulonglong_atomic_add - -#define shmem_int_atomic_add pshmem_int_atomic_add -#define 
shmem_long_atomic_add pshmem_long_atomic_add -#define shmem_longlong_atomic_add pshmem_longlong_atomic_add -#define shmem_uint_atomic_add pshmem_uint_atomic_add -#define shmem_ulong_atomic_add pshmem_ulong_atomic_add -#define shmem_ulonglong_atomic_add pshmem_ulonglong_atomic_add - -#define shmem_int_add pshmem_int_add -#define shmem_long_add pshmem_long_add -#define shmem_longlong_add pshmem_longlong_add - -#define shmemx_int32_add pshmemx_int32_add -#define shmemx_int64_add pshmemx_int64_add - -/* Atomic And */ -#define shmem_int_atomic_and pshmem_int_atomic_and -#define shmem_long_atomic_and pshmem_long_atomic_and -#define shmem_longlong_atomic_and pshmem_longlong_atomic_and -#define shmem_uint_atomic_and pshmem_uint_atomic_and -#define shmem_ulong_atomic_and pshmem_ulong_atomic_and -#define shmem_ulonglong_atomic_and pshmem_ulonglong_atomic_and -#define shmem_int32_atomic_and pshmem_int32_atomic_and -#define shmem_int64_atomic_and pshmem_int64_atomic_and -#define shmem_uint32_atomic_and pshmem_uint32_atomic_and -#define shmem_uint64_atomic_and pshmem_uint64_atomic_and - -#define shmem_ctx_int_atomic_and pshmem_ctx_int_atomic_and -#define shmem_ctx_long_atomic_and pshmem_ctx_long_atomic_and -#define shmem_ctx_longlong_atomic_and pshmem_ctx_longlong_atomic_and -#define shmem_ctx_uint_atomic_and pshmem_ctx_uint_atomic_and -#define shmem_ctx_ulong_atomic_and pshmem_ctx_ulong_atomic_and -#define shmem_ctx_ulonglong_atomic_and pshmem_ctx_ulonglong_atomic_and -#define shmem_ctx_int32_atomic_and pshmem_ctx_int32_atomic_and -#define shmem_ctx_int64_atomic_and pshmem_ctx_int64_atomic_and -#define shmem_ctx_uint32_atomic_and pshmem_ctx_uint32_atomic_and -#define shmem_ctx_uint64_atomic_and pshmem_ctx_uint64_atomic_and - -#define shmemx_int32_atomic_and pshmemx_int32_atomic_and -#define shmemx_int64_atomic_and pshmemx_int64_atomic_and - -#define shmemx_uint32_atomic_and pshmemx_uint32_atomic_and -#define shmemx_uint64_atomic_and pshmemx_uint64_atomic_and - -/* Atomic Or */ 
-#define shmem_int_atomic_or pshmem_int_atomic_or -#define shmem_long_atomic_or pshmem_long_atomic_or -#define shmem_longlong_atomic_or pshmem_longlong_atomic_or -#define shmem_uint_atomic_or pshmem_uint_atomic_or -#define shmem_ulong_atomic_or pshmem_ulong_atomic_or -#define shmem_ulonglong_atomic_or pshmem_ulonglong_atomic_or -#define shmem_int32_atomic_or pshmem_int32_atomic_or -#define shmem_int64_atomic_or pshmem_int64_atomic_or -#define shmem_uint32_atomic_or pshmem_uint32_atomic_or -#define shmem_uint64_atomic_or pshmem_uint64_atomic_or - -#define shmem_ctx_int_atomic_or pshmem_ctx_int_atomic_or -#define shmem_ctx_long_atomic_or pshmem_ctx_long_atomic_or -#define shmem_ctx_longlong_atomic_or pshmem_ctx_longlong_atomic_or -#define shmem_ctx_uint_atomic_or pshmem_ctx_uint_atomic_or -#define shmem_ctx_ulong_atomic_or pshmem_ctx_ulong_atomic_or -#define shmem_ctx_ulonglong_atomic_or pshmem_ctx_ulonglong_atomic_or -#define shmem_ctx_int32_atomic_or pshmem_ctx_int32_atomic_or -#define shmem_ctx_int64_atomic_or pshmem_ctx_int64_atomic_or -#define shmem_ctx_uint32_atomic_or pshmem_ctx_uint32_atomic_or -#define shmem_ctx_uint64_atomic_or pshmem_ctx_uint64_atomic_or - -#define shmemx_int32_atomic_or pshmemx_int32_atomic_or -#define shmemx_int64_atomic_or pshmemx_int64_atomic_or - -#define shmemx_uint32_atomic_or pshmemx_uint32_atomic_or -#define shmemx_uint64_atomic_or pshmemx_uint64_atomic_or - -/* Atomic Xor */ -#define shmem_int_atomic_xor pshmem_int_atomic_xor -#define shmem_long_atomic_xor pshmem_long_atomic_xor -#define shmem_longlong_atomic_xor pshmem_longlong_atomic_xor -#define shmem_uint_atomic_xor pshmem_uint_atomic_xor -#define shmem_ulong_atomic_xor pshmem_ulong_atomic_xor -#define shmem_ulonglong_atomic_xor pshmem_ulonglong_atomic_xor -#define shmem_int32_atomic_xor pshmem_int32_atomic_xor -#define shmem_int64_atomic_xor pshmem_int64_atomic_xor -#define shmem_uint32_atomic_xor pshmem_uint32_atomic_xor -#define shmem_uint64_atomic_xor 
pshmem_uint64_atomic_xor - -#define shmem_ctx_int_atomic_xor pshmem_ctx_int_atomic_xor -#define shmem_ctx_long_atomic_xor pshmem_ctx_long_atomic_xor -#define shmem_ctx_longlong_atomic_xor pshmem_ctx_longlong_atomic_xor -#define shmem_ctx_uint_atomic_xor pshmem_ctx_uint_atomic_xor -#define shmem_ctx_ulong_atomic_xor pshmem_ctx_ulong_atomic_xor -#define shmem_ctx_ulonglong_atomic_xor pshmem_ctx_ulonglong_atomic_xor -#define shmem_ctx_int32_atomic_xor pshmem_ctx_int32_atomic_xor -#define shmem_ctx_int64_atomic_xor pshmem_ctx_int64_atomic_xor -#define shmem_ctx_uint32_atomic_xor pshmem_ctx_uint32_atomic_xor -#define shmem_ctx_uint64_atomic_xor pshmem_ctx_uint64_atomic_xor - -#define shmemx_int32_atomic_xor pshmemx_int32_atomic_xor -#define shmemx_int64_atomic_xor pshmemx_int64_atomic_xor - -#define shmemx_uint32_atomic_xor pshmemx_uint32_atomic_xor -#define shmemx_uint64_atomic_xor pshmemx_uint64_atomic_xor - -/* Atomic Inc */ -#define shmem_ctx_int_atomic_inc pshmem_ctx_int_atomic_inc -#define shmem_ctx_long_atomic_inc pshmem_ctx_long_atomic_inc -#define shmem_ctx_longlong_atomic_inc pshmem_ctx_longlong_atomic_inc -#define shmem_ctx_uint_atomic_inc pshmem_ctx_uint_atomic_inc -#define shmem_ctx_ulong_atomic_inc pshmem_ctx_ulong_atomic_inc -#define shmem_ctx_ulonglong_atomic_inc pshmem_ctx_ulonglong_atomic_inc - -#define shmem_int_atomic_inc pshmem_int_atomic_inc -#define shmem_long_atomic_inc pshmem_long_atomic_inc -#define shmem_longlong_atomic_inc pshmem_longlong_atomic_inc -#define shmem_uint_atomic_inc pshmem_uint_atomic_inc -#define shmem_ulong_atomic_inc pshmem_ulong_atomic_inc -#define shmem_ulonglong_atomic_inc pshmem_ulonglong_atomic_inc - -#define shmem_int_inc pshmem_int_inc -#define shmem_long_inc pshmem_long_inc -#define shmem_longlong_inc pshmem_longlong_inc - -#define shmemx_int32_inc pshmemx_int32_inc -#define shmemx_int64_inc pshmemx_int64_inc - -/* - * Lock functions - */ -#define shmem_set_lock pshmem_set_lock -#define shmem_clear_lock 
pshmem_clear_lock -#define shmem_test_lock pshmem_test_lock - -/* - * P2P sync routines - */ -#define shmem_short_wait pshmem_short_wait -#define shmem_int_wait pshmem_int_wait -#define shmem_long_wait pshmem_long_wait -#define shmem_longlong_wait pshmem_longlong_wait -#define shmem_wait pshmem_wait -#define shmemx_int32_wait pshmemx_int32_wait -#define shmemx_int64_wait pshmemx_int64_wait - -#define shmem_short_wait_until pshmem_short_wait_until -#define shmem_int_wait_until pshmem_int_wait_until -#define shmem_long_wait_until pshmem_long_wait_until -#define shmem_longlong_wait_until pshmem_longlong_wait_until -#define shmem_ushort_wait_until pshmem_ushort_wait_until -#define shmem_uint_wait_until pshmem_uint_wait_until -#define shmem_ulong_wait_until pshmem_ulong_wait_until -#define shmem_ulonglong_wait_until pshmem_ulonglong_wait_until -#define shmem_int32_wait_until pshmem_int32_wait_until -#define shmem_int64_wait_until pshmem_int64_wait_until -#define shmem_uint32_wait_until pshmem_uint32_wait_until -#define shmem_uint64_wait_until pshmem_uint64_wait_until -#define shmem_size_wait_until pshmem_size_wait_until -#define shmem_ptrdiff_wait_until pshmem_ptrdiff_wait_until - -#define shmemx_int32_wait_until pshmemx_int32_wait_until -#define shmemx_int64_wait_until pshmemx_int64_wait_until - -#define shmem_short_test pshmem_short_test -#define shmem_int_test pshmem_int_test -#define shmem_long_test pshmem_long_test -#define shmem_longlong_test pshmem_longlong_test -#define shmem_ushort_test pshmem_ushort_test -#define shmem_uint_test pshmem_uint_test -#define shmem_ulong_test pshmem_ulong_test -#define shmem_ulonglong_test pshmem_ulonglong_test -#define shmem_int32_test pshmem_int32_test -#define shmem_int64_test pshmem_int64_test -#define shmem_uint32_test pshmem_uint32_test -#define shmem_uint64_test pshmem_uint64_test -#define shmem_size_test pshmem_size_test -#define shmem_ptrdiff_test pshmem_ptrdiff_test - -/* - * Barrier sync routines - */ -#define 
shmem_barrier pshmem_barrier -#define shmem_barrier_all pshmem_barrier_all -#define shmem_sync pshmem_sync -#define shmem_sync_all pshmem_sync_all -#define shmem_fence pshmem_fence -#define shmem_ctx_fence pshmem_ctx_fence -#define shmem_quiet pshmem_quiet -#define shmem_ctx_quiet pshmem_ctx_quiet - -/* - * Collective routines - */ -#define shmem_broadcast32 pshmem_broadcast32 -#define shmem_broadcast64 pshmem_broadcast64 -#define shmem_collect32 pshmem_collect32 -#define shmem_collect64 pshmem_collect64 -#define shmem_fcollect32 pshmem_fcollect32 -#define shmem_fcollect64 pshmem_fcollect64 - -/* - * Reduction routines - */ -#define shmem_short_and_to_all pshmem_short_and_to_all -#define shmem_int_and_to_all pshmem_int_and_to_all -#define shmem_long_and_to_all pshmem_long_and_to_all -#define shmem_longlong_and_to_all pshmem_longlong_and_to_all -#define shmemx_int16_and_to_all pshmemx_int16_and_to_all -#define shmemx_int32_and_to_all pshmemx_int32_and_to_all -#define shmemx_int64_and_to_all pshmemx_int64_and_to_all - -#define shmem_short_or_to_all pshmem_short_or_to_all -#define shmem_int_or_to_all pshmem_int_or_to_all -#define shmem_long_or_to_all pshmem_long_or_to_all -#define shmem_longlong_or_to_all pshmem_longlong_or_to_all -#define shmemx_int16_or_to_all pshmemx_int16_or_to_all -#define shmemx_int32_or_to_all pshmemx_int32_or_to_all -#define shmemx_int64_or_to_all pshmemx_int64_or_to_all - -#define shmem_short_xor_to_all pshmem_short_xor_to_all -#define shmem_int_xor_to_all pshmem_int_xor_to_all -#define shmem_long_xor_to_all pshmem_long_xor_to_all -#define shmem_longlong_xor_to_all pshmem_longlong_xor_to_all -#define shmemx_int16_xor_to_all pshmemx_int16_xor_to_all -#define shmemx_int32_xor_to_all pshmemx_int32_xor_to_all -#define shmemx_int64_xor_to_all pshmemx_int64_xor_to_all - -#define shmem_short_max_to_all pshmem_short_max_to_all -#define shmem_int_max_to_all pshmem_int_max_to_all -#define shmem_long_max_to_all pshmem_long_max_to_all -#define 
shmem_longlong_max_to_all pshmem_longlong_max_to_all -#define shmem_float_max_to_all pshmem_float_max_to_all -#define shmem_double_max_to_all pshmem_double_max_to_all -#define shmem_longdouble_max_to_all pshmem_longdouble_max_to_all -#define shmemx_int16_max_to_all pshmemx_int16_max_to_all -#define shmemx_int32_max_to_all pshmemx_int32_max_to_all -#define shmemx_int64_max_to_all pshmemx_int64_max_to_all - -#define shmem_short_min_to_all pshmem_short_min_to_all -#define shmem_int_min_to_all pshmem_int_min_to_all -#define shmem_long_min_to_all pshmem_long_min_to_all -#define shmem_longlong_min_to_all pshmem_longlong_min_to_all -#define shmem_float_min_to_all pshmem_float_min_to_all -#define shmem_double_min_to_all pshmem_double_min_to_all -#define shmem_longdouble_min_to_all pshmem_longdouble_min_to_all -#define shmemx_int16_min_to_all pshmemx_int16_min_to_all -#define shmemx_int32_min_to_all pshmemx_int32_min_to_all -#define shmemx_int64_min_to_all pshmemx_int64_min_to_all - -#define shmem_short_sum_to_all pshmem_short_sum_to_all -#define shmem_int_sum_to_all pshmem_int_sum_to_all -#define shmem_long_sum_to_all pshmem_long_sum_to_all -#define shmem_longlong_sum_to_all pshmem_longlong_sum_to_all -#define shmem_float_sum_to_all pshmem_float_sum_to_all -#define shmem_double_sum_to_all pshmem_double_sum_to_all -#define shmem_longdouble_sum_to_all pshmem_longdouble_sum_to_all -#define shmem_complexf_sum_to_all pshmem_complexf_sum_to_all -#define shmem_complexd_sum_to_all pshmem_complexd_sum_to_all -#define shmemx_int16_sum_to_all pshmemx_int16_sum_to_all -#define shmemx_int32_sum_to_all pshmemx_int32_sum_to_all -#define shmemx_int64_sum_to_all pshmemx_int64_sum_to_all - -#define shmem_short_prod_to_all pshmem_short_prod_to_all -#define shmem_int_prod_to_all pshmem_int_prod_to_all -#define shmem_long_prod_to_all pshmem_long_prod_to_all -#define shmem_longlong_prod_to_all pshmem_longlong_prod_to_all -#define shmem_float_prod_to_all pshmem_float_prod_to_all -#define 
shmem_double_prod_to_all pshmem_double_prod_to_all -#define shmem_longdouble_prod_to_all pshmem_longdouble_prod_to_all -#define shmem_complexf_prod_to_all pshmem_complexf_prod_to_all -#define shmem_complexd_prod_to_all pshmem_complexd_prod_to_all -#define shmemx_int16_prod_to_all pshmemx_int16_prod_to_all -#define shmemx_int32_prod_to_all pshmemx_int32_prod_to_all -#define shmemx_int64_prod_to_all pshmemx_int64_prod_to_all - -/* - * Alltoall routines - */ -#define shmem_alltoall32 pshmem_alltoall32 -#define shmem_alltoall64 pshmem_alltoall64 -#define shmem_alltoalls32 pshmem_alltoalls32 -#define shmem_alltoalls64 pshmem_alltoalls64 - -/* - * Platform specific cache management routines - */ -#define shmem_udcflush pshmem_udcflush -#define shmem_udcflush_line pshmem_udcflush_line -#define shmem_set_cache_inv pshmem_set_cache_inv -#define shmem_set_cache_line_inv pshmem_set_cache_line_inv -#define shmem_clear_cache_inv pshmem_clear_cache_inv -#define shmem_clear_cache_line_inv pshmem_clear_cache_line_inv - -#endif /* OSHMEM_C_PROFILE_DEFINES_H */ diff --git a/oshmem/shmem/c/shmem_add.c b/oshmem/shmem/c/shmem_add.c index 6435496892a..4b78c278f3a 100644 --- a/oshmem/shmem/c/shmem_add.c +++ b/oshmem/shmem/c/shmem_add.c @@ -66,6 +66,12 @@ #pragma weak shmem_ctx_uint_atomic_add = pshmem_ctx_uint_atomic_add #pragma weak shmem_ctx_ulong_atomic_add = pshmem_ctx_ulong_atomic_add #pragma weak shmem_ctx_ulonglong_atomic_add = pshmem_ctx_ulonglong_atomic_add +#pragma weak shmem_ctx_int32_atomic_add = pshmem_ctx_int32_atomic_add +#pragma weak shmem_ctx_int64_atomic_add = pshmem_ctx_int64_atomic_add +#pragma weak shmem_ctx_uint32_atomic_add = pshmem_ctx_uint32_atomic_add +#pragma weak shmem_ctx_uint64_atomic_add = pshmem_ctx_uint64_atomic_add +#pragma weak shmem_ctx_size_atomic_add = pshmem_ctx_size_atomic_add +#pragma weak shmem_ctx_ptrdiff_atomic_add = pshmem_ctx_ptrdiff_atomic_add #pragma weak shmem_int_atomic_add = pshmem_int_atomic_add #pragma weak shmem_long_atomic_add = 
pshmem_long_atomic_add @@ -73,6 +79,12 @@ #pragma weak shmem_uint_atomic_add = pshmem_uint_atomic_add #pragma weak shmem_ulong_atomic_add = pshmem_ulong_atomic_add #pragma weak shmem_ulonglong_atomic_add = pshmem_ulonglong_atomic_add +#pragma weak shmem_int32_atomic_add = pshmem_int32_atomic_add +#pragma weak shmem_int64_atomic_add = pshmem_int64_atomic_add +#pragma weak shmem_uint32_atomic_add = pshmem_uint32_atomic_add +#pragma weak shmem_uint64_atomic_add = pshmem_uint64_atomic_add +#pragma weak shmem_size_atomic_add = pshmem_size_atomic_add +#pragma weak shmem_ptrdiff_atomic_add = pshmem_ptrdiff_atomic_add #pragma weak shmem_int_add = pshmem_int_add #pragma weak shmem_long_add = pshmem_long_add @@ -80,7 +92,7 @@ #pragma weak shmemx_int32_add = pshmemx_int32_add #pragma weak shmemx_int64_add = pshmemx_int64_add -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_ATOMIC_ADD(_uint, unsigned int, shmem) @@ -89,12 +101,24 @@ SHMEM_CTX_TYPE_ATOMIC_ADD(_ulonglong, unsigned long long, shmem) SHMEM_CTX_TYPE_ATOMIC_ADD(_int, int, shmem) SHMEM_CTX_TYPE_ATOMIC_ADD(_long, long, shmem) SHMEM_CTX_TYPE_ATOMIC_ADD(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_ADD(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_ADD(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_ADD(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_ADD(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_ADD(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_ADD(_ptrdiff, ptrdiff_t, shmem) SHMEM_TYPE_ATOMIC_ADD(_int, int, shmem) SHMEM_TYPE_ATOMIC_ADD(_long, long, shmem) SHMEM_TYPE_ATOMIC_ADD(_longlong, long long, shmem) SHMEM_TYPE_ATOMIC_ADD(_uint, unsigned int, shmem) SHMEM_TYPE_ATOMIC_ADD(_ulong, unsigned long, shmem) SHMEM_TYPE_ATOMIC_ADD(_ulonglong, unsigned long long, shmem) +SHMEM_TYPE_ATOMIC_ADD(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_ADD(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_ADD(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_ADD(_uint64, 
uint64_t, shmem) +SHMEM_TYPE_ATOMIC_ADD(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_ADD(_ptrdiff, ptrdiff_t, shmem) /* deprecated APIs */ #define SHMEM_TYPE_ADD(type_name, type, prefix) \ diff --git a/oshmem/shmem/c/shmem_addr_accessible.c b/oshmem/shmem/c/shmem_addr_accessible.c index 724318a894f..fbfe1ec2b33 100644 --- a/oshmem/shmem/c/shmem_addr_accessible.c +++ b/oshmem/shmem/c/shmem_addr_accessible.c @@ -19,7 +19,7 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_addr_accessible = pshmem_addr_accessible -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif int shmem_addr_accessible(const void *addr, int pe) diff --git a/oshmem/shmem/c/shmem_align.c b/oshmem/shmem/c/shmem_align.c index 52c7d625396..7f5046412de 100644 --- a/oshmem/shmem/c/shmem_align.c +++ b/oshmem/shmem/c/shmem_align.c @@ -21,7 +21,7 @@ #include "oshmem/include/pshmem.h" #pragma weak shmem_align = pshmem_align #pragma weak shmemalign = pshmemalign -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif static inline void* _shmemalign(size_t align, size_t size); diff --git a/oshmem/shmem/c/shmem_alloc.c b/oshmem/shmem/c/shmem_alloc.c index 92592ce8ca3..08f5918c79f 100644 --- a/oshmem/shmem/c/shmem_alloc.c +++ b/oshmem/shmem/c/shmem_alloc.c @@ -24,8 +24,9 @@ #pragma weak shmem_malloc = pshmem_malloc #pragma weak shmem_calloc = pshmem_calloc #pragma weak shmalloc = pshmalloc -#pragma weak shmemx_malloc_with_hint = pshmemx_malloc_with_hint -#include "oshmem/shmem/c/profile/defines.h" +#pragma weak shmem_malloc_with_hints = pshmem_malloc_with_hints + +#include "oshmem/shmem/c/profile-defines.h" #endif static inline void* _shmalloc(size_t size); @@ -76,7 +77,7 @@ static inline void* _shmalloc(size_t size) return pBuff; } -void* shmemx_malloc_with_hint(size_t size, long hint) +void* shmem_malloc_with_hints(size_t size, long hint) { int rc; void* pBuff = NULL; @@ -105,3 +106,5 @@ void* 
shmemx_malloc_with_hint(size_t size, long hint) #endif return pBuff; } + + diff --git a/oshmem/shmem/c/shmem_alltoall.c b/oshmem/shmem/c/shmem_alltoall.c index 8dc8b01312f..bf2ff21a5de 100644 --- a/oshmem/shmem/c/shmem_alltoall.c +++ b/oshmem/shmem/c/shmem_alltoall.c @@ -102,10 +102,155 @@ static void _shmem_alltoall(void *target, #pragma weak shmem_alltoall64 = pshmem_alltoall64 #pragma weak shmem_alltoalls32 = pshmem_alltoalls32 #pragma weak shmem_alltoalls64 = pshmem_alltoalls64 -#include "oshmem/shmem/c/profile/defines.h" + +/* Teams alltoall */ +#pragma weak shmem_char_alltoall = pshmem_char_alltoall +#pragma weak shmem_short_alltoall = pshmem_short_alltoall +#pragma weak shmem_int_alltoall = pshmem_int_alltoall +#pragma weak shmem_long_alltoall = pshmem_long_alltoall +#pragma weak shmem_float_alltoall = pshmem_float_alltoall +#pragma weak shmem_double_alltoall = pshmem_double_alltoall +#pragma weak shmem_longlong_alltoall = pshmem_longlong_alltoall +#pragma weak shmem_schar_alltoall = pshmem_schar_alltoall +#pragma weak shmem_uchar_alltoall = pshmem_uchar_alltoall +#pragma weak shmem_ushort_alltoall = pshmem_ushort_alltoall +#pragma weak shmem_uint_alltoall = pshmem_uint_alltoall +#pragma weak shmem_ulong_alltoall = pshmem_ulong_alltoall +#pragma weak shmem_ulonglong_alltoall = pshmem_ulonglong_alltoall +#pragma weak shmem_longdouble_alltoall = pshmem_longdouble_alltoall +#pragma weak shmem_int8_alltoall = pshmem_int8_alltoall +#pragma weak shmem_int16_alltoall = pshmem_int16_alltoall +#pragma weak shmem_int32_alltoall = pshmem_int32_alltoall +#pragma weak shmem_int64_alltoall = pshmem_int64_alltoall +#pragma weak shmem_uint8_alltoall = pshmem_uint8_alltoall +#pragma weak shmem_uint16_alltoall = pshmem_uint16_alltoall +#pragma weak shmem_uint32_alltoall = pshmem_uint32_alltoall +#pragma weak shmem_uint64_alltoall = pshmem_uint64_alltoall +#pragma weak shmem_size_alltoall = pshmem_size_alltoall +#pragma weak shmem_ptrdiff_alltoall = pshmem_ptrdiff_alltoall + 
+#pragma weak shmem_alltoallmem = pshmem_alltoallmem + +/* Teams alltoalls */ +#pragma weak shmem_char_alltoalls = pshmem_char_alltoalls +#pragma weak shmem_short_alltoalls = pshmem_short_alltoalls +#pragma weak shmem_int_alltoalls = pshmem_int_alltoalls +#pragma weak shmem_long_alltoalls = pshmem_long_alltoalls +#pragma weak shmem_float_alltoalls = pshmem_float_alltoalls +#pragma weak shmem_double_alltoalls = pshmem_double_alltoalls +#pragma weak shmem_longlong_alltoalls = pshmem_longlong_alltoalls +#pragma weak shmem_schar_alltoalls = pshmem_schar_alltoalls +#pragma weak shmem_uchar_alltoalls = pshmem_uchar_alltoalls +#pragma weak shmem_ushort_alltoalls = pshmem_ushort_alltoalls +#pragma weak shmem_uint_alltoalls = pshmem_uint_alltoalls +#pragma weak shmem_ulong_alltoalls = pshmem_ulong_alltoalls +#pragma weak shmem_ulonglong_alltoalls = pshmem_ulonglong_alltoalls +#pragma weak shmem_longdouble_alltoalls = pshmem_longdouble_alltoalls +#pragma weak shmem_int8_alltoalls = pshmem_int8_alltoalls +#pragma weak shmem_int16_alltoalls = pshmem_int16_alltoalls +#pragma weak shmem_int32_alltoalls = pshmem_int32_alltoalls +#pragma weak shmem_int64_alltoalls = pshmem_int64_alltoalls +#pragma weak shmem_uint8_alltoalls = pshmem_uint8_alltoalls +#pragma weak shmem_uint16_alltoalls = pshmem_uint16_alltoalls +#pragma weak shmem_uint32_alltoalls = pshmem_uint32_alltoalls +#pragma weak shmem_uint64_alltoalls = pshmem_uint64_alltoalls +#pragma weak shmem_size_alltoalls = pshmem_size_alltoalls +#pragma weak shmem_ptrdiff_alltoalls = pshmem_ptrdiff_alltoalls + +#pragma weak shmem_alltoallsmem = pshmem_alltoallsmem + +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_TYPE_ALLTOALL(_alltoall32, sizeof(uint32_t)) SHMEM_TYPE_ALLTOALL(_alltoall64, sizeof(uint64_t)) SHMEM_TYPE_ALLTOALLS(_alltoalls32, sizeof(uint32_t)) SHMEM_TYPE_ALLTOALLS(_alltoalls64, sizeof(uint64_t)) + + + +#define SHMEM_TYPE_TEAM_ALLTOALL(type_name, type, code, postfix) \ + int 
shmem##type_name##postfix(shmem_team_t team, type *dest, const type *source, size_t nelems) \ + { \ + int rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(team_alltoall( \ + team, (void*)dest, (void*)source, nelems, code)); \ + RUNTIME_CHECK_RC(rc); \ + \ + return rc; \ + } + + +SHMEM_TYPE_TEAM_ALLTOALL(_char, char, SHMEM_CHAR, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_short, short, SHMEM_SHORT, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_int, int, SHMEM_INT, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_long, long, SHMEM_LONG, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_float, float, SHMEM_FLOAT, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_double, double, SHMEM_DOUBLE, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_longlong, long long, SHMEM_LLONG, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_schar, signed char, SHMEM_SCHAR, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_uchar, unsigned char, SHMEM_UCHAR, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_ushort, unsigned short, SHMEM_USHORT, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_uint, unsigned int, SHMEM_UINT, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_ulong, unsigned long, SHMEM_ULONG, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_ulonglong, unsigned long long, SHMEM_ULLONG, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_longdouble, long double, SHMEM_LDOUBLE, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_int8, int8_t, SHMEM_INT8_T, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_int16, int16_t, SHMEM_INT16_T, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_int32, int32_t, SHMEM_INT32_T, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_int64, int64_t, SHMEM_INT64_T, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_uint8, uint8_t, SHMEM_UINT8_T, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_uint16, uint16_t, SHMEM_UINT16_T, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_uint32, uint32_t, SHMEM_UINT32_T, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_uint64, uint64_t, SHMEM_UINT64_T, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_size, size_t, SHMEM_SIZE_T, _alltoall) +SHMEM_TYPE_TEAM_ALLTOALL(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, _alltoall) + 
+SHMEM_TYPE_TEAM_ALLTOALL(, void, SHMEM_BYTE, _alltoallmem) + + +#define SHMEM_TYPE_TEAM_ALLTOALLS(type_name, type, code, postfix) \ + int shmem##type_name##postfix(shmem_team_t team, type *dest, const type *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems) \ + { \ + int rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(team_alltoalls( \ + team, (void*)dest, (void*)source, \ + dst, sst, nelems, code)); \ + RUNTIME_CHECK_RC(rc); \ + \ + return rc; \ + } + + +SHMEM_TYPE_TEAM_ALLTOALLS(_char, char, SHMEM_CHAR, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_short, short, SHMEM_SHORT, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_int, int, SHMEM_INT, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_long, long, SHMEM_LONG, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_float, float, SHMEM_FLOAT, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_double, double, SHMEM_DOUBLE, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_longlong, long long, SHMEM_LLONG, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_schar, signed char, SHMEM_SCHAR, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_uchar, unsigned char, SHMEM_UCHAR, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_ushort, unsigned short, SHMEM_USHORT, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_uint, unsigned int, SHMEM_UINT, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_ulong, unsigned long, SHMEM_ULONG, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_ulonglong, unsigned long long, SHMEM_ULLONG, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_longdouble, long double, SHMEM_LDOUBLE, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_int8, int8_t, SHMEM_INT8_T, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_int16, int16_t, SHMEM_INT16_T, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_int32, int32_t, SHMEM_INT32_T, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_int64, int64_t, SHMEM_INT64_T, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_uint8, uint8_t, SHMEM_UINT8_T, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_uint16, uint16_t, SHMEM_UINT16_T, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_uint32, uint32_t, SHMEM_UINT32_T, _alltoalls) 
+SHMEM_TYPE_TEAM_ALLTOALLS(_uint64, uint64_t, SHMEM_UINT64_T, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_size, size_t, SHMEM_SIZE_T, _alltoalls) +SHMEM_TYPE_TEAM_ALLTOALLS(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, _alltoalls) + +SHMEM_TYPE_TEAM_ALLTOALLS(, void, SHMEM_BYTE, _alltoallsmem) diff --git a/oshmem/shmem/c/shmem_and.c b/oshmem/shmem/c/shmem_and.c index 0f4c5be9d39..eb271adb62e 100644 --- a/oshmem/shmem/c/shmem_and.c +++ b/oshmem/shmem/c/shmem_and.c @@ -51,7 +51,7 @@ #pragma weak shmemx_int64_atomic_and = pshmemx_int64_atomic_and #pragma weak shmemx_uint32_atomic_and = pshmemx_uint32_atomic_and #pragma weak shmemx_uint64_atomic_and = pshmemx_uint64_atomic_and -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif OSHMEM_TYPE_OP(int, int, shmem, and) diff --git a/oshmem/shmem/c/shmem_barrier.c b/oshmem/shmem/c/shmem_barrier.c index 7ce0ddc96f7..8ef2458236b 100644 --- a/oshmem/shmem/c/shmem_barrier.c +++ b/oshmem/shmem/c/shmem_barrier.c @@ -24,7 +24,7 @@ #include "oshmem/include/pshmem.h" #pragma weak shmem_barrier = pshmem_barrier #pragma weak shmem_barrier_all = pshmem_barrier_all -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync) diff --git a/oshmem/shmem/c/shmem_broadcast.c b/oshmem/shmem/c/shmem_broadcast.c index fea737825da..62f31b109b1 100644 --- a/oshmem/shmem/c/shmem_broadcast.c +++ b/oshmem/shmem/c/shmem_broadcast.c @@ -69,7 +69,7 @@ static void _shmem_broadcast(void *target, } /* Define actual PE using relative in active set */ - PE_root = oshmem_proc_pe(group->proc_array[PE_root]); + PE_root = oshmem_proc_pe_vpid(group, PE_root); /* Call collective broadcast operation */ rc = group->g_scoll.scoll_broadcast(group, @@ -90,8 +90,82 @@ static void _shmem_broadcast(void *target, #include "oshmem/include/pshmem.h" #pragma weak shmem_broadcast32 = pshmem_broadcast32 #pragma weak shmem_broadcast64 = 
pshmem_broadcast64 -#include "oshmem/shmem/c/profile/defines.h" + +/* Teams broadcast */ +#pragma weak shmem_char_broadcast = pshmem_char_broadcast +#pragma weak shmem_short_broadcast = pshmem_short_broadcast +#pragma weak shmem_int_broadcast = pshmem_int_broadcast +#pragma weak shmem_long_broadcast = pshmem_long_broadcast +#pragma weak shmem_float_broadcast = pshmem_float_broadcast +#pragma weak shmem_double_broadcast = pshmem_double_broadcast +#pragma weak shmem_longlong_broadcast = pshmem_longlong_broadcast +#pragma weak shmem_schar_broadcast = pshmem_schar_broadcast +#pragma weak shmem_uchar_broadcast = pshmem_uchar_broadcast +#pragma weak shmem_ushort_broadcast = pshmem_ushort_broadcast +#pragma weak shmem_uint_broadcast = pshmem_uint_broadcast +#pragma weak shmem_ulong_broadcast = pshmem_ulong_broadcast +#pragma weak shmem_ulonglong_broadcast = pshmem_ulonglong_broadcast +#pragma weak shmem_longdouble_broadcast = pshmem_longdouble_broadcast +#pragma weak shmem_int8_broadcast = pshmem_int8_broadcast +#pragma weak shmem_int16_broadcast = pshmem_int16_broadcast +#pragma weak shmem_int32_broadcast = pshmem_int32_broadcast +#pragma weak shmem_int64_broadcast = pshmem_int64_broadcast +#pragma weak shmem_uint8_broadcast = pshmem_uint8_broadcast +#pragma weak shmem_uint16_broadcast = pshmem_uint16_broadcast +#pragma weak shmem_uint32_broadcast = pshmem_uint32_broadcast +#pragma weak shmem_uint64_broadcast = pshmem_uint64_broadcast +#pragma weak shmem_size_broadcast = pshmem_size_broadcast +#pragma weak shmem_ptrdiff_broadcast = pshmem_ptrdiff_broadcast + +#pragma weak shmem_broadcastmem = pshmem_broadcastmem + +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_TYPE_BROADCAST(_broadcast32, sizeof(uint32_t)) SHMEM_TYPE_BROADCAST(_broadcast64, sizeof(uint64_t)) + + + +#define SHMEM_TYPE_TEAM_BROADCAST(type_name, type, code, postfix) \ + int shmem##type_name##postfix(shmem_team_t team, type *dest, const type *source, size_t nelems, int PE_root) \ + { \ + int rc = 
0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(team_broadcast( \ + team, (void*)dest, (void*)source, \ + nelems, PE_root, code)); \ + RUNTIME_CHECK_RC(rc); \ + \ + return rc; \ + } + + +SHMEM_TYPE_TEAM_BROADCAST(_char, char, SHMEM_CHAR, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_short, short, SHMEM_SHORT, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_int, int, SHMEM_INT, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_long, long, SHMEM_LONG, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_float, float, SHMEM_FLOAT, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_double, double, SHMEM_DOUBLE, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_longlong, long long, SHMEM_LLONG, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_schar, signed char, SHMEM_SCHAR, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_uchar, unsigned char, SHMEM_UCHAR, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_ushort, unsigned short, SHMEM_USHORT, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_uint, unsigned int, SHMEM_UINT, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_ulong, unsigned long, SHMEM_ULONG, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_ulonglong, unsigned long long, SHMEM_ULLONG, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_longdouble, long double, SHMEM_LDOUBLE, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_int8, int8_t, SHMEM_INT8_T, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_int16, int16_t, SHMEM_INT16_T, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_int32, int32_t, SHMEM_INT32_T, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_int64, int64_t, SHMEM_INT64_T, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_uint8, uint8_t, SHMEM_UINT8_T, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_uint16, uint16_t, SHMEM_UINT16_T, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_uint32, uint32_t, SHMEM_UINT32_T, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_uint64, uint64_t, SHMEM_UINT64_T, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_size, size_t, SHMEM_SIZE_T, _broadcast) +SHMEM_TYPE_TEAM_BROADCAST(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, _broadcast) + +SHMEM_TYPE_TEAM_BROADCAST(, void, SHMEM_BYTE, _broadcastmem) diff --git 
a/oshmem/shmem/c/shmem_clear_cache_inv.c b/oshmem/shmem/c/shmem_clear_cache_inv.c index f9d084b3a61..c80cf7e6feb 100644 --- a/oshmem/shmem/c/shmem_clear_cache_inv.c +++ b/oshmem/shmem/c/shmem_clear_cache_inv.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -18,17 +20,17 @@ #include "oshmem/mca/spml/spml.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_clear_cache_inv = pshmem_clear_cache_inv -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_clear_cache_inv(void) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_clear_cache_inv is not supported by the current CPU architecture");}while (0); #else /* another implementation */ diff --git a/oshmem/shmem/c/shmem_clear_cache_line_inv.c b/oshmem/shmem/c/shmem_clear_cache_line_inv.c index 3ce8af8fc8f..720b2d6ed20 100644 --- a/oshmem/shmem/c/shmem_clear_cache_line_inv.c +++ b/oshmem/shmem/c/shmem_clear_cache_line_inv.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -18,17 +20,17 @@ #include "oshmem/mca/spml/spml.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_clear_cache_line_inv = pshmem_clear_cache_line_inv -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_clear_cache_line_inv(void *target) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_clear_cache_line_inv is not supported by the current CPU architecture");}while (0); #else /* another implementation */ diff --git a/oshmem/shmem/c/shmem_clear_lock.c b/oshmem/shmem/c/shmem_clear_lock.c index 2b731d73477..3051047a686 100644 --- a/oshmem/shmem/c/shmem_clear_lock.c +++ b/oshmem/shmem/c/shmem_clear_lock.c @@ -22,7 +22,7 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_clear_lock = pshmem_clear_lock -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_clear_lock(volatile long *lock) diff --git a/oshmem/shmem/c/shmem_collect.c b/oshmem/shmem/c/shmem_collect.c index 8d14a80ffa1..dd347cbe4a4 100644 --- a/oshmem/shmem/c/shmem_collect.c +++ b/oshmem/shmem/c/shmem_collect.c @@ -80,10 +80,162 @@ static void _shmem_collect(void *target, #pragma weak shmem_collect64 = pshmem_collect64 #pragma weak shmem_fcollect32 = pshmem_fcollect32 #pragma weak shmem_fcollect64 = pshmem_fcollect64 -#include "oshmem/shmem/c/profile/defines.h" + +/* Teams collect */ +#pragma weak shmem_char_collect = pshmem_char_collect +#pragma weak shmem_short_collect = pshmem_short_collect +#pragma weak shmem_int_collect = pshmem_int_collect +#pragma weak shmem_long_collect = pshmem_long_collect +#pragma weak shmem_float_collect = pshmem_float_collect 
+#pragma weak shmem_double_collect = pshmem_double_collect +#pragma weak shmem_longlong_collect = pshmem_longlong_collect +#pragma weak shmem_schar_collect = pshmem_schar_collect +#pragma weak shmem_uchar_collect = pshmem_uchar_collect +#pragma weak shmem_ushort_collect = pshmem_ushort_collect +#pragma weak shmem_uint_collect = pshmem_uint_collect +#pragma weak shmem_ulong_collect = pshmem_ulong_collect +#pragma weak shmem_ulonglong_collect = pshmem_ulonglong_collect +#pragma weak shmem_longdouble_collect = pshmem_longdouble_collect +#pragma weak shmem_int8_collect = pshmem_int8_collect +#pragma weak shmem_int16_collect = pshmem_int16_collect +#pragma weak shmem_int32_collect = pshmem_int32_collect +#pragma weak shmem_int64_collect = pshmem_int64_collect +#pragma weak shmem_uint8_collect = pshmem_uint8_collect +#pragma weak shmem_uint16_collect = pshmem_uint16_collect +#pragma weak shmem_uint32_collect = pshmem_uint32_collect +#pragma weak shmem_uint64_collect = pshmem_uint64_collect +#pragma weak shmem_size_collect = pshmem_size_collect +#pragma weak shmem_ptrdiff_collect = pshmem_ptrdiff_collect + +#pragma weak shmem_collectmem = pshmem_collectmem + + +/* Teams fcollect */ +#pragma weak shmem_char_fcollect = pshmem_char_fcollect +#pragma weak shmem_short_fcollect = pshmem_short_fcollect +#pragma weak shmem_int_fcollect = pshmem_int_fcollect +#pragma weak shmem_long_fcollect = pshmem_long_fcollect +#pragma weak shmem_float_fcollect = pshmem_float_fcollect +#pragma weak shmem_double_fcollect = pshmem_double_fcollect +#pragma weak shmem_longlong_fcollect = pshmem_longlong_fcollect +#pragma weak shmem_schar_fcollect = pshmem_schar_fcollect +#pragma weak shmem_uchar_fcollect = pshmem_uchar_fcollect +#pragma weak shmem_ushort_fcollect = pshmem_ushort_fcollect +#pragma weak shmem_uint_fcollect = pshmem_uint_fcollect +#pragma weak shmem_ulong_fcollect = pshmem_ulong_fcollect +#pragma weak shmem_ulonglong_fcollect = pshmem_ulonglong_fcollect +#pragma weak 
shmem_longdouble_fcollect = pshmem_longdouble_fcollect +#pragma weak shmem_int8_fcollect = pshmem_int8_fcollect +#pragma weak shmem_int16_fcollect = pshmem_int16_fcollect +#pragma weak shmem_int32_fcollect = pshmem_int32_fcollect +#pragma weak shmem_int64_fcollect = pshmem_int64_fcollect +#pragma weak shmem_uint8_fcollect = pshmem_uint8_fcollect +#pragma weak shmem_uint16_fcollect = pshmem_uint16_fcollect +#pragma weak shmem_uint32_fcollect = pshmem_uint32_fcollect +#pragma weak shmem_uint64_fcollect = pshmem_uint64_fcollect +#pragma weak shmem_size_fcollect = pshmem_size_fcollect +#pragma weak shmem_ptrdiff_fcollect = pshmem_ptrdiff_fcollect + +#pragma weak shmem_fcollectmem = pshmem_fcollectmem + + + +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_TYPE_COLLECT(_collect32, sizeof(uint32_t), false) SHMEM_TYPE_COLLECT(_collect64, sizeof(uint64_t), false) SHMEM_TYPE_COLLECT(_fcollect32, sizeof(uint32_t), true) SHMEM_TYPE_COLLECT(_fcollect64, sizeof(uint64_t), true) + + +#define SHMEM_TYPE_TEAM_COLLECT(type_name, type, code, postfix) \ + int shmem##type_name##postfix(shmem_team_t team, type *dest, const type *source, size_t nelems) \ + { \ + int rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(team_collect( \ + team, (void*)dest, (void*)source, \ + nelems, code)); \ + RUNTIME_CHECK_RC(rc); \ + \ + return rc; \ + } + + +SHMEM_TYPE_TEAM_COLLECT(_char, char, SHMEM_CHAR, _collect) +SHMEM_TYPE_TEAM_COLLECT(_short, short, SHMEM_SHORT, _collect) +SHMEM_TYPE_TEAM_COLLECT(_int, int, SHMEM_INT, _collect) +SHMEM_TYPE_TEAM_COLLECT(_long, long, SHMEM_LONG, _collect) +SHMEM_TYPE_TEAM_COLLECT(_float, float, SHMEM_FLOAT, _collect) +SHMEM_TYPE_TEAM_COLLECT(_double, double, SHMEM_DOUBLE, _collect) +SHMEM_TYPE_TEAM_COLLECT(_longlong, long long, SHMEM_LLONG, _collect) +SHMEM_TYPE_TEAM_COLLECT(_schar, signed char, SHMEM_SCHAR, _collect) +SHMEM_TYPE_TEAM_COLLECT(_uchar, unsigned char, SHMEM_UCHAR, _collect) +SHMEM_TYPE_TEAM_COLLECT(_ushort, unsigned short, 
SHMEM_USHORT, _collect) +SHMEM_TYPE_TEAM_COLLECT(_uint, unsigned int, SHMEM_UINT, _collect) +SHMEM_TYPE_TEAM_COLLECT(_ulong, unsigned long, SHMEM_ULONG, _collect) +SHMEM_TYPE_TEAM_COLLECT(_ulonglong, unsigned long long, SHMEM_ULLONG, _collect) +SHMEM_TYPE_TEAM_COLLECT(_longdouble, long double, SHMEM_LDOUBLE, _collect) +SHMEM_TYPE_TEAM_COLLECT(_int8, int8_t, SHMEM_INT8_T, _collect) +SHMEM_TYPE_TEAM_COLLECT(_int16, int16_t, SHMEM_INT16_T, _collect) +SHMEM_TYPE_TEAM_COLLECT(_int32, int32_t, SHMEM_INT32_T, _collect) +SHMEM_TYPE_TEAM_COLLECT(_int64, int64_t, SHMEM_INT64_T, _collect) +SHMEM_TYPE_TEAM_COLLECT(_uint8, uint8_t, SHMEM_UINT8_T, _collect) +SHMEM_TYPE_TEAM_COLLECT(_uint16, uint16_t, SHMEM_UINT16_T, _collect) +SHMEM_TYPE_TEAM_COLLECT(_uint32, uint32_t, SHMEM_UINT32_T, _collect) +SHMEM_TYPE_TEAM_COLLECT(_uint64, uint64_t, SHMEM_UINT64_T, _collect) +SHMEM_TYPE_TEAM_COLLECT(_size, size_t, SHMEM_SIZE_T, _collect) +SHMEM_TYPE_TEAM_COLLECT(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, _collect) + +SHMEM_TYPE_TEAM_COLLECT(, void, SHMEM_BYTE, _collectmem) + + + + +#define SHMEM_TYPE_TEAM_FCOLLECT(type_name, type, code, postfix) \ + int shmem##type_name##postfix(shmem_team_t team, type *dest, const type *source, size_t nelems) \ + { \ + int rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(team_fcollect( \ + team, (void*)dest, (void*)source, \ + nelems, code)); \ + RUNTIME_CHECK_RC(rc); \ + \ + return rc; \ + } + + + +SHMEM_TYPE_TEAM_FCOLLECT(_char, char, SHMEM_CHAR, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_short, short, SHMEM_SHORT, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_int, int, SHMEM_INT, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_long, long, SHMEM_LONG, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_float, float, SHMEM_FLOAT, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_double, double, SHMEM_DOUBLE, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_longlong, long long, SHMEM_LLONG, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_schar, signed char, SHMEM_SCHAR, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_uchar, 
unsigned char, SHMEM_UCHAR, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_ushort, unsigned short, SHMEM_USHORT, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_uint, unsigned int, SHMEM_UINT, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_ulong, unsigned long, SHMEM_ULONG, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_ulonglong, unsigned long long, SHMEM_ULLONG, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_longdouble, long double, SHMEM_LDOUBLE, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_int8, int8_t, SHMEM_INT8_T, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_int16, int16_t, SHMEM_INT16_T, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_int32, int32_t, SHMEM_INT32_T, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_int64, int64_t, SHMEM_INT64_T, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_uint8, uint8_t, SHMEM_UINT8_T, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_uint16, uint16_t, SHMEM_UINT16_T, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_uint32, uint32_t, SHMEM_UINT32_T, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_uint64, uint64_t, SHMEM_UINT64_T, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_size, size_t, SHMEM_SIZE_T, _fcollect) +SHMEM_TYPE_TEAM_FCOLLECT(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, _fcollect) + +SHMEM_TYPE_TEAM_FCOLLECT(, void, SHMEM_BYTE, _fcollectmem) + diff --git a/oshmem/shmem/c/shmem_context.c b/oshmem/shmem/c/shmem_context.c index d82736568f0..a73731bb977 100644 --- a/oshmem/shmem/c/shmem_context.c +++ b/oshmem/shmem/c/shmem_context.c @@ -27,7 +27,7 @@ #include "oshmem/include/pshmem.h" #pragma weak shmem_ctx_create = pshmem_ctx_create #pragma weak shmem_ctx_destroy = pshmem_ctx_destroy -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif int shmem_ctx_create(long options, shmem_ctx_t *ctx) diff --git a/oshmem/shmem/c/shmem_cswap.c b/oshmem/shmem/c/shmem_cswap.c index 99e3b1f9420..a80511af253 100644 --- a/oshmem/shmem/c/shmem_cswap.c +++ b/oshmem/shmem/c/shmem_cswap.c @@ -71,6 +71,12 @@ #pragma weak shmem_ctx_int_atomic_compare_swap = pshmem_ctx_int_atomic_compare_swap #pragma weak 
shmem_ctx_long_atomic_compare_swap = pshmem_ctx_long_atomic_compare_swap #pragma weak shmem_ctx_longlong_atomic_compare_swap = pshmem_ctx_longlong_atomic_compare_swap +#pragma weak shmem_ctx_int32_atomic_compare_swap = pshmem_ctx_int32_atomic_compare_swap +#pragma weak shmem_ctx_int64_atomic_compare_swap = pshmem_ctx_int64_atomic_compare_swap +#pragma weak shmem_ctx_uint32_atomic_compare_swap = pshmem_ctx_uint32_atomic_compare_swap +#pragma weak shmem_ctx_uint64_atomic_compare_swap = pshmem_ctx_uint64_atomic_compare_swap +#pragma weak shmem_ctx_size_atomic_compare_swap = pshmem_ctx_size_atomic_compare_swap +#pragma weak shmem_ctx_ptrdiff_atomic_compare_swap = pshmem_ctx_ptrdiff_atomic_compare_swap #pragma weak shmem_int_atomic_compare_swap = pshmem_int_atomic_compare_swap #pragma weak shmem_long_atomic_compare_swap = pshmem_long_atomic_compare_swap @@ -78,6 +84,12 @@ #pragma weak shmem_uint_atomic_compare_swap = pshmem_uint_atomic_compare_swap #pragma weak shmem_ulong_atomic_compare_swap = pshmem_ulong_atomic_compare_swap #pragma weak shmem_ulonglong_atomic_compare_swap = pshmem_ulonglong_atomic_compare_swap +#pragma weak shmem_int32_atomic_compare_swap = pshmem_int32_atomic_compare_swap +#pragma weak shmem_int64_atomic_compare_swap = pshmem_int64_atomic_compare_swap +#pragma weak shmem_uint32_atomic_compare_swap = pshmem_uint32_atomic_compare_swap +#pragma weak shmem_uint64_atomic_compare_swap = pshmem_uint64_atomic_compare_swap +#pragma weak shmem_size_atomic_compare_swap = pshmem_size_atomic_compare_swap +#pragma weak shmem_ptrdiff_atomic_compare_swap = pshmem_ptrdiff_atomic_compare_swap #pragma weak shmem_int_cswap = pshmem_int_cswap #pragma weak shmem_long_cswap = pshmem_long_cswap @@ -85,7 +97,8 @@ #pragma weak shmemx_int32_cswap = pshmemx_int32_cswap #pragma weak shmemx_int64_cswap = pshmemx_int64_cswap -#include "oshmem/shmem/c/profile/defines.h" + +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_int, int, shmem) @@ 
-94,12 +107,24 @@ SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_longlong, long long, shmem) SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_uint, unsigned int, shmem) SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_ulong, unsigned long, shmem) SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_ulonglong, unsigned long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_ptrdiff, ptrdiff_t, shmem) SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_int, int, shmem) SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_long, long, shmem) SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_longlong, long long, shmem) SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_uint, unsigned int, shmem) SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_ulong, unsigned long, shmem) SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_ulonglong, unsigned long long, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_ptrdiff, ptrdiff_t, shmem) /* deprecated APIs */ #define SHMEM_TYPE_CSWAP(type_name, type, prefix) \ diff --git a/oshmem/shmem/c/shmem_cswap_nb.c b/oshmem/shmem/c/shmem_cswap_nb.c new file mode 100644 index 00000000000..fe69bf54302 --- /dev/null +++ b/oshmem/shmem/c/shmem_cswap_nb.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. 
/*
 * Nonblocking atomic conditional swap.
 *
 * The conditional swap writes value to address target on PE pe only if cond
 * is equal to the current contents of target; otherwise target is left
 * unchanged. The operation must complete without the possibility of another
 * process updating target between the time of the fetch and the update.
 *
 * DO_SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI is the shared body. After the
 * RUNTIME_CHECK_INIT / _PE / _ADDR checks it calls
 * MCA_ATOMIC_CALL(cswap_nb(...)) with fetch, target, cond and value, using
 * sizeof(value) as the operand size, and hands the return code to
 * RUNTIME_CHECK_RC(). The `type` parameter is accepted but not used by the
 * body.
 *
 * The two generator macros below emit the void-returning public entry
 * points: the _ctx variant takes an explicit context, the plain variant
 * uses oshmem_ctx_default.
 *
 * NOTE(review): fetch is forwarded to cswap_nb, presumably as the location
 * that receives the previous contents of target - confirm against the
 * atomic component.
 * NOTE(review): out_value is an uninitialized stack local whose address is
 * cast to uint64_t* and passed to a nonblocking call; confirm the backend
 * honours `size` and never writes through it after the wrapper returns.
 */
#define DO_SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(ctx, type, fetch, target, cond, value, pe, out_value) do { \
        size_t size = sizeof(value);                                           \
        int rc = OSHMEM_SUCCESS;                                               \
                                                                               \
        RUNTIME_CHECK_INIT();                                                  \
        RUNTIME_CHECK_PE(pe);                                                  \
        RUNTIME_CHECK_ADDR(target);                                            \
                                                                               \
        rc = MCA_ATOMIC_CALL(cswap_nb(ctx, fetch, (void*)target,               \
                    (uint64_t*)&out_value,                                     \
                    OSHMEM_ATOMIC_PTR_2_INT(&cond, sizeof(cond)),              \
                    OSHMEM_ATOMIC_PTR_2_INT(&value, sizeof(value)),            \
                    size, pe));                                                \
        RUNTIME_CHECK_RC(rc);                                                  \
    } while (0)

#define SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(type_name, type, prefix)        \
    void prefix##_ctx##type_name##_atomic_compare_swap_nbi(shmem_ctx_t ctx,    \
            type *fetch, type *target, type cond, type value, int pe)          \
    {                                                                          \
        type out_value;                                                        \
        DO_SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(ctx, type, fetch, target,        \
                                              cond, value, pe, out_value);     \
    }

#define SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(type_name, type, prefix)            \
    void prefix##type_name##_atomic_compare_swap_nbi(type *fetch,              \
            type *target, type cond, type value, int pe)                       \
    {                                                                          \
        type out_value;                                                        \
        DO_SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(oshmem_ctx_default, type, fetch, \
                                              target, cond, value, pe,         \
                                              out_value);                      \
    }
shmem_int64_atomic_compare_swap_nbi = pshmem_int64_atomic_compare_swap_nbi +#pragma weak shmem_uint32_atomic_compare_swap_nbi = pshmem_uint32_atomic_compare_swap_nbi +#pragma weak shmem_uint64_atomic_compare_swap_nbi = pshmem_uint64_atomic_compare_swap_nbi +#pragma weak shmem_size_atomic_compare_swap_nbi = pshmem_size_atomic_compare_swap_nbi +#pragma weak shmem_ptrdiff_atomic_compare_swap_nbi = pshmem_ptrdiff_atomic_compare_swap_nbi + + +#include "oshmem/shmem/c/profile-defines.h" +#endif + +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_ulonglong, unsigned long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP_NBI(_ptrdiff, ptrdiff_t, shmem) + +SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_int, int, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_long, long, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_uint, unsigned int, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_ulong, unsigned long, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_ulonglong, unsigned long long, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_size, size_t, shmem) 
+SHMEM_TYPE_ATOMIC_COMPARE_SWAP_NBI(_ptrdiff, ptrdiff_t, shmem) + + diff --git a/oshmem/shmem/c/shmem_fadd.c b/oshmem/shmem/c/shmem_fadd.c index 33f18973aca..5b06712a570 100644 --- a/oshmem/shmem/c/shmem_fadd.c +++ b/oshmem/shmem/c/shmem_fadd.c @@ -70,6 +70,12 @@ #pragma weak shmem_ctx_uint_atomic_fetch_add = pshmem_ctx_uint_atomic_fetch_add #pragma weak shmem_ctx_ulong_atomic_fetch_add = pshmem_ctx_ulong_atomic_fetch_add #pragma weak shmem_ctx_ulonglong_atomic_fetch_add = pshmem_ctx_ulonglong_atomic_fetch_add +#pragma weak shmem_ctx_int32_atomic_fetch_add = pshmem_ctx_int32_atomic_fetch_add +#pragma weak shmem_ctx_int64_atomic_fetch_add = pshmem_ctx_int64_atomic_fetch_add +#pragma weak shmem_ctx_uint32_atomic_fetch_add = pshmem_ctx_uint32_atomic_fetch_add +#pragma weak shmem_ctx_uint64_atomic_fetch_add = pshmem_ctx_uint64_atomic_fetch_add +#pragma weak shmem_ctx_size_atomic_fetch_add = pshmem_ctx_size_atomic_fetch_add +#pragma weak shmem_ctx_ptrdiff_atomic_fetch_add = pshmem_ctx_ptrdiff_atomic_fetch_add #pragma weak shmem_int_atomic_fetch_add = pshmem_int_atomic_fetch_add #pragma weak shmem_long_atomic_fetch_add = pshmem_long_atomic_fetch_add @@ -77,6 +83,12 @@ #pragma weak shmem_uint_atomic_fetch_add = pshmem_uint_atomic_fetch_add #pragma weak shmem_ulong_atomic_fetch_add = pshmem_ulong_atomic_fetch_add #pragma weak shmem_ulonglong_atomic_fetch_add = pshmem_ulonglong_atomic_fetch_add +#pragma weak shmem_int32_atomic_fetch_add = pshmem_int32_atomic_fetch_add +#pragma weak shmem_int64_atomic_fetch_add = pshmem_int64_atomic_fetch_add +#pragma weak shmem_uint32_atomic_fetch_add = pshmem_uint32_atomic_fetch_add +#pragma weak shmem_uint64_atomic_fetch_add = pshmem_uint64_atomic_fetch_add +#pragma weak shmem_size_atomic_fetch_add = pshmem_size_atomic_fetch_add +#pragma weak shmem_ptrdiff_atomic_fetch_add = pshmem_ptrdiff_atomic_fetch_add #pragma weak shmem_int_fadd = pshmem_int_fadd #pragma weak shmem_long_fadd = pshmem_long_fadd @@ -84,7 +96,9 @@ #pragma weak 
shmemx_int32_fadd = pshmemx_int32_fadd #pragma weak shmemx_int64_fadd = pshmemx_int64_fadd -#include "oshmem/shmem/c/profile/defines.h" + + +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_int, int, shmem) @@ -93,12 +107,24 @@ SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_longlong, long long, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_uint, unsigned int, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_ulong, unsigned long, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_ulonglong, unsigned long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_ptrdiff, ptrdiff_t, shmem) SHMEM_TYPE_ATOMIC_FETCH_ADD(_int, int, shmem) SHMEM_TYPE_ATOMIC_FETCH_ADD(_long, long, shmem) SHMEM_TYPE_ATOMIC_FETCH_ADD(_longlong, long long, shmem) SHMEM_TYPE_ATOMIC_FETCH_ADD(_uint, unsigned int, shmem) SHMEM_TYPE_ATOMIC_FETCH_ADD(_ulong, unsigned long, shmem) SHMEM_TYPE_ATOMIC_FETCH_ADD(_ulonglong, unsigned long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_ptrdiff, ptrdiff_t, shmem) /* deprecated APIs */ #define SHMEM_TYPE_FADD(type_name, type, prefix) \ diff --git a/oshmem/shmem/c/shmem_fadd_nb.c b/oshmem/shmem/c/shmem_fadd_nb.c new file mode 100644 index 00000000000..0ed6bc0e2f9 --- /dev/null +++ b/oshmem/shmem/c/shmem_fadd_nb.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. 
/*
 * Nonblocking atomic fetch-and-add.
 *
 * The fetch-and-add routines operate on address target on PE pe and add
 * value to its contents. The operation must complete without the
 * possibility of another process updating target between the time of the
 * fetch and the update.
 *
 * DO_SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI is the shared body. After the
 * RUNTIME_CHECK_INIT / _PE / _ADDR checks it calls
 * MCA_ATOMIC_CALL(fadd_nb(...)) with fetch, target and value, using
 * sizeof(out_value) as the operand size, and hands the return code to
 * RUNTIME_CHECK_RC(). The `type_name` and `type` parameters are accepted
 * but not used by the body.
 *
 * The two generator macros below emit the void-returning public entry
 * points: the _ctx variant takes an explicit context, the plain variant
 * uses oshmem_ctx_default.
 *
 * NOTE(review): fetch is forwarded to fadd_nb, presumably as the location
 * that receives the pre-add contents of target - confirm against the
 * atomic component.
 * NOTE(review): out_value is an uninitialized stack local whose address is
 * passed to a nonblocking call; confirm the backend never writes through it
 * after the wrapper returns.
 */
#define DO_SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(ctx, type_name, type, fetch, target, value, pe, out_value) do { \
        size_t size = sizeof(out_value);                                       \
        int rc = OSHMEM_SUCCESS;                                               \
                                                                               \
        RUNTIME_CHECK_INIT();                                                  \
        RUNTIME_CHECK_PE(pe);                                                  \
        RUNTIME_CHECK_ADDR(target);                                            \
                                                                               \
        rc = MCA_ATOMIC_CALL(fadd_nb(ctx, fetch, (void*)target,                \
                                     (void*)&out_value, value, size, pe));     \
        RUNTIME_CHECK_RC(rc);                                                  \
    } while (0)

#define SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(type_name, type, prefix)           \
    void prefix##_ctx##type_name##_atomic_fetch_add_nbi(shmem_ctx_t ctx,       \
            type *fetch, type *target, type value, int pe)                     \
    {                                                                          \
        type out_value;                                                        \
        DO_SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(ctx, type_name, type, fetch,        \
                                           target, value, pe, out_value);      \
    }

#define SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(type_name, type, prefix)               \
    void prefix##type_name##_atomic_fetch_add_nbi(type *fetch, type *target,   \
            type value, int pe)                                                \
    {                                                                          \
        type out_value;                                                        \
        DO_SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(oshmem_ctx_default, type_name,      \
                                           type, fetch, target, value, pe,     \
                                           out_value);                         \
    }
pshmem_ctx_int_atomic_fetch_add_nbi +#pragma weak shmem_ctx_long_atomic_fetch_add_nbi = pshmem_ctx_long_atomic_fetch_add_nbi +#pragma weak shmem_ctx_longlong_atomic_fetch_add_nbi = pshmem_ctx_longlong_atomic_fetch_add_nbi +#pragma weak shmem_ctx_uint_atomic_fetch_add_nbi = pshmem_ctx_uint_atomic_fetch_add_nbi +#pragma weak shmem_ctx_ulong_atomic_fetch_add_nbi = pshmem_ctx_ulong_atomic_fetch_add_nbi +#pragma weak shmem_ctx_ulonglong_atomic_fetch_add_nbi = pshmem_ctx_ulonglong_atomic_fetch_add_nbi +#pragma weak shmem_ctx_int32_atomic_fetch_add_nbi = pshmem_ctx_int32_atomic_fetch_add_nbi +#pragma weak shmem_ctx_int64_atomic_fetch_add_nbi = pshmem_ctx_int64_atomic_fetch_add_nbi +#pragma weak shmem_ctx_uint32_atomic_fetch_add_nbi = pshmem_ctx_uint32_atomic_fetch_add_nbi +#pragma weak shmem_ctx_uint64_atomic_fetch_add_nbi = pshmem_ctx_uint64_atomic_fetch_add_nbi +#pragma weak shmem_ctx_size_atomic_fetch_add_nbi = pshmem_ctx_size_atomic_fetch_add_nbi +#pragma weak shmem_ctx_ptrdiff_atomic_fetch_add_nbi = pshmem_ctx_ptrdiff_atomic_fetch_add_nbi + +#pragma weak shmem_int_atomic_fetch_add_nbi = pshmem_int_atomic_fetch_add_nbi +#pragma weak shmem_long_atomic_fetch_add_nbi = pshmem_long_atomic_fetch_add_nbi +#pragma weak shmem_longlong_atomic_fetch_add_nbi = pshmem_longlong_atomic_fetch_add_nbi +#pragma weak shmem_uint_atomic_fetch_add_nbi = pshmem_uint_atomic_fetch_add_nbi +#pragma weak shmem_ulong_atomic_fetch_add_nbi = pshmem_ulong_atomic_fetch_add_nbi +#pragma weak shmem_ulonglong_atomic_fetch_add_nbi = pshmem_ulonglong_atomic_fetch_add_nbi +#pragma weak shmem_int32_atomic_fetch_add_nbi = pshmem_int32_atomic_fetch_add_nbi +#pragma weak shmem_int64_atomic_fetch_add_nbi = pshmem_int64_atomic_fetch_add_nbi +#pragma weak shmem_uint32_atomic_fetch_add_nbi = pshmem_uint32_atomic_fetch_add_nbi +#pragma weak shmem_uint64_atomic_fetch_add_nbi = pshmem_uint64_atomic_fetch_add_nbi +#pragma weak shmem_size_atomic_fetch_add_nbi = pshmem_size_atomic_fetch_add_nbi +#pragma weak 
shmem_ptrdiff_atomic_fetch_add_nbi = pshmem_ptrdiff_atomic_fetch_add_nbi + +#include "oshmem/shmem/c/profile-defines.h" +#endif + +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_ulonglong, unsigned long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD_NBI(_ptrdiff, ptrdiff_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_int, int, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_long, long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_uint, unsigned int, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_ulong, unsigned long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_ulonglong, unsigned long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD_NBI(_ptrdiff, ptrdiff_t, shmem) diff --git a/oshmem/shmem/c/shmem_fand.c b/oshmem/shmem/c/shmem_fand.c index 6761844f291..7bbf5952d1e 100644 --- a/oshmem/shmem/c/shmem_fand.c +++ b/oshmem/shmem/c/shmem_fand.c @@ -53,7 +53,8 @@ #pragma weak shmemx_int64_atomic_fetch_and = pshmemx_int64_atomic_fetch_and #pragma weak shmemx_uint32_atomic_fetch_and = pshmemx_uint32_atomic_fetch_and #pragma weak shmemx_uint64_atomic_fetch_and = 
pshmemx_uint64_atomic_fetch_and -#include "oshmem/shmem/c/profile/defines.h" + +#include "oshmem/shmem/c/profile-defines.h" #endif OSHMEM_TYPE_FOP(int, int, shmem, and) diff --git a/oshmem/shmem/c/shmem_fand_nb.c b/oshmem/shmem/c/shmem_fand_nb.c new file mode 100644 index 00000000000..ded0df46928 --- /dev/null +++ b/oshmem/shmem/c/shmem_fand_nb.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/op/op.h" +#include "oshmem/mca/atomic/atomic.h" + +/* + * These routines perform an atomic nonblocking fetch-and-and operation. + * The fetch and and routines retrieve the value at address target on PE pe, and update + * target with the result of 'and' operation value to the retrieved value. The operation + * must be completed without the possibility of another process updating target between + * the time of the fetch and the update. 
+ */ +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" + +/* Nonblocking Atomic Fetch and And */ +#pragma weak shmem_ctx_uint_atomic_fetch_and_nbi = pshmem_ctx_uint_atomic_fetch_and_nbi +#pragma weak shmem_ctx_ulong_atomic_fetch_and_nbi = pshmem_ctx_ulong_atomic_fetch_and_nbi +#pragma weak shmem_ctx_ulonglong_atomic_fetch_and_nbi = pshmem_ctx_ulonglong_atomic_fetch_and_nbi +#pragma weak shmem_ctx_int32_atomic_fetch_and_nbi = pshmem_ctx_int32_atomic_fetch_and_nbi +#pragma weak shmem_ctx_int64_atomic_fetch_and_nbi = pshmem_ctx_int64_atomic_fetch_and_nbi +#pragma weak shmem_ctx_uint32_atomic_fetch_and_nbi = pshmem_ctx_uint32_atomic_fetch_and_nbi +#pragma weak shmem_ctx_uint64_atomic_fetch_and_nbi = pshmem_ctx_uint64_atomic_fetch_and_nbi + +#pragma weak shmem_uint_atomic_fetch_and_nbi = pshmem_uint_atomic_fetch_and_nbi +#pragma weak shmem_ulong_atomic_fetch_and_nbi = pshmem_ulong_atomic_fetch_and_nbi +#pragma weak shmem_ulonglong_atomic_fetch_and_nbi = pshmem_ulonglong_atomic_fetch_and_nbi +#pragma weak shmem_int32_atomic_fetch_and_nbi = pshmem_int32_atomic_fetch_and_nbi +#pragma weak shmem_int64_atomic_fetch_and_nbi = pshmem_int64_atomic_fetch_and_nbi +#pragma weak shmem_uint32_atomic_fetch_and_nbi = pshmem_uint32_atomic_fetch_and_nbi +#pragma weak shmem_uint64_atomic_fetch_and_nbi = pshmem_uint64_atomic_fetch_and_nbi + +#include "oshmem/shmem/c/profile-defines.h" +#endif + +OSHMEM_TYPE_FOP_NBI(uint, unsigned int, shmem, and) +OSHMEM_TYPE_FOP_NBI(ulong, unsigned long, shmem, and) +OSHMEM_TYPE_FOP_NBI(ulonglong, unsigned long long, shmem, and) +OSHMEM_TYPE_FOP_NBI(int32, int32_t, shmem, and) +OSHMEM_TYPE_FOP_NBI(int64, int64_t, shmem, and) +OSHMEM_TYPE_FOP_NBI(uint32, uint32_t, shmem, and) +OSHMEM_TYPE_FOP_NBI(uint64, uint64_t, shmem, and) + +OSHMEM_CTX_TYPE_FOP_NBI(uint, unsigned int, shmem, and) +OSHMEM_CTX_TYPE_FOP_NBI(ulong, unsigned long, shmem, and) +OSHMEM_CTX_TYPE_FOP_NBI(ulonglong, unsigned long long, shmem, and) +OSHMEM_CTX_TYPE_FOP_NBI(int32, 
int32_t, shmem, and) +OSHMEM_CTX_TYPE_FOP_NBI(int64, int64_t, shmem, and) +OSHMEM_CTX_TYPE_FOP_NBI(uint32, uint32_t, shmem, and) +OSHMEM_CTX_TYPE_FOP_NBI(uint64, uint64_t, shmem, and) + diff --git a/oshmem/shmem/c/shmem_fence.c b/oshmem/shmem/c/shmem_fence.c index 0a049bee57b..d1fc2b442aa 100644 --- a/oshmem/shmem/c/shmem_fence.c +++ b/oshmem/shmem/c/shmem_fence.c @@ -18,7 +18,7 @@ #include "oshmem/include/pshmem.h" #pragma weak shmem_fence = pshmem_fence #pragma weak shmem_ctx_fence = pshmem_ctx_fence -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_fence(void) diff --git a/oshmem/shmem/c/shmem_fetch.c b/oshmem/shmem/c/shmem_fetch.c index fe02d5a16e5..58092eda8c6 100644 --- a/oshmem/shmem/c/shmem_fetch.c +++ b/oshmem/shmem/c/shmem_fetch.c @@ -73,6 +73,12 @@ #pragma weak shmem_ctx_ulonglong_atomic_fetch = pshmem_ctx_ulonglong_atomic_fetch #pragma weak shmem_ctx_double_atomic_fetch = pshmem_ctx_double_atomic_fetch #pragma weak shmem_ctx_float_atomic_fetch = pshmem_ctx_float_atomic_fetch +#pragma weak shmem_ctx_int32_atomic_fetch = pshmem_ctx_int32_atomic_fetch +#pragma weak shmem_ctx_int64_atomic_fetch = pshmem_ctx_int64_atomic_fetch +#pragma weak shmem_ctx_uint32_atomic_fetch = pshmem_ctx_uint32_atomic_fetch +#pragma weak shmem_ctx_uint64_atomic_fetch = pshmem_ctx_uint64_atomic_fetch +#pragma weak shmem_ctx_size_atomic_fetch = pshmem_ctx_size_atomic_fetch +#pragma weak shmem_ctx_ptrdiff_atomic_fetch = pshmem_ctx_ptrdiff_atomic_fetch #pragma weak shmem_int_atomic_fetch = pshmem_int_atomic_fetch #pragma weak shmem_long_atomic_fetch = pshmem_long_atomic_fetch @@ -82,6 +88,12 @@ #pragma weak shmem_ulonglong_atomic_fetch = pshmem_ulonglong_atomic_fetch #pragma weak shmem_double_atomic_fetch = pshmem_double_atomic_fetch #pragma weak shmem_float_atomic_fetch = pshmem_float_atomic_fetch +#pragma weak shmem_int32_atomic_fetch = pshmem_int32_atomic_fetch +#pragma weak shmem_int64_atomic_fetch = 
pshmem_int64_atomic_fetch +#pragma weak shmem_uint32_atomic_fetch = pshmem_uint32_atomic_fetch +#pragma weak shmem_uint64_atomic_fetch = pshmem_uint64_atomic_fetch +#pragma weak shmem_size_atomic_fetch = pshmem_size_atomic_fetch +#pragma weak shmem_ptrdiff_atomic_fetch = pshmem_ptrdiff_atomic_fetch #pragma weak shmem_int_fetch = pshmem_int_fetch #pragma weak shmem_long_fetch = pshmem_long_fetch @@ -91,7 +103,12 @@ #pragma weak shmemx_int32_fetch = pshmemx_int32_fetch #pragma weak shmemx_int64_fetch = pshmemx_int64_fetch -#include "oshmem/shmem/c/profile/defines.h" + +#include "oshmem/shmem/c/profile-defines.h" + + + + #endif SHMEM_CTX_TYPE_ATOMIC_FETCH(_int, int, shmem) @@ -102,6 +119,12 @@ SHMEM_CTX_TYPE_ATOMIC_FETCH(_ulong, unsigned long, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH(_ulonglong, unsigned long long, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH(_double, double, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH(_float, float, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_ptrdiff, ptrdiff_t, shmem) SHMEM_TYPE_ATOMIC_FETCH(_int, int, shmem) SHMEM_TYPE_ATOMIC_FETCH(_long, long, shmem) SHMEM_TYPE_ATOMIC_FETCH(_longlong, long long, shmem) @@ -110,6 +133,12 @@ SHMEM_TYPE_ATOMIC_FETCH(_ulong, unsigned long, shmem) SHMEM_TYPE_ATOMIC_FETCH(_ulonglong, unsigned long long, shmem) SHMEM_TYPE_ATOMIC_FETCH(_double, double, shmem) SHMEM_TYPE_ATOMIC_FETCH(_float, float, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_ptrdiff, ptrdiff_t, shmem) /* deprecated APIs */ #define SHMEM_TYPE_FETCH(type_name, type, prefix) \ 
/*
 * Nonblocking atomic fetch.
 *
 * The fetch routines retrieve the value at address target on PE pe. The
 * operation must complete without the possibility of another process
 * updating target during the fetch.
 *
 * DO_SHMEM_TYPE_ATOMIC_FETCH_NBI is the shared body. The fetch is issued as
 * a fetch-and-add with a zero addend: after the RUNTIME_CHECK_INIT / _PE /
 * _ADDR checks it calls MCA_ATOMIC_CALL(fadd_nb(...)) with fetch, target
 * and a local of `type` initialised to 0, using sizeof(out_value) as the
 * operand size, and hands the return code to RUNTIME_CHECK_RC(). The same
 * body is instantiated for the floating-point types as well. The
 * `type_name` parameter is accepted but not used by the body.
 *
 * The two generator macros below emit the void-returning public entry
 * points: the _ctx variant takes an explicit context, the plain variant
 * uses oshmem_ctx_default. target is const-qualified in the public
 * signature and the qualifier is cast away when it is handed to fadd_nb.
 *
 * NOTE(review): fetch is forwarded to fadd_nb, presumably as the location
 * that receives the fetched value - confirm against the atomic component.
 * NOTE(review): out_value is an uninitialized stack local whose address is
 * passed to a nonblocking call; confirm the backend never writes through it
 * after the wrapper returns.
 */
#define DO_SHMEM_TYPE_ATOMIC_FETCH_NBI(ctx, type_name, type, fetch, target, pe, out_value) do { \
        type zero = 0;                                                         \
        size_t size = sizeof(out_value);                                       \
        int rc = OSHMEM_SUCCESS;                                               \
                                                                               \
        RUNTIME_CHECK_INIT();                                                  \
        RUNTIME_CHECK_PE(pe);                                                  \
        RUNTIME_CHECK_ADDR(target);                                            \
                                                                               \
        rc = MCA_ATOMIC_CALL(fadd_nb(ctx, fetch, (void*)target,                \
                                     (void*)&out_value, zero, size, pe));      \
        RUNTIME_CHECK_RC(rc);                                                  \
    } while (0)

#define SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(type_name, type, prefix)               \
    void prefix##_ctx##type_name##_atomic_fetch_nbi(shmem_ctx_t ctx,           \
            type *fetch, const type *target, int pe)                           \
    {                                                                          \
        type out_value;                                                        \
        DO_SHMEM_TYPE_ATOMIC_FETCH_NBI(ctx, type_name, type, fetch, target,    \
                                       pe, out_value);                         \
    }

#define SHMEM_TYPE_ATOMIC_FETCH_NBI(type_name, type, prefix)                   \
    void prefix##type_name##_atomic_fetch_nbi(type *fetch,                     \
            const type *target, int pe)                                        \
    {                                                                          \
        type out_value;                                                        \
        DO_SHMEM_TYPE_ATOMIC_FETCH_NBI(oshmem_ctx_default, type_name, type,    \
                                       fetch, target, pe, out_value);          \
    }
+/* Nonblocking Atomic Fetch */ +#pragma weak shmem_ctx_double_atomic_fetch_nbi = pshmem_ctx_double_atomic_fetch_nbi +#pragma weak shmem_ctx_float_atomic_fetch_nbi = pshmem_ctx_float_atomic_fetch_nbi +#pragma weak shmem_ctx_int_atomic_fetch_nbi = pshmem_ctx_int_atomic_fetch_nbi +#pragma weak shmem_ctx_long_atomic_fetch_nbi = pshmem_ctx_long_atomic_fetch_nbi +#pragma weak shmem_ctx_longlong_atomic_fetch_nbi = pshmem_ctx_longlong_atomic_fetch_nbi +#pragma weak shmem_ctx_uint_atomic_fetch_nbi = pshmem_ctx_uint_atomic_fetch_nbi +#pragma weak shmem_ctx_ulong_atomic_fetch_nbi = pshmem_ctx_ulong_atomic_fetch_nbi +#pragma weak shmem_ctx_ulonglong_atomic_fetch_nbi = pshmem_ctx_ulonglong_atomic_fetch_nbi +#pragma weak shmem_ctx_int32_atomic_fetch_nbi = pshmem_ctx_int32_atomic_fetch_nbi +#pragma weak shmem_ctx_int64_atomic_fetch_nbi = pshmem_ctx_int64_atomic_fetch_nbi +#pragma weak shmem_ctx_uint32_atomic_fetch_nbi = pshmem_ctx_uint32_atomic_fetch_nbi +#pragma weak shmem_ctx_uint64_atomic_fetch_nbi = pshmem_ctx_uint64_atomic_fetch_nbi +#pragma weak shmem_ctx_size_atomic_fetch_nbi = pshmem_ctx_size_atomic_fetch_nbi +#pragma weak shmem_ctx_ptrdiff_atomic_fetch_nbi = pshmem_ctx_ptrdiff_atomic_fetch_nbi + +#pragma weak shmem_double_atomic_fetch_nbi = pshmem_double_atomic_fetch_nbi +#pragma weak shmem_float_atomic_fetch_nbi = pshmem_float_atomic_fetch_nbi +#pragma weak shmem_int_atomic_fetch_nbi = pshmem_int_atomic_fetch_nbi +#pragma weak shmem_long_atomic_fetch_nbi = pshmem_long_atomic_fetch_nbi +#pragma weak shmem_longlong_atomic_fetch_nbi = pshmem_longlong_atomic_fetch_nbi +#pragma weak shmem_uint_atomic_fetch_nbi = pshmem_uint_atomic_fetch_nbi +#pragma weak shmem_ulong_atomic_fetch_nbi = pshmem_ulong_atomic_fetch_nbi +#pragma weak shmem_ulonglong_atomic_fetch_nbi = pshmem_ulonglong_atomic_fetch_nbi +#pragma weak shmem_int32_atomic_fetch_nbi = pshmem_int32_atomic_fetch_nbi +#pragma weak shmem_int64_atomic_fetch_nbi = pshmem_int64_atomic_fetch_nbi +#pragma weak 
shmem_uint32_atomic_fetch_nbi = pshmem_uint32_atomic_fetch_nbi +#pragma weak shmem_uint64_atomic_fetch_nbi = pshmem_uint64_atomic_fetch_nbi +#pragma weak shmem_size_atomic_fetch_nbi = pshmem_size_atomic_fetch_nbi +#pragma weak shmem_ptrdiff_atomic_fetch_nbi = pshmem_ptrdiff_atomic_fetch_nbi + +#include "oshmem/shmem/c/profile-defines.h" +#endif + +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_ulonglong, unsigned long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_double, double, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_float, float, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_NBI(_ptrdiff, ptrdiff_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_int, int, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_long, long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_uint, unsigned int, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_ulong, unsigned long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_ulonglong, unsigned long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_double, double, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_float, float, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_NBI(_ptrdiff, ptrdiff_t, shmem) + + diff --git a/oshmem/shmem/c/shmem_finalize.c 
b/oshmem/shmem/c/shmem_finalize.c index e35a7a72e33..4e0a2681948 100644 --- a/oshmem/shmem/c/shmem_finalize.c +++ b/oshmem/shmem/c/shmem_finalize.c @@ -17,7 +17,7 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_finalize = pshmem_finalize -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_finalize(void) diff --git a/oshmem/shmem/c/shmem_finc.c b/oshmem/shmem/c/shmem_finc.c index de3ae9b2dd1..913dea8b90a 100644 --- a/oshmem/shmem/c/shmem_finc.c +++ b/oshmem/shmem/c/shmem_finc.c @@ -71,6 +71,12 @@ #pragma weak shmem_ctx_uint_atomic_fetch_inc = pshmem_ctx_uint_atomic_fetch_inc #pragma weak shmem_ctx_ulong_atomic_fetch_inc = pshmem_ctx_ulong_atomic_fetch_inc #pragma weak shmem_ctx_ulonglong_atomic_fetch_inc = pshmem_ctx_ulonglong_atomic_fetch_inc +#pragma weak shmem_ctx_int32_atomic_fetch_inc = pshmem_ctx_int32_atomic_fetch_inc +#pragma weak shmem_ctx_int64_atomic_fetch_inc = pshmem_ctx_int64_atomic_fetch_inc +#pragma weak shmem_ctx_uint32_atomic_fetch_inc = pshmem_ctx_uint32_atomic_fetch_inc +#pragma weak shmem_ctx_uint64_atomic_fetch_inc = pshmem_ctx_uint64_atomic_fetch_inc +#pragma weak shmem_ctx_size_atomic_fetch_inc = pshmem_ctx_size_atomic_fetch_inc +#pragma weak shmem_ctx_ptrdiff_atomic_fetch_inc = pshmem_ctx_ptrdiff_atomic_fetch_inc #pragma weak shmem_int_atomic_fetch_inc = pshmem_int_atomic_fetch_inc #pragma weak shmem_long_atomic_fetch_inc = pshmem_long_atomic_fetch_inc @@ -78,6 +84,12 @@ #pragma weak shmem_uint_atomic_fetch_inc = pshmem_uint_atomic_fetch_inc #pragma weak shmem_ulong_atomic_fetch_inc = pshmem_ulong_atomic_fetch_inc #pragma weak shmem_ulonglong_atomic_fetch_inc = pshmem_ulonglong_atomic_fetch_inc +#pragma weak shmem_int32_atomic_fetch_inc = pshmem_int32_atomic_fetch_inc +#pragma weak shmem_int64_atomic_fetch_inc = pshmem_int64_atomic_fetch_inc +#pragma weak shmem_uint32_atomic_fetch_inc = pshmem_uint32_atomic_fetch_inc +#pragma weak 
shmem_uint64_atomic_fetch_inc = pshmem_uint64_atomic_fetch_inc +#pragma weak shmem_size_atomic_fetch_inc = pshmem_size_atomic_fetch_inc +#pragma weak shmem_ptrdiff_atomic_fetch_inc = pshmem_ptrdiff_atomic_fetch_inc #pragma weak shmem_int_finc = pshmem_int_finc #pragma weak shmem_long_finc = pshmem_long_finc @@ -85,7 +97,8 @@ #pragma weak shmemx_int32_finc = pshmemx_int32_finc #pragma weak shmemx_int64_finc = pshmemx_int64_finc -#include "oshmem/shmem/c/profile/defines.h" + +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_int, int, shmem) @@ -94,12 +107,25 @@ SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_longlong, long long, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_uint, unsigned int, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_ulong, unsigned long, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_ulonglong, unsigned long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_ptrdiff, ptrdiff_t, shmem) + SHMEM_TYPE_ATOMIC_FETCH_INC(_int, int, shmem) SHMEM_TYPE_ATOMIC_FETCH_INC(_long, long, shmem) SHMEM_TYPE_ATOMIC_FETCH_INC(_longlong, long long, shmem) SHMEM_TYPE_ATOMIC_FETCH_INC(_uint, unsigned int, shmem) SHMEM_TYPE_ATOMIC_FETCH_INC(_ulong, unsigned long, shmem) SHMEM_TYPE_ATOMIC_FETCH_INC(_ulonglong, unsigned long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_ptrdiff, ptrdiff_t, shmem) /* deprecated APIs */ #define SHMEM_TYPE_FINC(type_name, type, prefix) \ diff --git a/oshmem/shmem/c/shmem_finc_nb.c 
b/oshmem/shmem/c/shmem_finc_nb.c new file mode 100644 index 00000000000..7a0b4d59923 --- /dev/null +++ b/oshmem/shmem/c/shmem_finc_nb.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/op/op.h" +#include "oshmem/mca/atomic/atomic.h" + +/* + * These routines perform a nonblocking fetch-and-increment operation. + * The fetch and increment routines retrieve the value at address target on PE pe, and update + * target with the result of incrementing the retrieved value by one. The operation must be + * completed without the possibility of another process updating target between the time of + * the fetch and the update. + */ +#define DO_SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(ctx, type_name, type, fetch, target, pe, out_value) do { \ + int rc = OSHMEM_SUCCESS; \ + size_t size; \ + type value = 1; \ + \ + RUNTIME_CHECK_INIT(); \ + RUNTIME_CHECK_PE(pe); \ + RUNTIME_CHECK_ADDR(target); \ + \ + size = sizeof(out_value); \ + rc = MCA_ATOMIC_CALL(fadd_nb( \ + ctx, \ + fetch, \ + (void*)target, \ + (void*)&out_value, \ + value, \ + size, \ + pe)); \ + RUNTIME_CHECK_RC(rc); \ + } while (0) + +#define SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(type_name, type, prefix) \ + void prefix##_ctx##type_name##_atomic_fetch_inc_nbi(shmem_ctx_t ctx, type *fetch, type *target, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(ctx, type_name, type, fetch, target, \ + pe, out_value); \ + return ; \ + } + +#define SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(type_name, type, prefix) \ + void prefix##type_name##_atomic_fetch_inc_nbi(type *fetch, type *target, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(oshmem_ctx_default, type_name, \ + type, fetch, target, pe, 
out_value); \ + return ; \ + } + +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" + +/* Nonblocking Atomic Fetch and Increment */ +#pragma weak shmem_ctx_int_atomic_fetch_inc_nbi = pshmem_ctx_int_atomic_fetch_inc_nbi +#pragma weak shmem_ctx_long_atomic_fetch_inc_nbi = pshmem_ctx_long_atomic_fetch_inc_nbi +#pragma weak shmem_ctx_longlong_atomic_fetch_inc_nbi = pshmem_ctx_longlong_atomic_fetch_inc_nbi +#pragma weak shmem_ctx_uint_atomic_fetch_inc_nbi = pshmem_ctx_uint_atomic_fetch_inc_nbi +#pragma weak shmem_ctx_ulong_atomic_fetch_inc_nbi = pshmem_ctx_ulong_atomic_fetch_inc_nbi +#pragma weak shmem_ctx_ulonglong_atomic_fetch_inc_nbi = pshmem_ctx_ulonglong_atomic_fetch_inc_nbi +#pragma weak shmem_ctx_int32_atomic_fetch_inc_nbi = pshmem_ctx_int32_atomic_fetch_inc_nbi +#pragma weak shmem_ctx_int64_atomic_fetch_inc_nbi = pshmem_ctx_int64_atomic_fetch_inc_nbi +#pragma weak shmem_ctx_uint32_atomic_fetch_inc_nbi = pshmem_ctx_uint32_atomic_fetch_inc_nbi +#pragma weak shmem_ctx_uint64_atomic_fetch_inc_nbi = pshmem_ctx_uint64_atomic_fetch_inc_nbi +#pragma weak shmem_ctx_size_atomic_fetch_inc_nbi = pshmem_ctx_size_atomic_fetch_inc_nbi +#pragma weak shmem_ctx_ptrdiff_atomic_fetch_inc_nbi = pshmem_ctx_ptrdiff_atomic_fetch_inc_nbi + +#pragma weak shmem_int_atomic_fetch_inc_nbi = pshmem_int_atomic_fetch_inc_nbi +#pragma weak shmem_long_atomic_fetch_inc_nbi = pshmem_long_atomic_fetch_inc_nbi +#pragma weak shmem_longlong_atomic_fetch_inc_nbi = pshmem_longlong_atomic_fetch_inc_nbi +#pragma weak shmem_uint_atomic_fetch_inc_nbi = pshmem_uint_atomic_fetch_inc_nbi +#pragma weak shmem_ulong_atomic_fetch_inc_nbi = pshmem_ulong_atomic_fetch_inc_nbi +#pragma weak shmem_ulonglong_atomic_fetch_inc_nbi = pshmem_ulonglong_atomic_fetch_inc_nbi +#pragma weak shmem_int32_atomic_fetch_inc_nbi = pshmem_int32_atomic_fetch_inc_nbi +#pragma weak shmem_int64_atomic_fetch_inc_nbi = pshmem_int64_atomic_fetch_inc_nbi +#pragma weak shmem_uint32_atomic_fetch_inc_nbi = pshmem_uint32_atomic_fetch_inc_nbi 
+#pragma weak shmem_uint64_atomic_fetch_inc_nbi = pshmem_uint64_atomic_fetch_inc_nbi +#pragma weak shmem_size_atomic_fetch_inc_nbi = pshmem_size_atomic_fetch_inc_nbi +#pragma weak shmem_ptrdiff_atomic_fetch_inc_nbi = pshmem_ptrdiff_atomic_fetch_inc_nbi + +#include "oshmem/shmem/c/profile-defines.h" +#endif + +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_ulonglong, unsigned long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC_NBI(_ptrdiff, ptrdiff_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_int, int, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_long, long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_uint, unsigned int, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_ulong, unsigned long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_ulonglong, unsigned long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC_NBI(_ptrdiff, ptrdiff_t, shmem) diff --git a/oshmem/shmem/c/shmem_for.c b/oshmem/shmem/c/shmem_for.c index 4d0e732004d..c9289f4985e 100644 --- a/oshmem/shmem/c/shmem_for.c +++ b/oshmem/shmem/c/shmem_for.c @@ -53,7 +53,8 @@ #pragma weak 
shmemx_int64_atomic_fetch_or = pshmemx_int64_atomic_fetch_or #pragma weak shmemx_uint32_atomic_fetch_or = pshmemx_uint32_atomic_fetch_or #pragma weak shmemx_uint64_atomic_fetch_or = pshmemx_uint64_atomic_fetch_or -#include "oshmem/shmem/c/profile/defines.h" + +#include "oshmem/shmem/c/profile-defines.h" #endif OSHMEM_TYPE_FOP(int, int, shmem, or) diff --git a/oshmem/shmem/c/shmem_for_nb.c b/oshmem/shmem/c/shmem_for_nb.c new file mode 100644 index 00000000000..ddbfeabb21b --- /dev/null +++ b/oshmem/shmem/c/shmem_for_nb.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/op/op.h" +#include "oshmem/mca/atomic/atomic.h" + +/* + * These routines perform an atomic nonblocking fetch-and-or operation. + * The fetch and or routines retrieve the value at address target on PE pe, and update + * target with the result of the 'or' of value and the retrieved value. The operation + * must be completed without the possibility of another process updating target between + * the time of the fetch and the update. 
+ */ +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" + +/* Nonblocking Atomic Fetch and Or */ +#pragma weak shmem_ctx_uint_atomic_fetch_or_nbi = pshmem_ctx_uint_atomic_fetch_or_nbi +#pragma weak shmem_ctx_ulong_atomic_fetch_or_nbi = pshmem_ctx_ulong_atomic_fetch_or_nbi +#pragma weak shmem_ctx_ulonglong_atomic_fetch_or_nbi = pshmem_ctx_ulonglong_atomic_fetch_or_nbi +#pragma weak shmem_ctx_int32_atomic_fetch_or_nbi = pshmem_ctx_int32_atomic_fetch_or_nbi +#pragma weak shmem_ctx_int64_atomic_fetch_or_nbi = pshmem_ctx_int64_atomic_fetch_or_nbi +#pragma weak shmem_ctx_uint32_atomic_fetch_or_nbi = pshmem_ctx_uint32_atomic_fetch_or_nbi +#pragma weak shmem_ctx_uint64_atomic_fetch_or_nbi = pshmem_ctx_uint64_atomic_fetch_or_nbi + +#pragma weak shmem_uint_atomic_fetch_or_nbi = pshmem_uint_atomic_fetch_or_nbi +#pragma weak shmem_ulong_atomic_fetch_or_nbi = pshmem_ulong_atomic_fetch_or_nbi +#pragma weak shmem_ulonglong_atomic_fetch_or_nbi = pshmem_ulonglong_atomic_fetch_or_nbi +#pragma weak shmem_int32_atomic_fetch_or_nbi = pshmem_int32_atomic_fetch_or_nbi +#pragma weak shmem_int64_atomic_fetch_or_nbi = pshmem_int64_atomic_fetch_or_nbi +#pragma weak shmem_uint32_atomic_fetch_or_nbi = pshmem_uint32_atomic_fetch_or_nbi +#pragma weak shmem_uint64_atomic_fetch_or_nbi = pshmem_uint64_atomic_fetch_or_nbi + +#include "oshmem/shmem/c/profile-defines.h" +#endif + +OSHMEM_TYPE_FOP_NBI(uint, unsigned int, shmem, or) +OSHMEM_TYPE_FOP_NBI(ulong, unsigned long, shmem, or) +OSHMEM_TYPE_FOP_NBI(ulonglong, unsigned long long, shmem, or) +OSHMEM_TYPE_FOP_NBI(int32, int32_t, shmem, or) +OSHMEM_TYPE_FOP_NBI(int64, int64_t, shmem, or) +OSHMEM_TYPE_FOP_NBI(uint32, uint32_t, shmem, or) +OSHMEM_TYPE_FOP_NBI(uint64, uint64_t, shmem, or) + +OSHMEM_CTX_TYPE_FOP_NBI(uint, unsigned int, shmem, or) +OSHMEM_CTX_TYPE_FOP_NBI(ulong, unsigned long, shmem, or) +OSHMEM_CTX_TYPE_FOP_NBI(ulonglong, unsigned long long, shmem, or) +OSHMEM_CTX_TYPE_FOP_NBI(int32, int32_t, shmem, or) 
+OSHMEM_CTX_TYPE_FOP_NBI(int64, int64_t, shmem, or) +OSHMEM_CTX_TYPE_FOP_NBI(uint32, uint32_t, shmem, or) +OSHMEM_CTX_TYPE_FOP_NBI(uint64, uint64_t, shmem, or) + diff --git a/oshmem/shmem/c/shmem_free.c b/oshmem/shmem/c/shmem_free.c index 22da004589b..a03d8bf5119 100644 --- a/oshmem/shmem/c/shmem_free.c +++ b/oshmem/shmem/c/shmem_free.c @@ -24,7 +24,7 @@ #include "oshmem/include/pshmem.h" #pragma weak shmem_free = pshmem_free #pragma weak shfree = pshfree -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif static inline void _shfree(void* ptr); diff --git a/oshmem/shmem/c/shmem_fxor.c b/oshmem/shmem/c/shmem_fxor.c index 41fe2249c6b..a1d977f6ce0 100644 --- a/oshmem/shmem/c/shmem_fxor.c +++ b/oshmem/shmem/c/shmem_fxor.c @@ -53,7 +53,8 @@ #pragma weak shmemx_int64_atomic_fetch_xor = pshmemx_int64_atomic_fetch_xor #pragma weak shmemx_uint32_atomic_fetch_xor = pshmemx_uint32_atomic_fetch_xor #pragma weak shmemx_uint64_atomic_fetch_xor = pshmemx_uint64_atomic_fetch_xor -#include "oshmem/shmem/c/profile/defines.h" + +#include "oshmem/shmem/c/profile-defines.h" #endif OSHMEM_TYPE_FOP(int, int, shmem, xor) diff --git a/oshmem/shmem/c/shmem_fxor_nb.c b/oshmem/shmem/c/shmem_fxor_nb.c new file mode 100644 index 00000000000..2a40265f734 --- /dev/null +++ b/oshmem/shmem/c/shmem_fxor_nb.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/op/op.h" +#include "oshmem/mca/atomic/atomic.h" + +/* + * These routines perform an atomic nonblocking fetch-and-xor operation. + * The fetch and xor routines retrieve the value at address target on PE pe, and update + * target with the result of the 'xor' of value and the retrieved value.
The operation + * must be completed without the possibility of another process updating target between + * the time of the fetch and the update. + */ +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" + +/* Nonblocking Atomic Fetch and Xor */ +#pragma weak shmem_ctx_uint_atomic_fetch_xor_nbi = pshmem_ctx_uint_atomic_fetch_xor_nbi +#pragma weak shmem_ctx_ulong_atomic_fetch_xor_nbi = pshmem_ctx_ulong_atomic_fetch_xor_nbi +#pragma weak shmem_ctx_ulonglong_atomic_fetch_xor_nbi = pshmem_ctx_ulonglong_atomic_fetch_xor_nbi +#pragma weak shmem_ctx_int32_atomic_fetch_xor_nbi = pshmem_ctx_int32_atomic_fetch_xor_nbi +#pragma weak shmem_ctx_int64_atomic_fetch_xor_nbi = pshmem_ctx_int64_atomic_fetch_xor_nbi +#pragma weak shmem_ctx_uint32_atomic_fetch_xor_nbi = pshmem_ctx_uint32_atomic_fetch_xor_nbi +#pragma weak shmem_ctx_uint64_atomic_fetch_xor_nbi = pshmem_ctx_uint64_atomic_fetch_xor_nbi + +#pragma weak shmem_uint_atomic_fetch_xor_nbi = pshmem_uint_atomic_fetch_xor_nbi +#pragma weak shmem_ulong_atomic_fetch_xor_nbi = pshmem_ulong_atomic_fetch_xor_nbi +#pragma weak shmem_ulonglong_atomic_fetch_xor_nbi = pshmem_ulonglong_atomic_fetch_xor_nbi +#pragma weak shmem_int32_atomic_fetch_xor_nbi = pshmem_int32_atomic_fetch_xor_nbi +#pragma weak shmem_int64_atomic_fetch_xor_nbi = pshmem_int64_atomic_fetch_xor_nbi +#pragma weak shmem_uint32_atomic_fetch_xor_nbi = pshmem_uint32_atomic_fetch_xor_nbi +#pragma weak shmem_uint64_atomic_fetch_xor_nbi = pshmem_uint64_atomic_fetch_xor_nbi + +#include "oshmem/shmem/c/profile-defines.h" +#endif + +OSHMEM_TYPE_FOP_NBI(uint, unsigned int, shmem, xor) +OSHMEM_TYPE_FOP_NBI(ulong, unsigned long, shmem, xor) +OSHMEM_TYPE_FOP_NBI(ulonglong, unsigned long long, shmem, xor) +OSHMEM_TYPE_FOP_NBI(int32, int32_t, shmem, xor) +OSHMEM_TYPE_FOP_NBI(int64, int64_t, shmem, xor) +OSHMEM_TYPE_FOP_NBI(uint32, uint32_t, shmem, xor) +OSHMEM_TYPE_FOP_NBI(uint64, uint64_t, shmem, xor) + +OSHMEM_CTX_TYPE_FOP_NBI(uint, unsigned int, shmem, xor) 
+OSHMEM_CTX_TYPE_FOP_NBI(ulong, unsigned long, shmem, xor) +OSHMEM_CTX_TYPE_FOP_NBI(ulonglong, unsigned long long, shmem, xor) +OSHMEM_CTX_TYPE_FOP_NBI(int32, int32_t, shmem, xor) +OSHMEM_CTX_TYPE_FOP_NBI(int64, int64_t, shmem, xor) +OSHMEM_CTX_TYPE_FOP_NBI(uint32, uint32_t, shmem, xor) +OSHMEM_CTX_TYPE_FOP_NBI(uint64, uint64_t, shmem, xor) + diff --git a/oshmem/shmem/c/shmem_g.c b/oshmem/shmem/c/shmem_g.c index aa96afaba68..816b09e7f63 100644 --- a/oshmem/shmem/c/shmem_g.c +++ b/oshmem/shmem/c/shmem_g.c @@ -111,7 +111,7 @@ #pragma weak shmemx_int16_g = pshmemx_int16_g #pragma weak shmemx_int32_g = pshmemx_int32_g #pragma weak shmemx_int64_g = pshmemx_int64_g -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_G(_char, char, shmem) diff --git a/oshmem/shmem/c/shmem_get.c b/oshmem/shmem/c/shmem_get.c index 452557fdd6a..eac21d5563c 100644 --- a/oshmem/shmem/c/shmem_get.c +++ b/oshmem/shmem/c/shmem_get.c @@ -120,7 +120,7 @@ #pragma weak shmem_get32 = pshmem_get32 #pragma weak shmem_get64 = pshmem_get64 #pragma weak shmem_get128 = pshmem_get128 -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_GET(_char, char) diff --git a/oshmem/shmem/c/shmem_get_nb.c b/oshmem/shmem/c/shmem_get_nb.c index 877b7ac0201..c89f7df587e 100644 --- a/oshmem/shmem/c/shmem_get_nb.c +++ b/oshmem/shmem/c/shmem_get_nb.c @@ -120,7 +120,7 @@ #pragma weak shmem_get64_nbi = pshmem_get64_nbi #pragma weak shmem_get128_nbi = pshmem_get128_nbi #pragma weak shmem_getmem_nbi = pshmem_getmem_nbi -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_GET_NB(_char, char) diff --git a/oshmem/shmem/c/shmem_global_exit.c b/oshmem/shmem/c/shmem_global_exit.c index c71ee62b636..3512ba72f40 100644 --- a/oshmem/shmem/c/shmem_global_exit.c +++ b/oshmem/shmem/c/shmem_global_exit.c @@ -17,7 +17,7 @@ #if OSHMEM_PROFILING #include 
"oshmem/include/pshmem.h" #pragma weak shmem_global_exit = pshmem_global_exit -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif extern int oshmem_shmem_inglobalexit; diff --git a/oshmem/shmem/c/shmem_iget.c b/oshmem/shmem/c/shmem_iget.c index eb4d31cf802..679f73f5d67 100644 --- a/oshmem/shmem/c/shmem_iget.c +++ b/oshmem/shmem/c/shmem_iget.c @@ -123,7 +123,7 @@ #pragma weak shmem_iget32 = pshmem_iget32 #pragma weak shmem_iget64 = pshmem_iget64 #pragma weak shmem_iget128 = pshmem_iget128 -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_IGET(_char, char) diff --git a/oshmem/shmem/c/shmem_inc.c b/oshmem/shmem/c/shmem_inc.c index 8c44c0258d1..8389e19e35c 100644 --- a/oshmem/shmem/c/shmem_inc.c +++ b/oshmem/shmem/c/shmem_inc.c @@ -66,12 +66,24 @@ #pragma weak shmem_ctx_uint_atomic_inc = pshmem_ctx_uint_atomic_inc #pragma weak shmem_ctx_ulong_atomic_inc = pshmem_ctx_ulong_atomic_inc #pragma weak shmem_ctx_ulonglong_atomic_inc = pshmem_ctx_ulonglong_atomic_inc +#pragma weak shmem_ctx_int32_atomic_inc = pshmem_ctx_int32_atomic_inc +#pragma weak shmem_ctx_int64_atomic_inc = pshmem_ctx_int64_atomic_inc +#pragma weak shmem_ctx_uint32_atomic_inc = pshmem_ctx_uint32_atomic_inc +#pragma weak shmem_ctx_uint64_atomic_inc = pshmem_ctx_uint64_atomic_inc +#pragma weak shmem_ctx_size_atomic_inc = pshmem_ctx_size_atomic_inc +#pragma weak shmem_ctx_ptrdiff_atomic_inc = pshmem_ctx_ptrdiff_atomic_inc #pragma weak shmem_int_atomic_inc = pshmem_int_atomic_inc #pragma weak shmem_long_atomic_inc = pshmem_long_atomic_inc #pragma weak shmem_longlong_atomic_inc = pshmem_longlong_atomic_inc #pragma weak shmem_uint_atomic_inc = pshmem_uint_atomic_inc #pragma weak shmem_ulong_atomic_inc = pshmem_ulong_atomic_inc +#pragma weak shmem_int32_atomic_inc = pshmem_int32_atomic_inc +#pragma weak shmem_int64_atomic_inc = pshmem_int64_atomic_inc +#pragma weak shmem_uint32_atomic_inc = 
pshmem_uint32_atomic_inc +#pragma weak shmem_uint64_atomic_inc = pshmem_uint64_atomic_inc +#pragma weak shmem_size_atomic_inc = pshmem_size_atomic_inc +#pragma weak shmem_ptrdiff_atomic_inc = pshmem_ptrdiff_atomic_inc #pragma weak shmem_ulonglong_atomic_inc = pshmem_ulonglong_atomic_inc #pragma weak shmem_int_inc = pshmem_int_inc @@ -80,7 +92,7 @@ #pragma weak shmemx_int32_inc = pshmemx_int32_inc #pragma weak shmemx_int64_inc = pshmemx_int64_inc -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_ATOMIC_INC(_int, int, shmem) @@ -89,12 +101,24 @@ SHMEM_CTX_TYPE_ATOMIC_INC(_longlong, long long, shmem) SHMEM_CTX_TYPE_ATOMIC_INC(_uint, unsigned int, shmem) SHMEM_CTX_TYPE_ATOMIC_INC(_ulong, unsigned long, shmem) SHMEM_CTX_TYPE_ATOMIC_INC(_ulonglong, unsigned long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_INC(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_INC(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_INC(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_INC(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_INC(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_INC(_ptrdiff, ptrdiff_t, shmem) SHMEM_TYPE_ATOMIC_INC(_int, int, shmem) SHMEM_TYPE_ATOMIC_INC(_long, long, shmem) SHMEM_TYPE_ATOMIC_INC(_longlong, long long, shmem) SHMEM_TYPE_ATOMIC_INC(_uint, unsigned int, shmem) SHMEM_TYPE_ATOMIC_INC(_ulong, unsigned long, shmem) SHMEM_TYPE_ATOMIC_INC(_ulonglong, unsigned long long, shmem) +SHMEM_TYPE_ATOMIC_INC(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_INC(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_INC(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_INC(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_INC(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_INC(_ptrdiff, ptrdiff_t, shmem) #define SHMEM_TYPE_INC(type_name, type, prefix) \ void prefix##type_name##_inc(type *target, int pe) \ diff --git a/oshmem/shmem/c/shmem_init.c b/oshmem/shmem/c/shmem_init.c index 0b8330d8525..16e8731b2f1 100644 --- a/oshmem/shmem/c/shmem_init.c +++ 
b/oshmem/shmem/c/shmem_init.c @@ -27,7 +27,7 @@ #pragma weak shmem_init = pshmem_init #pragma weak shmem_init_thread = pshmem_init_thread #pragma weak start_pes = pstart_pes -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif extern int oshmem_shmem_globalexit_status; diff --git a/oshmem/shmem/c/shmem_iput.c b/oshmem/shmem/c/shmem_iput.c index 0e83b6c8495..08f93f79ea1 100644 --- a/oshmem/shmem/c/shmem_iput.c +++ b/oshmem/shmem/c/shmem_iput.c @@ -124,7 +124,7 @@ #pragma weak shmem_iput32 = pshmem_iput32 #pragma weak shmem_iput64 = pshmem_iput64 #pragma weak shmem_iput128 = pshmem_iput128 -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_IPUT(_char, char) diff --git a/oshmem/shmem/c/shmem_or.c b/oshmem/shmem/c/shmem_or.c index 7bdbb59ad34..3d440cc7328 100644 --- a/oshmem/shmem/c/shmem_or.c +++ b/oshmem/shmem/c/shmem_or.c @@ -51,7 +51,7 @@ #pragma weak shmemx_int64_atomic_or = pshmemx_int64_atomic_or #pragma weak shmemx_uint32_atomic_or = pshmemx_uint32_atomic_or #pragma weak shmemx_uint64_atomic_or = pshmemx_uint64_atomic_or -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif OSHMEM_TYPE_OP(int, int, shmem, or) diff --git a/oshmem/shmem/c/shmem_p.c b/oshmem/shmem/c/shmem_p.c index 974333a4361..ae34bcefd32 100644 --- a/oshmem/shmem/c/shmem_p.c +++ b/oshmem/shmem/c/shmem_p.c @@ -112,7 +112,8 @@ #pragma weak shmemx_int16_p = pshmemx_int16_p #pragma weak shmemx_int32_p = pshmemx_int32_p #pragma weak shmemx_int64_p = pshmemx_int64_p -#include "oshmem/shmem/c/profile/defines.h" + +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_P(_char, char, shmem) diff --git a/oshmem/shmem/c/shmem_pcontrol.c b/oshmem/shmem/c/shmem_pcontrol.c new file mode 100644 index 00000000000..b87a2829ced --- /dev/null +++ b/oshmem/shmem/c/shmem_pcontrol.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. 
+ * All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/mca/scoll/scoll.h" + +#include "oshmem/proc/proc.h" + + +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" + +#pragma weak shmem_pcontrol = pshmem_pcontrol +#include "oshmem/shmem/c/profile-defines.h" +#endif + + + + +void shmem_pcontrol(int level, ...) +{ + return ; +} + diff --git a/oshmem/shmem/c/shmem_pe_accessible.c b/oshmem/shmem/c/shmem_pe_accessible.c index 3e6923a0bc8..923d781df03 100644 --- a/oshmem/shmem/c/shmem_pe_accessible.c +++ b/oshmem/shmem/c/shmem_pe_accessible.c @@ -22,7 +22,7 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_pe_accessible = pshmem_pe_accessible -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif int shmem_pe_accessible(int pe) diff --git a/oshmem/shmem/c/shmem_ptr.c b/oshmem/shmem/c/shmem_ptr.c index 7bfb6d014f6..f69a83766da 100644 --- a/oshmem/shmem/c/shmem_ptr.c +++ b/oshmem/shmem/c/shmem_ptr.c @@ -26,12 +26,11 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_ptr = pshmem_ptr -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void *shmem_ptr(const void *dst_addr, int pe) { - ompi_proc_t *proc; sshmem_mkey_t *mkey; int i; void *rva; @@ -46,8 +45,7 @@ void *shmem_ptr(const void *dst_addr, int pe) } /* The memory must be on the local node */ - proc = oshmem_proc_group_find(oshmem_group_all, pe); - if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) { + if (!oshmem_proc_on_local_node(pe)) { return NULL; } diff --git a/oshmem/shmem/c/shmem_put.c b/oshmem/shmem/c/shmem_put.c index 98b6f24c7b8..16306d6d97e 
100644 --- a/oshmem/shmem/c/shmem_put.c +++ b/oshmem/shmem/c/shmem_put.c @@ -123,7 +123,7 @@ #pragma weak shmem_put32 = pshmem_put32 #pragma weak shmem_put64 = pshmem_put64 #pragma weak shmem_put128 = pshmem_put128 -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_PUT(_char, char) diff --git a/oshmem/shmem/c/shmem_put_nb.c b/oshmem/shmem/c/shmem_put_nb.c index bf63130e236..89e4bf18240 100644 --- a/oshmem/shmem/c/shmem_put_nb.c +++ b/oshmem/shmem/c/shmem_put_nb.c @@ -127,7 +127,7 @@ #pragma weak shmem_ctx_put64_nbi = pshmem_ctx_put64_nbi #pragma weak shmem_ctx_put128_nbi = pshmem_ctx_put128_nbi #pragma weak shmem_ctx_putmem_nbi = pshmem_ctx_putmem_nbi -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_PUT_NB(_char, char) diff --git a/oshmem/shmem/c/shmem_put_signal.c b/oshmem/shmem/c/shmem_put_signal.c new file mode 100644 index 00000000000..4c50dc10ede --- /dev/null +++ b/oshmem/shmem/c/shmem_put_signal.c @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/mca/spml/spml.h" + +#define DO_SHMEM_TYPE_PUT_SIGNAL(ctx, type, dest, source, nelems, sig_addr, signal, sig_op, pe) do { \ + int rc = OSHMEM_SUCCESS; \ + size_t size = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + RUNTIME_CHECK_PE(pe); \ + RUNTIME_CHECK_ADDR(dest); \ + \ + size = nelems * sizeof(type); \ + rc = MCA_SPML_CALL(put_signal( \ + ctx, \ + (void*)dest, \ + size, \ + (void*)source, \ + sig_addr, signal, sig_op, pe)); \ + RUNTIME_CHECK_RC(rc); \ + } while (0) + +#define SHMEM_CTX_TYPE_PUT_SIGNAL(type_name, type) \ + void shmem_ctx##type_name##_put_signal(shmem_ctx_t ctx, type *dest, const type *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe)\ + { \ + DO_SHMEM_TYPE_PUT_SIGNAL(ctx, type, dest, source, nelems, sig_addr, signal, sig_op, pe); \ + return ; \ + } + +#define SHMEM_TYPE_PUT_SIGNAL(type_name, type) \ + void shmem##type_name##_put_signal(type *dest, const type *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe)\ + { \ + DO_SHMEM_TYPE_PUT_SIGNAL(oshmem_ctx_default, type, dest, \ + source, nelems, sig_addr, signal, sig_op, pe); \ + return ; \ + } + +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_char_put_signal = pshmem_ctx_char_put_signal +#pragma weak shmem_ctx_short_put_signal = pshmem_ctx_short_put_signal +#pragma weak shmem_ctx_int_put_signal = pshmem_ctx_int_put_signal +#pragma weak shmem_ctx_long_put_signal = pshmem_ctx_long_put_signal +#pragma weak shmem_ctx_float_put_signal = pshmem_ctx_float_put_signal +#pragma weak shmem_ctx_double_put_signal = pshmem_ctx_double_put_signal +#pragma weak shmem_ctx_longlong_put_signal = pshmem_ctx_longlong_put_signal +#pragma weak shmem_ctx_schar_put_signal = pshmem_ctx_schar_put_signal +#pragma 
weak shmem_ctx_uchar_put_signal = pshmem_ctx_uchar_put_signal +#pragma weak shmem_ctx_ushort_put_signal = pshmem_ctx_ushort_put_signal +#pragma weak shmem_ctx_uint_put_signal = pshmem_ctx_uint_put_signal +#pragma weak shmem_ctx_ulong_put_signal = pshmem_ctx_ulong_put_signal +#pragma weak shmem_ctx_ulonglong_put_signal = pshmem_ctx_ulonglong_put_signal +#pragma weak shmem_ctx_longdouble_put_signal = pshmem_ctx_longdouble_put_signal +#pragma weak shmem_ctx_int8_put_signal = pshmem_ctx_int8_put_signal +#pragma weak shmem_ctx_int16_put_signal = pshmem_ctx_int16_put_signal +#pragma weak shmem_ctx_int32_put_signal = pshmem_ctx_int32_put_signal +#pragma weak shmem_ctx_int64_put_signal = pshmem_ctx_int64_put_signal +#pragma weak shmem_ctx_uint8_put_signal = pshmem_ctx_uint8_put_signal +#pragma weak shmem_ctx_uint16_put_signal = pshmem_ctx_uint16_put_signal +#pragma weak shmem_ctx_uint32_put_signal = pshmem_ctx_uint32_put_signal +#pragma weak shmem_ctx_uint64_put_signal = pshmem_ctx_uint64_put_signal +#pragma weak shmem_ctx_size_put_signal = pshmem_ctx_size_put_signal +#pragma weak shmem_ctx_ptrdiff_put_signal = pshmem_ctx_ptrdiff_put_signal + +#pragma weak shmem_char_put_signal = pshmem_char_put_signal +#pragma weak shmem_short_put_signal = pshmem_short_put_signal +#pragma weak shmem_int_put_signal = pshmem_int_put_signal +#pragma weak shmem_long_put_signal = pshmem_long_put_signal +#pragma weak shmem_float_put_signal = pshmem_float_put_signal +#pragma weak shmem_double_put_signal = pshmem_double_put_signal +#pragma weak shmem_longlong_put_signal = pshmem_longlong_put_signal +#pragma weak shmem_schar_put_signal = pshmem_schar_put_signal +#pragma weak shmem_uchar_put_signal = pshmem_uchar_put_signal +#pragma weak shmem_ushort_put_signal = pshmem_ushort_put_signal +#pragma weak shmem_uint_put_signal = pshmem_uint_put_signal +#pragma weak shmem_ulong_put_signal = pshmem_ulong_put_signal +#pragma weak shmem_ulonglong_put_signal = pshmem_ulonglong_put_signal +#pragma weak 
shmem_longdouble_put_signal = pshmem_longdouble_put_signal +#pragma weak shmem_int8_put_signal = pshmem_int8_put_signal +#pragma weak shmem_int16_put_signal = pshmem_int16_put_signal +#pragma weak shmem_int32_put_signal = pshmem_int32_put_signal +#pragma weak shmem_int64_put_signal = pshmem_int64_put_signal +#pragma weak shmem_uint8_put_signal = pshmem_uint8_put_signal +#pragma weak shmem_uint16_put_signal = pshmem_uint16_put_signal +#pragma weak shmem_uint32_put_signal = pshmem_uint32_put_signal +#pragma weak shmem_uint64_put_signal = pshmem_uint64_put_signal +#pragma weak shmem_size_put_signal = pshmem_size_put_signal +#pragma weak shmem_ptrdiff_put_signal = pshmem_ptrdiff_put_signal + +#pragma weak shmem_put8_signal = pshmem_put8_signal +#pragma weak shmem_put16_signal = pshmem_put16_signal +#pragma weak shmem_put32_signal = pshmem_put32_signal +#pragma weak shmem_put64_signal = pshmem_put64_signal +#pragma weak shmem_put128_signal = pshmem_put128_signal + +#pragma weak shmem_ctx_put8_signal = pshmem_ctx_put8_signal +#pragma weak shmem_ctx_put16_signal = pshmem_ctx_put16_signal +#pragma weak shmem_ctx_put32_signal = pshmem_ctx_put32_signal +#pragma weak shmem_ctx_put64_signal = pshmem_ctx_put64_signal +#pragma weak shmem_ctx_put128_signal = pshmem_ctx_put128_signal + +#pragma weak shmem_putmem_signal = pshmem_putmem_signal +#pragma weak shmem_ctx_putmem_signal = pshmem_ctx_putmem_signal + + +#pragma weak shmem_signal_fetch = pshmem_signal_fetch + + + + +#include "oshmem/shmem/c/profile-defines.h" +#endif + +SHMEM_CTX_TYPE_PUT_SIGNAL(_char, char) +SHMEM_CTX_TYPE_PUT_SIGNAL(_short, short) +SHMEM_CTX_TYPE_PUT_SIGNAL(_int, int) +SHMEM_CTX_TYPE_PUT_SIGNAL(_long, long) +SHMEM_CTX_TYPE_PUT_SIGNAL(_longlong, long long) +SHMEM_CTX_TYPE_PUT_SIGNAL(_schar, signed char) +SHMEM_CTX_TYPE_PUT_SIGNAL(_uchar, unsigned char) +SHMEM_CTX_TYPE_PUT_SIGNAL(_ushort, unsigned short) +SHMEM_CTX_TYPE_PUT_SIGNAL(_uint, unsigned int) +SHMEM_CTX_TYPE_PUT_SIGNAL(_ulong, unsigned long) 
+SHMEM_CTX_TYPE_PUT_SIGNAL(_ulonglong, unsigned long long) +SHMEM_CTX_TYPE_PUT_SIGNAL(_float, float) +SHMEM_CTX_TYPE_PUT_SIGNAL(_double, double) +SHMEM_CTX_TYPE_PUT_SIGNAL(_longdouble, long double) +SHMEM_CTX_TYPE_PUT_SIGNAL(_int8, int8_t) +SHMEM_CTX_TYPE_PUT_SIGNAL(_int16, int16_t) +SHMEM_CTX_TYPE_PUT_SIGNAL(_int32, int32_t) +SHMEM_CTX_TYPE_PUT_SIGNAL(_int64, int64_t) +SHMEM_CTX_TYPE_PUT_SIGNAL(_uint8, uint8_t) +SHMEM_CTX_TYPE_PUT_SIGNAL(_uint16, uint16_t) +SHMEM_CTX_TYPE_PUT_SIGNAL(_uint32, uint32_t) +SHMEM_CTX_TYPE_PUT_SIGNAL(_uint64, uint64_t) +SHMEM_CTX_TYPE_PUT_SIGNAL(_size, size_t) +SHMEM_CTX_TYPE_PUT_SIGNAL(_ptrdiff, ptrdiff_t) + +SHMEM_TYPE_PUT_SIGNAL(_char, char) +SHMEM_TYPE_PUT_SIGNAL(_short, short) +SHMEM_TYPE_PUT_SIGNAL(_int, int) +SHMEM_TYPE_PUT_SIGNAL(_long, long) +SHMEM_TYPE_PUT_SIGNAL(_longlong, long long) +SHMEM_TYPE_PUT_SIGNAL(_schar, signed char) +SHMEM_TYPE_PUT_SIGNAL(_uchar, unsigned char) +SHMEM_TYPE_PUT_SIGNAL(_ushort, unsigned short) +SHMEM_TYPE_PUT_SIGNAL(_uint, unsigned int) +SHMEM_TYPE_PUT_SIGNAL(_ulong, unsigned long) +SHMEM_TYPE_PUT_SIGNAL(_ulonglong, unsigned long long) +SHMEM_TYPE_PUT_SIGNAL(_float, float) +SHMEM_TYPE_PUT_SIGNAL(_double, double) +SHMEM_TYPE_PUT_SIGNAL(_longdouble, long double) +SHMEM_TYPE_PUT_SIGNAL(_int8, int8_t) +SHMEM_TYPE_PUT_SIGNAL(_int16, int16_t) +SHMEM_TYPE_PUT_SIGNAL(_int32, int32_t) +SHMEM_TYPE_PUT_SIGNAL(_int64, int64_t) +SHMEM_TYPE_PUT_SIGNAL(_uint8, uint8_t) +SHMEM_TYPE_PUT_SIGNAL(_uint16, uint16_t) +SHMEM_TYPE_PUT_SIGNAL(_uint32, uint32_t) +SHMEM_TYPE_PUT_SIGNAL(_uint64, uint64_t) +SHMEM_TYPE_PUT_SIGNAL(_size, size_t) +SHMEM_TYPE_PUT_SIGNAL(_ptrdiff, ptrdiff_t) + +#define DO_SHMEM_PUTMEM_SIGNAL(ctx, dest, source, element_size, nelems, sig_addr, signal, sig_op, pe) do { \ + int rc = OSHMEM_SUCCESS; \ + size_t size = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + RUNTIME_CHECK_PE(pe); \ + RUNTIME_CHECK_ADDR(dest); \ + \ + size = nelems * element_size; \ + rc = MCA_SPML_CALL(put_signal( \ + ctx, \ + (void*)dest, \ + 
size, \ + (void*)source, \ + sig_addr, signal, sig_op, pe)); \ + RUNTIME_CHECK_RC(rc); \ + } while (0) + +#define SHMEM_CTX_TYPE_PUTMEM_SIGNAL(name, element_size, prefix) \ + void prefix##_ctx##name(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe) \ + { \ + DO_SHMEM_PUTMEM_SIGNAL(ctx, dest, source, \ + element_size, nelems, sig_addr, signal, sig_op, pe); \ + return ; \ + } + +#define SHMEM_TYPE_PUTMEM_SIGNAL(name, element_size, prefix) \ + void prefix##name(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe) \ + { \ + DO_SHMEM_PUTMEM_SIGNAL(oshmem_ctx_default, dest, \ + source, element_size, nelems, sig_addr, signal, sig_op, pe); \ + return ; \ + } + +SHMEM_CTX_TYPE_PUTMEM_SIGNAL(_putmem_signal, 1, shmem) +SHMEM_CTX_TYPE_PUTMEM_SIGNAL(_put8_signal, 1, shmem) +SHMEM_CTX_TYPE_PUTMEM_SIGNAL(_put16_signal, 2, shmem) +SHMEM_CTX_TYPE_PUTMEM_SIGNAL(_put32_signal, 4, shmem) +SHMEM_CTX_TYPE_PUTMEM_SIGNAL(_put64_signal, 8, shmem) +SHMEM_CTX_TYPE_PUTMEM_SIGNAL(_put128_signal, 16, shmem) +SHMEM_TYPE_PUTMEM_SIGNAL(_putmem_signal, 1, shmem) +SHMEM_TYPE_PUTMEM_SIGNAL(_put8_signal, 1, shmem) +SHMEM_TYPE_PUTMEM_SIGNAL(_put16_signal, 2, shmem) +SHMEM_TYPE_PUTMEM_SIGNAL(_put32_signal, 4, shmem) +SHMEM_TYPE_PUTMEM_SIGNAL(_put64_signal, 8, shmem) +SHMEM_TYPE_PUTMEM_SIGNAL(_put128_signal, 16, shmem) + + +uint64_t shmem_signal_fetch(const uint64_t *sig_addr) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + diff --git a/oshmem/shmem/c/shmem_put_signal_nb.c b/oshmem/shmem/c/shmem_put_signal_nb.c new file mode 100644 index 00000000000..3b39e250fcd --- /dev/null +++ b/oshmem/shmem/c/shmem_put_signal_nb.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/mca/spml/spml.h" + +#define DO_SHMEM_TYPE_PUT_SIGNAL_NBI(ctx, type, dest, source, nelems, sig_addr, signal, sig_op, pe) do { \ + int rc = OSHMEM_SUCCESS; \ + size_t size = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + RUNTIME_CHECK_PE(pe); \ + RUNTIME_CHECK_ADDR(dest); \ + \ + size = nelems * sizeof(type); \ + rc = MCA_SPML_CALL(put_signal_nb( \ + ctx, \ + (void*)dest, \ + size, \ + (void*)source, \ + sig_addr, signal, sig_op, pe)); \ + RUNTIME_CHECK_RC(rc); \ + } while (0) + +#define SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(type_name, type) \ + void shmem_ctx##type_name##_put_signal_nbi(shmem_ctx_t ctx, type *dest, const type *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe)\ + { \ + DO_SHMEM_TYPE_PUT_SIGNAL_NBI(ctx, type, dest, source, nelems, sig_addr, signal, sig_op, pe); \ + return ; \ + } + +#define SHMEM_TYPE_PUT_SIGNAL_NBI(type_name, type) \ + void shmem##type_name##_put_signal_nbi(type *dest, const type *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe)\ + { \ + DO_SHMEM_TYPE_PUT_SIGNAL_NBI(oshmem_ctx_default, type, dest, \ + source, nelems, sig_addr, signal, sig_op, pe); \ + return ; \ + } + +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" + +#pragma weak shmem_ctx_char_put_signal_nbi = pshmem_ctx_char_put_signal_nbi +#pragma weak shmem_ctx_short_put_signal_nbi = pshmem_ctx_short_put_signal_nbi +#pragma weak shmem_ctx_int_put_signal_nbi = pshmem_ctx_int_put_signal_nbi +#pragma weak shmem_ctx_long_put_signal_nbi = pshmem_ctx_long_put_signal_nbi +#pragma weak shmem_ctx_float_put_signal_nbi = pshmem_ctx_float_put_signal_nbi +#pragma weak shmem_ctx_double_put_signal_nbi = pshmem_ctx_double_put_signal_nbi +#pragma weak shmem_ctx_longlong_put_signal_nbi = 
pshmem_ctx_longlong_put_signal_nbi +#pragma weak shmem_ctx_schar_put_signal_nbi = pshmem_ctx_schar_put_signal_nbi +#pragma weak shmem_ctx_uchar_put_signal_nbi = pshmem_ctx_uchar_put_signal_nbi +#pragma weak shmem_ctx_ushort_put_signal_nbi = pshmem_ctx_ushort_put_signal_nbi +#pragma weak shmem_ctx_uint_put_signal_nbi = pshmem_ctx_uint_put_signal_nbi +#pragma weak shmem_ctx_ulong_put_signal_nbi = pshmem_ctx_ulong_put_signal_nbi +#pragma weak shmem_ctx_ulonglong_put_signal_nbi = pshmem_ctx_ulonglong_put_signal_nbi +#pragma weak shmem_ctx_longdouble_put_signal_nbi = pshmem_ctx_longdouble_put_signal_nbi +#pragma weak shmem_ctx_int8_put_signal_nbi = pshmem_ctx_int8_put_signal_nbi +#pragma weak shmem_ctx_int16_put_signal_nbi = pshmem_ctx_int16_put_signal_nbi +#pragma weak shmem_ctx_int32_put_signal_nbi = pshmem_ctx_int32_put_signal_nbi +#pragma weak shmem_ctx_int64_put_signal_nbi = pshmem_ctx_int64_put_signal_nbi +#pragma weak shmem_ctx_uint8_put_signal_nbi = pshmem_ctx_uint8_put_signal_nbi +#pragma weak shmem_ctx_uint16_put_signal_nbi = pshmem_ctx_uint16_put_signal_nbi +#pragma weak shmem_ctx_uint32_put_signal_nbi = pshmem_ctx_uint32_put_signal_nbi +#pragma weak shmem_ctx_uint64_put_signal_nbi = pshmem_ctx_uint64_put_signal_nbi +#pragma weak shmem_ctx_size_put_signal_nbi = pshmem_ctx_size_put_signal_nbi +#pragma weak shmem_ctx_ptrdiff_put_signal_nbi = pshmem_ctx_ptrdiff_put_signal_nbi + +#pragma weak shmem_char_put_signal_nbi = pshmem_char_put_signal_nbi +#pragma weak shmem_short_put_signal_nbi = pshmem_short_put_signal_nbi +#pragma weak shmem_int_put_signal_nbi = pshmem_int_put_signal_nbi +#pragma weak shmem_long_put_signal_nbi = pshmem_long_put_signal_nbi +#pragma weak shmem_float_put_signal_nbi = pshmem_float_put_signal_nbi +#pragma weak shmem_double_put_signal_nbi = pshmem_double_put_signal_nbi +#pragma weak shmem_longlong_put_signal_nbi = pshmem_longlong_put_signal_nbi +#pragma weak shmem_schar_put_signal_nbi = pshmem_schar_put_signal_nbi +#pragma weak 
shmem_uchar_put_signal_nbi = pshmem_uchar_put_signal_nbi +#pragma weak shmem_ushort_put_signal_nbi = pshmem_ushort_put_signal_nbi +#pragma weak shmem_uint_put_signal_nbi = pshmem_uint_put_signal_nbi +#pragma weak shmem_ulong_put_signal_nbi = pshmem_ulong_put_signal_nbi +#pragma weak shmem_ulonglong_put_signal_nbi = pshmem_ulonglong_put_signal_nbi +#pragma weak shmem_longdouble_put_signal_nbi = pshmem_longdouble_put_signal_nbi +#pragma weak shmem_int8_put_signal_nbi = pshmem_int8_put_signal_nbi +#pragma weak shmem_int16_put_signal_nbi = pshmem_int16_put_signal_nbi +#pragma weak shmem_int32_put_signal_nbi = pshmem_int32_put_signal_nbi +#pragma weak shmem_int64_put_signal_nbi = pshmem_int64_put_signal_nbi +#pragma weak shmem_uint8_put_signal_nbi = pshmem_uint8_put_signal_nbi +#pragma weak shmem_uint16_put_signal_nbi = pshmem_uint16_put_signal_nbi +#pragma weak shmem_uint32_put_signal_nbi = pshmem_uint32_put_signal_nbi +#pragma weak shmem_uint64_put_signal_nbi = pshmem_uint64_put_signal_nbi +#pragma weak shmem_size_put_signal_nbi = pshmem_size_put_signal_nbi +#pragma weak shmem_ptrdiff_put_signal_nbi = pshmem_ptrdiff_put_signal_nbi + +#pragma weak shmem_put8_signal_nbi = pshmem_put8_signal_nbi +#pragma weak shmem_put16_signal_nbi = pshmem_put16_signal_nbi +#pragma weak shmem_put32_signal_nbi = pshmem_put32_signal_nbi +#pragma weak shmem_put64_signal_nbi = pshmem_put64_signal_nbi +#pragma weak shmem_put128_signal_nbi = pshmem_put128_signal_nbi + +#pragma weak shmem_ctx_put8_signal_nbi = pshmem_ctx_put8_signal_nbi +#pragma weak shmem_ctx_put16_signal_nbi = pshmem_ctx_put16_signal_nbi +#pragma weak shmem_ctx_put32_signal_nbi = pshmem_ctx_put32_signal_nbi +#pragma weak shmem_ctx_put64_signal_nbi = pshmem_ctx_put64_signal_nbi +#pragma weak shmem_ctx_put128_signal_nbi = pshmem_ctx_put128_signal_nbi + +#pragma weak shmem_putmem_signal_nbi = pshmem_putmem_signal_nbi +#pragma weak shmem_ctx_putmem_signal_nbi = pshmem_ctx_putmem_signal_nbi + + +#include 
"oshmem/shmem/c/profile-defines.h" +#endif + +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_char, char) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_short, short) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_int, int) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_long, long) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_longlong, long long) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_schar, signed char) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_uchar, unsigned char) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_ushort, unsigned short) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_uint, unsigned int) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_ulong, unsigned long) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_ulonglong, unsigned long long) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_float, float) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_double, double) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_longdouble, long double) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_int8, int8_t) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_int16, int16_t) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_int32, int32_t) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_int64, int64_t) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_uint8, uint8_t) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_uint16, uint16_t) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_uint32, uint32_t) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_uint64, uint64_t) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_size, size_t) +SHMEM_CTX_TYPE_PUT_SIGNAL_NBI(_ptrdiff, ptrdiff_t) + +SHMEM_TYPE_PUT_SIGNAL_NBI(_char, char) +SHMEM_TYPE_PUT_SIGNAL_NBI(_short, short) +SHMEM_TYPE_PUT_SIGNAL_NBI(_int, int) +SHMEM_TYPE_PUT_SIGNAL_NBI(_long, long) +SHMEM_TYPE_PUT_SIGNAL_NBI(_longlong, long long) +SHMEM_TYPE_PUT_SIGNAL_NBI(_schar, signed char) +SHMEM_TYPE_PUT_SIGNAL_NBI(_uchar, unsigned char) +SHMEM_TYPE_PUT_SIGNAL_NBI(_ushort, unsigned short) +SHMEM_TYPE_PUT_SIGNAL_NBI(_uint, unsigned int) +SHMEM_TYPE_PUT_SIGNAL_NBI(_ulong, unsigned long) +SHMEM_TYPE_PUT_SIGNAL_NBI(_ulonglong, unsigned long long) +SHMEM_TYPE_PUT_SIGNAL_NBI(_float, float) +SHMEM_TYPE_PUT_SIGNAL_NBI(_double, double) +SHMEM_TYPE_PUT_SIGNAL_NBI(_longdouble, long double) +SHMEM_TYPE_PUT_SIGNAL_NBI(_int8, int8_t) +SHMEM_TYPE_PUT_SIGNAL_NBI(_int16, int16_t) 
+SHMEM_TYPE_PUT_SIGNAL_NBI(_int32, int32_t) +SHMEM_TYPE_PUT_SIGNAL_NBI(_int64, int64_t) +SHMEM_TYPE_PUT_SIGNAL_NBI(_uint8, uint8_t) +SHMEM_TYPE_PUT_SIGNAL_NBI(_uint16, uint16_t) +SHMEM_TYPE_PUT_SIGNAL_NBI(_uint32, uint32_t) +SHMEM_TYPE_PUT_SIGNAL_NBI(_uint64, uint64_t) +SHMEM_TYPE_PUT_SIGNAL_NBI(_size, size_t) +SHMEM_TYPE_PUT_SIGNAL_NBI(_ptrdiff, ptrdiff_t) + +#define DO_SHMEM_PUTMEM_SIGNAL_NBI(ctx, dest, source, element_size, nelems, sig_addr, signal, sig_op, pe) do { \ + int rc = OSHMEM_SUCCESS; \ + size_t size = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + RUNTIME_CHECK_PE(pe); \ + RUNTIME_CHECK_ADDR(dest); \ + \ + size = nelems * element_size; \ + rc = MCA_SPML_CALL(put_signal_nb( \ + ctx, \ + (void*)dest, \ + size, \ + (void*)source, \ + sig_addr, signal, sig_op, pe)); \ + RUNTIME_CHECK_RC(rc); \ + } while (0) + +#define SHMEM_CTX_TYPE_PUTMEM_SIGNAL_NBI(name, element_size, prefix) \ + void prefix##_ctx##name(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe) \ + { \ + DO_SHMEM_PUTMEM_SIGNAL_NBI(ctx, dest, source, \ + element_size, nelems, sig_addr, signal, sig_op, pe); \ + return ; \ + } + +#define SHMEM_TYPE_PUTMEM_SIGNAL_NBI(name, element_size, prefix) \ + void prefix##name(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe) \ + { \ + DO_SHMEM_PUTMEM_SIGNAL_NBI(oshmem_ctx_default, dest, \ + source, element_size, nelems, sig_addr, signal, sig_op, pe); \ + return ; \ + } + +SHMEM_CTX_TYPE_PUTMEM_SIGNAL_NBI(_putmem_signal_nbi, 1, shmem) +SHMEM_CTX_TYPE_PUTMEM_SIGNAL_NBI(_put8_signal_nbi, 1, shmem) +SHMEM_CTX_TYPE_PUTMEM_SIGNAL_NBI(_put16_signal_nbi, 2, shmem) +SHMEM_CTX_TYPE_PUTMEM_SIGNAL_NBI(_put32_signal_nbi, 4, shmem) +SHMEM_CTX_TYPE_PUTMEM_SIGNAL_NBI(_put64_signal_nbi, 8, shmem) +SHMEM_CTX_TYPE_PUTMEM_SIGNAL_NBI(_put128_signal_nbi, 16, shmem) +SHMEM_TYPE_PUTMEM_SIGNAL_NBI(_putmem_signal_nbi, 1, shmem) 
+SHMEM_TYPE_PUTMEM_SIGNAL_NBI(_put8_signal_nbi, 1, shmem) +SHMEM_TYPE_PUTMEM_SIGNAL_NBI(_put16_signal_nbi, 2, shmem) +SHMEM_TYPE_PUTMEM_SIGNAL_NBI(_put32_signal_nbi, 4, shmem) +SHMEM_TYPE_PUTMEM_SIGNAL_NBI(_put64_signal_nbi, 8, shmem) +SHMEM_TYPE_PUTMEM_SIGNAL_NBI(_put128_signal_nbi, 16, shmem) + diff --git a/oshmem/shmem/c/shmem_query.c b/oshmem/shmem/c/shmem_query.c index fd3f1771d94..68809619ac3 100644 --- a/oshmem/shmem/c/shmem_query.c +++ b/oshmem/shmem/c/shmem_query.c @@ -23,7 +23,7 @@ #pragma weak shmem_query_thread = pshmem_query_thread #pragma weak _num_pes = p_num_pes #pragma weak _my_pe = p_my_pe -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif int _num_pes(void) diff --git a/oshmem/shmem/c/shmem_quiet.c b/oshmem/shmem/c/shmem_quiet.c index e27b273d30b..75d983f7d7b 100644 --- a/oshmem/shmem/c/shmem_quiet.c +++ b/oshmem/shmem/c/shmem_quiet.c @@ -18,7 +18,7 @@ #include "oshmem/include/pshmem.h" #pragma weak shmem_quiet = pshmem_quiet #pragma weak shmem_ctx_quiet = pshmem_ctx_quiet -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_quiet(void) diff --git a/oshmem/shmem/c/shmem_realloc.c b/oshmem/shmem/c/shmem_realloc.c index a37ae7e6c4b..c637e05c7a9 100644 --- a/oshmem/shmem/c/shmem_realloc.c +++ b/oshmem/shmem/c/shmem_realloc.c @@ -24,7 +24,7 @@ #include "oshmem/include/pshmem.h" #pragma weak shmem_realloc = pshmem_realloc #pragma weak shrealloc = pshrealloc -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif static inline void* _shrealloc(void *ptr, size_t size); diff --git a/oshmem/shmem/c/shmem_reduce.c b/oshmem/shmem/c/shmem_reduce.c index 02c47023ce7..dc4bacf1efc 100644 --- a/oshmem/shmem/c/shmem_reduce.c +++ b/oshmem/shmem/c/shmem_reduce.c @@ -137,7 +137,173 @@ #pragma weak shmemx_int16_prod_to_all = pshmemx_int16_prod_to_all #pragma weak shmemx_int32_prod_to_all = pshmemx_int32_prod_to_all #pragma weak 
shmemx_int64_prod_to_all = pshmemx_int64_prod_to_all -#include "oshmem/shmem/c/profile/defines.h" + +/* Teams reduction: AND */ +#pragma weak shmem_uchar_and_reduce = pshmem_uchar_and_reduce +#pragma weak shmem_ushort_and_reduce = pshmem_ushort_and_reduce +#pragma weak shmem_uint_and_reduce = pshmem_uint_and_reduce +#pragma weak shmem_ulong_and_reduce = pshmem_ulong_and_reduce +#pragma weak shmem_ulonglong_and_reduce = pshmem_ulonglong_and_reduce +#pragma weak shmem_int_and_reduce = pshmem_int_and_reduce +#pragma weak shmem_longlong_and_reduce = pshmem_longlong_and_reduce +#pragma weak shmem_int8_and_reduce = pshmem_int8_and_reduce +#pragma weak shmem_int16_and_reduce = pshmem_int16_and_reduce +#pragma weak shmem_int32_and_reduce = pshmem_int32_and_reduce +#pragma weak shmem_int64_and_reduce = pshmem_int64_and_reduce +#pragma weak shmem_uint8_and_reduce = pshmem_uint8_and_reduce +#pragma weak shmem_uint16_and_reduce = pshmem_uint16_and_reduce +#pragma weak shmem_uint32_and_reduce = pshmem_uint32_and_reduce +#pragma weak shmem_uint64_and_reduce = pshmem_uint64_and_reduce +#pragma weak shmem_size_and_reduce = pshmem_size_and_reduce + +/* Teams reduction: OR */ +#pragma weak shmem_uchar_or_reduce = pshmem_uchar_or_reduce +#pragma weak shmem_ushort_or_reduce = pshmem_ushort_or_reduce +#pragma weak shmem_uint_or_reduce = pshmem_uint_or_reduce +#pragma weak shmem_ulong_or_reduce = pshmem_ulong_or_reduce +#pragma weak shmem_ulonglong_or_reduce = pshmem_ulonglong_or_reduce +#pragma weak shmem_int8_or_reduce = pshmem_int8_or_reduce +#pragma weak shmem_int16_or_reduce = pshmem_int16_or_reduce +#pragma weak shmem_int32_or_reduce = pshmem_int32_or_reduce +#pragma weak shmem_int64_or_reduce = pshmem_int64_or_reduce +#pragma weak shmem_uint8_or_reduce = pshmem_uint8_or_reduce +#pragma weak shmem_uint16_or_reduce = pshmem_uint16_or_reduce +#pragma weak shmem_uint32_or_reduce = pshmem_uint32_or_reduce +#pragma weak shmem_uint64_or_reduce = pshmem_uint64_or_reduce +#pragma weak 
shmem_size_or_reduce = pshmem_size_or_reduce + + +/* Teams reduction: XOR */ +#pragma weak shmem_uchar_xor_reduce = pshmem_uchar_xor_reduce +#pragma weak shmem_ushort_xor_reduce = pshmem_ushort_xor_reduce +#pragma weak shmem_uint_xor_reduce = pshmem_uint_xor_reduce +#pragma weak shmem_ulong_xor_reduce = pshmem_ulong_xor_reduce +#pragma weak shmem_ulonglong_xor_reduce = pshmem_ulonglong_xor_reduce +#pragma weak shmem_int8_xor_reduce = pshmem_int8_xor_reduce +#pragma weak shmem_int16_xor_reduce = pshmem_int16_xor_reduce +#pragma weak shmem_int32_xor_reduce = pshmem_int32_xor_reduce +#pragma weak shmem_int64_xor_reduce = pshmem_int64_xor_reduce +#pragma weak shmem_uint8_xor_reduce = pshmem_uint8_xor_reduce +#pragma weak shmem_uint16_xor_reduce = pshmem_uint16_xor_reduce +#pragma weak shmem_uint32_xor_reduce = pshmem_uint32_xor_reduce +#pragma weak shmem_uint64_xor_reduce = pshmem_uint64_xor_reduce +#pragma weak shmem_size_xor_reduce = pshmem_size_xor_reduce + + +/* Teams reduction: MAX */ +#pragma weak shmem_char_max_reduce = pshmem_char_max_reduce +#pragma weak shmem_short_max_reduce = pshmem_short_max_reduce +#pragma weak shmem_int_max_reduce = pshmem_int_max_reduce +#pragma weak shmem_long_max_reduce = pshmem_long_max_reduce +#pragma weak shmem_float_max_reduce = pshmem_float_max_reduce +#pragma weak shmem_double_max_reduce = pshmem_double_max_reduce +#pragma weak shmem_longlong_max_reduce = pshmem_longlong_max_reduce +#pragma weak shmem_schar_max_reduce = pshmem_schar_max_reduce +#pragma weak shmem_longdouble_max_reduce = pshmem_longdouble_max_reduce +#pragma weak shmem_ptrdiff_max_reduce = pshmem_ptrdiff_max_reduce +#pragma weak shmem_uchar_max_reduce = pshmem_uchar_max_reduce +#pragma weak shmem_ushort_max_reduce = pshmem_ushort_max_reduce +#pragma weak shmem_uint_max_reduce = pshmem_uint_max_reduce +#pragma weak shmem_ulong_max_reduce = pshmem_ulong_max_reduce +#pragma weak shmem_ulonglong_max_reduce = pshmem_ulonglong_max_reduce +#pragma weak 
shmem_int8_max_reduce = pshmem_int8_max_reduce +#pragma weak shmem_int16_max_reduce = pshmem_int16_max_reduce +#pragma weak shmem_int32_max_reduce = pshmem_int32_max_reduce +#pragma weak shmem_int64_max_reduce = pshmem_int64_max_reduce +#pragma weak shmem_uint8_max_reduce = pshmem_uint8_max_reduce +#pragma weak shmem_uint16_max_reduce = pshmem_uint16_max_reduce +#pragma weak shmem_uint32_max_reduce = pshmem_uint32_max_reduce +#pragma weak shmem_uint64_max_reduce = pshmem_uint64_max_reduce +#pragma weak shmem_size_max_reduce = pshmem_size_max_reduce + + +/* Teams reduction: MIN */ +#pragma weak shmem_char_min_reduce = pshmem_char_min_reduce +#pragma weak shmem_short_min_reduce = pshmem_short_min_reduce +#pragma weak shmem_int_min_reduce = pshmem_int_min_reduce +#pragma weak shmem_long_min_reduce = pshmem_long_min_reduce +#pragma weak shmem_float_min_reduce = pshmem_float_min_reduce +#pragma weak shmem_double_min_reduce = pshmem_double_min_reduce +#pragma weak shmem_longlong_min_reduce = pshmem_longlong_min_reduce +#pragma weak shmem_schar_min_reduce = pshmem_schar_min_reduce +#pragma weak shmem_longdouble_min_reduce = pshmem_longdouble_min_reduce +#pragma weak shmem_ptrdiff_min_reduce = pshmem_ptrdiff_min_reduce +#pragma weak shmem_uchar_min_reduce = pshmem_uchar_min_reduce +#pragma weak shmem_ushort_min_reduce = pshmem_ushort_min_reduce +#pragma weak shmem_uint_min_reduce = pshmem_uint_min_reduce +#pragma weak shmem_ulong_min_reduce = pshmem_ulong_min_reduce +#pragma weak shmem_ulonglong_min_reduce = pshmem_ulonglong_min_reduce +#pragma weak shmem_int8_min_reduce = pshmem_int8_min_reduce +#pragma weak shmem_int16_min_reduce = pshmem_int16_min_reduce +#pragma weak shmem_int32_min_reduce = pshmem_int32_min_reduce +#pragma weak shmem_int64_min_reduce = pshmem_int64_min_reduce +#pragma weak shmem_uint8_min_reduce = pshmem_uint8_min_reduce +#pragma weak shmem_uint16_min_reduce = pshmem_uint16_min_reduce +#pragma weak shmem_uint32_min_reduce = pshmem_uint32_min_reduce 
+#pragma weak shmem_uint64_min_reduce = pshmem_uint64_min_reduce +#pragma weak shmem_size_min_reduce = pshmem_size_min_reduce + + +/* Teams reduction: SUM */ +#pragma weak shmem_char_sum_reduce = pshmem_char_sum_reduce +#pragma weak shmem_short_sum_reduce = pshmem_short_sum_reduce +#pragma weak shmem_int_sum_reduce = pshmem_int_sum_reduce +#pragma weak shmem_long_sum_reduce = pshmem_long_sum_reduce +#pragma weak shmem_float_sum_reduce = pshmem_float_sum_reduce +#pragma weak shmem_double_sum_reduce = pshmem_double_sum_reduce +#pragma weak shmem_longlong_sum_reduce = pshmem_longlong_sum_reduce +#pragma weak shmem_schar_sum_reduce = pshmem_schar_sum_reduce +#pragma weak shmem_longdouble_sum_reduce = pshmem_longdouble_sum_reduce +#pragma weak shmem_ptrdiff_sum_reduce = pshmem_ptrdiff_sum_reduce +#pragma weak shmem_uchar_sum_reduce = pshmem_uchar_sum_reduce +#pragma weak shmem_ushort_sum_reduce = pshmem_ushort_sum_reduce +#pragma weak shmem_uint_sum_reduce = pshmem_uint_sum_reduce +#pragma weak shmem_ulong_sum_reduce = pshmem_ulong_sum_reduce +#pragma weak shmem_ulonglong_sum_reduce = pshmem_ulonglong_sum_reduce +#pragma weak shmem_int8_sum_reduce = pshmem_int8_sum_reduce +#pragma weak shmem_int16_sum_reduce = pshmem_int16_sum_reduce +#pragma weak shmem_int32_sum_reduce = pshmem_int32_sum_reduce +#pragma weak shmem_int64_sum_reduce = pshmem_int64_sum_reduce +#pragma weak shmem_uint8_sum_reduce = pshmem_uint8_sum_reduce +#pragma weak shmem_uint16_sum_reduce = pshmem_uint16_sum_reduce +#pragma weak shmem_uint32_sum_reduce = pshmem_uint32_sum_reduce +#pragma weak shmem_uint64_sum_reduce = pshmem_uint64_sum_reduce +#pragma weak shmem_size_sum_reduce = pshmem_size_sum_reduce +#pragma weak shmem_complexd_sum_reduce = pshmem_complexd_sum_reduce +#pragma weak shmem_complexf_sum_reduce = pshmem_complexf_sum_reduce + + +/* Teams reduction: PROD */ +#pragma weak shmem_char_prod_reduce = pshmem_char_prod_reduce +#pragma weak shmem_short_prod_reduce = pshmem_short_prod_reduce 
+#pragma weak shmem_int_prod_reduce = pshmem_int_prod_reduce +#pragma weak shmem_long_prod_reduce = pshmem_long_prod_reduce +#pragma weak shmem_float_prod_reduce = pshmem_float_prod_reduce +#pragma weak shmem_double_prod_reduce = pshmem_double_prod_reduce +#pragma weak shmem_longlong_prod_reduce = pshmem_longlong_prod_reduce +#pragma weak shmem_schar_prod_reduce = pshmem_schar_prod_reduce +#pragma weak shmem_longdouble_prod_reduce = pshmem_longdouble_prod_reduce +#pragma weak shmem_ptrdiff_prod_reduce = pshmem_ptrdiff_prod_reduce +#pragma weak shmem_uchar_prod_reduce = pshmem_uchar_prod_reduce +#pragma weak shmem_ushort_prod_reduce = pshmem_ushort_prod_reduce +#pragma weak shmem_uint_prod_reduce = pshmem_uint_prod_reduce +#pragma weak shmem_ulong_prod_reduce = pshmem_ulong_prod_reduce +#pragma weak shmem_ulonglong_prod_reduce = pshmem_ulonglong_prod_reduce +#pragma weak shmem_int8_prod_reduce = pshmem_int8_prod_reduce +#pragma weak shmem_int16_prod_reduce = pshmem_int16_prod_reduce +#pragma weak shmem_int32_prod_reduce = pshmem_int32_prod_reduce +#pragma weak shmem_int64_prod_reduce = pshmem_int64_prod_reduce +#pragma weak shmem_uint8_prod_reduce = pshmem_uint8_prod_reduce +#pragma weak shmem_uint16_prod_reduce = pshmem_uint16_prod_reduce +#pragma weak shmem_uint32_prod_reduce = pshmem_uint32_prod_reduce +#pragma weak shmem_uint64_prod_reduce = pshmem_uint64_prod_reduce +#pragma weak shmem_size_prod_reduce = pshmem_size_prod_reduce +#pragma weak shmem_complexd_prod_reduce = pshmem_complexd_prod_reduce +#pragma weak shmem_complexf_prod_reduce = pshmem_complexf_prod_reduce + + + +#include "oshmem/shmem/c/profile-defines.h" #endif /* OSHMEM_PROFILING */ SHMEM_TYPE_REDUCE_OP(and, _short, short, shmem) @@ -211,3 +377,184 @@ SHMEM_TYPE_REDUCE_OP(prod, _complexd, double complex, shmem) SHMEM_TYPE_REDUCE_OP(prod, _int16, int16_t, shmemx) SHMEM_TYPE_REDUCE_OP(prod, _int32, int32_t, shmemx) SHMEM_TYPE_REDUCE_OP(prod, _int64, int64_t, shmemx) + + +#define 
SHMEM_TYPE_TEAM_REDUCE_OP(_op, type_name, type, op_code, code) \ + int shmem##type_name##_##_op##_reduce( shmem_team_t team, type *dest, const type *source, size_t nreduce) \ +{ \ + int rc = OSHMEM_SUCCESS; \ + \ + RUNTIME_CHECK_INIT(); \ + RUNTIME_CHECK_ADDR_SIZE(dest, nreduce); \ + RUNTIME_CHECK_ADDR_SIZE(source, nreduce); \ + \ + { \ + \ + /* Call collective reduce operation */ \ + rc = MCA_SPML_CALL(team_reduce( \ + team, (void*)dest, (void*)source, nreduce, op_code, code)); \ + \ + } \ + RUNTIME_CHECK_RC(rc); \ + \ + return rc; \ +} + +SHMEM_TYPE_TEAM_REDUCE_OP(and, _uchar, unsigned char, OSHMEM_OP_AND, SHMEM_UCHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _ushort, unsigned short, OSHMEM_OP_AND, SHMEM_USHORT) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _uint, unsigned int, OSHMEM_OP_AND, SHMEM_UINT) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _ulong, unsigned long, OSHMEM_OP_AND, SHMEM_ULONG) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _ulonglong, unsigned long long, OSHMEM_OP_AND, SHMEM_ULLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _int, int, OSHMEM_OP_AND, SHMEM_INT) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _longlong, long long, OSHMEM_OP_AND, SHMEM_LLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _int8, int8_t, OSHMEM_OP_AND, SHMEM_INT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _int16, int16_t, OSHMEM_OP_AND, SHMEM_INT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _int32, int32_t, OSHMEM_OP_AND, SHMEM_INT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _int64, int64_t, OSHMEM_OP_AND, SHMEM_INT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _uint8, uint8_t, OSHMEM_OP_AND, SHMEM_UINT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _uint16, uint16_t, OSHMEM_OP_AND, SHMEM_UINT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _uint32, uint32_t, OSHMEM_OP_AND, SHMEM_UINT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _uint64, uint64_t, OSHMEM_OP_AND, SHMEM_UINT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(and, _size, size_t, OSHMEM_OP_AND, SHMEM_SIZE_T) + +SHMEM_TYPE_TEAM_REDUCE_OP(or, _uchar, unsigned char, OSHMEM_OP_OR, SHMEM_UCHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _ushort, unsigned short, OSHMEM_OP_OR, 
SHMEM_USHORT) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _uint, unsigned int, OSHMEM_OP_OR, SHMEM_UINT) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _ulong, unsigned long, OSHMEM_OP_OR, SHMEM_ULONG) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _ulonglong, unsigned long long, OSHMEM_OP_OR, SHMEM_ULLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _int, int, OSHMEM_OP_OR, SHMEM_INT) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _longlong, long long, OSHMEM_OP_OR, SHMEM_LLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _int8, int8_t, OSHMEM_OP_OR, SHMEM_INT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _int16, int16_t, OSHMEM_OP_OR, SHMEM_INT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _int32, int32_t, OSHMEM_OP_OR, SHMEM_INT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _int64, int64_t, OSHMEM_OP_OR, SHMEM_INT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _uint8, uint8_t, OSHMEM_OP_OR, SHMEM_UINT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _uint16, uint16_t, OSHMEM_OP_OR, SHMEM_UINT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _uint32, uint32_t, OSHMEM_OP_OR, SHMEM_UINT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _uint64, uint64_t, OSHMEM_OP_OR, SHMEM_UINT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(or, _size, size_t, OSHMEM_OP_OR, SHMEM_SIZE_T) + +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _uchar, unsigned char, OSHMEM_OP_XOR, SHMEM_UCHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _ushort, unsigned short, OSHMEM_OP_XOR, SHMEM_USHORT) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _uint, unsigned int, OSHMEM_OP_XOR, SHMEM_UINT) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _ulong, unsigned long, OSHMEM_OP_XOR, SHMEM_ULONG) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _ulonglong, unsigned long long, OSHMEM_OP_XOR, SHMEM_ULLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _int, int, OSHMEM_OP_XOR, SHMEM_INT) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _longlong, long long, OSHMEM_OP_XOR, SHMEM_LLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _int8, int8_t, OSHMEM_OP_XOR, SHMEM_INT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _int16, int16_t, OSHMEM_OP_XOR, SHMEM_INT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _int32, int32_t, OSHMEM_OP_XOR, SHMEM_INT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _int64, int64_t, OSHMEM_OP_XOR, SHMEM_INT64_T) 
+SHMEM_TYPE_TEAM_REDUCE_OP(xor, _uint8, uint8_t, OSHMEM_OP_XOR, SHMEM_UINT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _uint16, uint16_t, OSHMEM_OP_XOR, SHMEM_UINT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _uint32, uint32_t, OSHMEM_OP_XOR, SHMEM_UINT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _uint64, uint64_t, OSHMEM_OP_XOR, SHMEM_UINT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(xor, _size, size_t, OSHMEM_OP_XOR, SHMEM_SIZE_T) + + + +SHMEM_TYPE_TEAM_REDUCE_OP(max, _uchar, unsigned char, OSHMEM_OP_MAX, SHMEM_UCHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _ushort, unsigned short, OSHMEM_OP_MAX, SHMEM_USHORT) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _uint, unsigned int, OSHMEM_OP_MAX, SHMEM_UINT) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _ulong, unsigned long, OSHMEM_OP_MAX, SHMEM_ULONG) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _ulonglong, unsigned long long, OSHMEM_OP_MAX, SHMEM_ULLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _int8, int8_t, OSHMEM_OP_MAX, SHMEM_INT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _int16, int16_t, OSHMEM_OP_MAX, SHMEM_INT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _int32, int32_t, OSHMEM_OP_MAX, SHMEM_INT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _int64, int64_t, OSHMEM_OP_MAX, SHMEM_INT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _uint8, uint8_t, OSHMEM_OP_MAX, SHMEM_UINT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _uint16, uint16_t, OSHMEM_OP_MAX, SHMEM_UINT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _uint32, uint32_t, OSHMEM_OP_MAX, SHMEM_UINT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _uint64, uint64_t, OSHMEM_OP_MAX, SHMEM_UINT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _size, size_t, OSHMEM_OP_MAX, SHMEM_SIZE_T) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _char, char, OSHMEM_OP_MAX, SHMEM_CHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _short, short, OSHMEM_OP_MAX, SHMEM_SHORT) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _int, int, OSHMEM_OP_MAX, SHMEM_INT) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _long, long, OSHMEM_OP_MAX, SHMEM_LONG) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _float, float, OSHMEM_OP_MAX, SHMEM_FLOAT) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _double, double, OSHMEM_OP_MAX, SHMEM_DOUBLE) 
+SHMEM_TYPE_TEAM_REDUCE_OP(max, _longlong, long long, OSHMEM_OP_MAX, SHMEM_LLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _schar, signed char, OSHMEM_OP_MAX, SHMEM_SCHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _longdouble, long double, OSHMEM_OP_MAX, SHMEM_LDOUBLE) +SHMEM_TYPE_TEAM_REDUCE_OP(max, _ptrdiff, ptrdiff_t, OSHMEM_OP_MAX, SHMEM_PTRDIFF_T) + + +SHMEM_TYPE_TEAM_REDUCE_OP(min, _uchar, unsigned char, OSHMEM_OP_MIN, SHMEM_UCHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _ushort, unsigned short, OSHMEM_OP_MIN, SHMEM_USHORT) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _uint, unsigned int, OSHMEM_OP_MIN, SHMEM_UINT) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _ulong, unsigned long, OSHMEM_OP_MIN, SHMEM_ULONG) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _ulonglong, unsigned long long, OSHMEM_OP_MIN, SHMEM_ULLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _int8, int8_t, OSHMEM_OP_MIN, SHMEM_INT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _int16, int16_t, OSHMEM_OP_MIN, SHMEM_INT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _int32, int32_t, OSHMEM_OP_MIN, SHMEM_INT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _int64, int64_t, OSHMEM_OP_MIN, SHMEM_INT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _uint8, uint8_t, OSHMEM_OP_MIN, SHMEM_UINT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _uint16, uint16_t, OSHMEM_OP_MIN, SHMEM_UINT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _uint32, uint32_t, OSHMEM_OP_MIN, SHMEM_UINT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _uint64, uint64_t, OSHMEM_OP_MIN, SHMEM_UINT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _size, size_t, OSHMEM_OP_MIN, SHMEM_SIZE_T) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _char, char, OSHMEM_OP_MIN, SHMEM_CHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _short, short, OSHMEM_OP_MIN, SHMEM_SHORT) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _int, int, OSHMEM_OP_MIN, SHMEM_INT) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _long, long, OSHMEM_OP_MIN, SHMEM_LONG) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _float, float, OSHMEM_OP_MIN, SHMEM_FLOAT) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _double, double, OSHMEM_OP_MIN, SHMEM_DOUBLE) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _longlong, long long, OSHMEM_OP_MIN, 
SHMEM_LLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _schar, signed char, OSHMEM_OP_MIN, SHMEM_SCHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _longdouble, long double, OSHMEM_OP_MIN, SHMEM_LDOUBLE) +SHMEM_TYPE_TEAM_REDUCE_OP(min, _ptrdiff, ptrdiff_t, OSHMEM_OP_MIN, SHMEM_PTRDIFF_T) + + +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _uchar, unsigned char, OSHMEM_OP_SUM, SHMEM_UCHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _ushort, unsigned short, OSHMEM_OP_SUM, SHMEM_USHORT) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _uint, unsigned int, OSHMEM_OP_SUM, SHMEM_UINT) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _ulong, unsigned long, OSHMEM_OP_SUM, SHMEM_ULONG) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _ulonglong, unsigned long long, OSHMEM_OP_SUM, SHMEM_ULLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _int8, int8_t, OSHMEM_OP_SUM, SHMEM_INT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _int16, int16_t, OSHMEM_OP_SUM, SHMEM_INT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _int32, int32_t, OSHMEM_OP_SUM, SHMEM_INT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _int64, int64_t, OSHMEM_OP_SUM, SHMEM_INT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _uint8, uint8_t, OSHMEM_OP_SUM, SHMEM_UINT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _uint16, uint16_t, OSHMEM_OP_SUM, SHMEM_UINT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _uint32, uint32_t, OSHMEM_OP_SUM, SHMEM_UINT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _uint64, uint64_t, OSHMEM_OP_SUM, SHMEM_UINT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _size, size_t, OSHMEM_OP_SUM, SHMEM_SIZE_T) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _char, char, OSHMEM_OP_SUM, SHMEM_CHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _short, short, OSHMEM_OP_SUM, SHMEM_SHORT) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _int, int, OSHMEM_OP_SUM, SHMEM_INT) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _long, long, OSHMEM_OP_SUM, SHMEM_LONG) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _float, float, OSHMEM_OP_SUM, SHMEM_FLOAT) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _double, double, OSHMEM_OP_SUM, SHMEM_DOUBLE) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _longlong, long long, OSHMEM_OP_SUM, SHMEM_LLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _schar, signed char, 
OSHMEM_OP_SUM, SHMEM_SCHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _longdouble, long double, OSHMEM_OP_SUM, SHMEM_LDOUBLE) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _ptrdiff, ptrdiff_t, OSHMEM_OP_SUM, SHMEM_PTRDIFF_T) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _complexd, double complex, OSHMEM_OP_SUM, SHMEM_COMPLEXD) +SHMEM_TYPE_TEAM_REDUCE_OP(sum, _complexf, float complex, OSHMEM_OP_SUM, SHMEM_COMPLEXF) + +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _uchar, unsigned char, OSHMEM_OP_PROD, SHMEM_UCHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _ushort, unsigned short, OSHMEM_OP_PROD, SHMEM_USHORT) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _uint, unsigned int, OSHMEM_OP_PROD, SHMEM_UINT) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _ulong, unsigned long, OSHMEM_OP_PROD, SHMEM_ULONG) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _ulonglong, unsigned long long, OSHMEM_OP_PROD, SHMEM_ULLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _int8, int8_t, OSHMEM_OP_PROD, SHMEM_INT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _int16, int16_t, OSHMEM_OP_PROD, SHMEM_INT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _int32, int32_t, OSHMEM_OP_PROD, SHMEM_INT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _int64, int64_t, OSHMEM_OP_PROD, SHMEM_INT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _uint8, uint8_t, OSHMEM_OP_PROD, SHMEM_UINT8_T) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _uint16, uint16_t, OSHMEM_OP_PROD, SHMEM_UINT16_T) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _uint32, uint32_t, OSHMEM_OP_PROD, SHMEM_UINT32_T) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _uint64, uint64_t, OSHMEM_OP_PROD, SHMEM_UINT64_T) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _size, size_t, OSHMEM_OP_PROD, SHMEM_SIZE_T) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _char, char, OSHMEM_OP_PROD, SHMEM_CHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _short, short, OSHMEM_OP_PROD, SHMEM_SHORT) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _int, int, OSHMEM_OP_PROD, SHMEM_INT) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _long, long, OSHMEM_OP_PROD, SHMEM_LONG) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _float, float, OSHMEM_OP_PROD, SHMEM_FLOAT) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _double, double, OSHMEM_OP_PROD, 
SHMEM_DOUBLE) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _longlong, long long, OSHMEM_OP_PROD, SHMEM_LLONG) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _schar, signed char, OSHMEM_OP_PROD, SHMEM_SCHAR) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _longdouble, long double, OSHMEM_OP_PROD, SHMEM_LDOUBLE) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _ptrdiff, ptrdiff_t, OSHMEM_OP_PROD, SHMEM_PTRDIFF_T) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _complexd, double complex, OSHMEM_OP_PROD, SHMEM_COMPLEXD) +SHMEM_TYPE_TEAM_REDUCE_OP(prod, _complexf, float complex, OSHMEM_OP_PROD, SHMEM_COMPLEXF) diff --git a/oshmem/shmem/c/shmem_set.c b/oshmem/shmem/c/shmem_set.c index 30fef842cb3..268ba8221bd 100644 --- a/oshmem/shmem/c/shmem_set.c +++ b/oshmem/shmem/c/shmem_set.c @@ -68,6 +68,12 @@ #pragma weak shmem_ctx_ulonglong_atomic_set = pshmem_ctx_ulonglong_atomic_set #pragma weak shmem_ctx_float_atomic_set = pshmem_ctx_float_atomic_set #pragma weak shmem_ctx_double_atomic_set = pshmem_ctx_double_atomic_set +#pragma weak shmem_ctx_int32_atomic_set = pshmem_ctx_int32_atomic_set +#pragma weak shmem_ctx_int64_atomic_set = pshmem_ctx_int64_atomic_set +#pragma weak shmem_ctx_uint32_atomic_set = pshmem_ctx_uint32_atomic_set +#pragma weak shmem_ctx_uint64_atomic_set = pshmem_ctx_uint64_atomic_set +#pragma weak shmem_ctx_size_atomic_set = pshmem_ctx_size_atomic_set +#pragma weak shmem_ctx_ptrdiff_atomic_set = pshmem_ctx_ptrdiff_atomic_set #pragma weak shmem_int_atomic_set = pshmem_int_atomic_set #pragma weak shmem_long_atomic_set = pshmem_long_atomic_set @@ -77,6 +83,12 @@ #pragma weak shmem_ulonglong_atomic_set = pshmem_ulonglong_atomic_set #pragma weak shmem_float_atomic_set = pshmem_float_atomic_set #pragma weak shmem_double_atomic_set = pshmem_double_atomic_set +#pragma weak shmem_int32_atomic_set = pshmem_int32_atomic_set +#pragma weak shmem_int64_atomic_set = pshmem_int64_atomic_set +#pragma weak shmem_uint32_atomic_set = pshmem_uint32_atomic_set +#pragma weak shmem_uint64_atomic_set = pshmem_uint64_atomic_set +#pragma weak 
shmem_size_atomic_set = pshmem_size_atomic_set +#pragma weak shmem_ptrdiff_atomic_set = pshmem_ptrdiff_atomic_set #pragma weak shmem_int_set = pshmem_int_set #pragma weak shmem_long_set = pshmem_long_set @@ -86,7 +98,7 @@ #pragma weak shmemx_int32_set = pshmemx_int32_set #pragma weak shmemx_int64_set = pshmemx_int64_set -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_ATOMIC_SET(_int, int, shmem) @@ -97,6 +109,12 @@ SHMEM_CTX_TYPE_ATOMIC_SET(_ulong, unsigned long, shmem) SHMEM_CTX_TYPE_ATOMIC_SET(_ulonglong, unsigned long long, shmem) SHMEM_CTX_TYPE_ATOMIC_SET(_float, float, shmem) SHMEM_CTX_TYPE_ATOMIC_SET(_double, double, shmem) +SHMEM_CTX_TYPE_ATOMIC_SET(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SET(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SET(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SET(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SET(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SET(_ptrdiff, ptrdiff_t, shmem) SHMEM_TYPE_ATOMIC_SET(_int, int, shmem) SHMEM_TYPE_ATOMIC_SET(_long, long, shmem) SHMEM_TYPE_ATOMIC_SET(_longlong, long long, shmem) @@ -105,6 +123,12 @@ SHMEM_TYPE_ATOMIC_SET(_ulong, unsigned long, shmem) SHMEM_TYPE_ATOMIC_SET(_ulonglong, unsigned long long, shmem) SHMEM_TYPE_ATOMIC_SET(_float, float, shmem) SHMEM_TYPE_ATOMIC_SET(_double, double, shmem) +SHMEM_TYPE_ATOMIC_SET(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_SET(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_SET(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_SET(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_SET(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_SET(_ptrdiff, ptrdiff_t, shmem) /* deprecated APIs */ #define SHMEM_TYPE_SET(type_name, type, prefix) \ diff --git a/oshmem/shmem/c/shmem_set_cache_inv.c b/oshmem/shmem/c/shmem_set_cache_inv.c index 33f80f53c16..8c3e00f13fb 100644 --- a/oshmem/shmem/c/shmem_set_cache_inv.c +++ b/oshmem/shmem/c/shmem_set_cache_inv.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. 
All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -18,17 +20,17 @@ #include "oshmem/mca/spml/spml.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_set_cache_inv = pshmem_set_cache_inv -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_set_cache_inv(void) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_set_cache_inv is not supported by the current CPU architecture");}while (0); #else /* another implementation */ diff --git a/oshmem/shmem/c/shmem_set_cache_line_inv.c b/oshmem/shmem/c/shmem_set_cache_line_inv.c index 894782f8a9b..74c612ac6e9 100644 --- a/oshmem/shmem/c/shmem_set_cache_line_inv.c +++ b/oshmem/shmem/c/shmem_set_cache_line_inv.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -18,17 +20,17 @@ #include "oshmem/mca/spml/spml.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_set_cache_line_inv = pshmem_set_cache_line_inv -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_set_cache_line_inv(void *target) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_set_cache_line_inv is not supported by the current CPU architecture");}while (0); #else /* another implementation */ diff --git a/oshmem/shmem/c/shmem_set_lock.c b/oshmem/shmem/c/shmem_set_lock.c index a4864ffe441..514cb2111c3 100644 --- a/oshmem/shmem/c/shmem_set_lock.c +++ b/oshmem/shmem/c/shmem_set_lock.c @@ -22,7 +22,7 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_set_lock = pshmem_set_lock -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_set_lock(volatile long *lock) diff --git a/oshmem/shmem/c/shmem_swap.c b/oshmem/shmem/c/shmem_swap.c index b6586d0934d..d118b22d5e8 100644 --- a/oshmem/shmem/c/shmem_swap.c +++ b/oshmem/shmem/c/shmem_swap.c @@ -70,6 +70,12 @@ #pragma weak shmem_ctx_ulonglong_atomic_swap = pshmem_ctx_ulonglong_atomic_swap #pragma weak shmem_ctx_float_atomic_swap = pshmem_ctx_float_atomic_swap #pragma weak shmem_ctx_double_atomic_swap = pshmem_ctx_double_atomic_swap +#pragma weak shmem_ctx_int32_atomic_swap = pshmem_ctx_int32_atomic_swap +#pragma weak shmem_ctx_int64_atomic_swap = pshmem_ctx_int64_atomic_swap +#pragma weak shmem_ctx_uint32_atomic_swap = pshmem_ctx_uint32_atomic_swap +#pragma weak shmem_ctx_uint64_atomic_swap = pshmem_ctx_uint64_atomic_swap +#pragma weak shmem_ctx_size_atomic_swap = 
pshmem_ctx_size_atomic_swap +#pragma weak shmem_ctx_ptrdiff_atomic_swap = pshmem_ctx_ptrdiff_atomic_swap #pragma weak shmem_int_atomic_swap = pshmem_int_atomic_swap #pragma weak shmem_long_atomic_swap = pshmem_long_atomic_swap @@ -79,6 +85,12 @@ #pragma weak shmem_ulonglong_atomic_swap = pshmem_ulonglong_atomic_swap #pragma weak shmem_float_atomic_swap = pshmem_float_atomic_swap #pragma weak shmem_double_atomic_swap = pshmem_double_atomic_swap +#pragma weak shmem_int32_atomic_swap = pshmem_int32_atomic_swap +#pragma weak shmem_int64_atomic_swap = pshmem_int64_atomic_swap +#pragma weak shmem_uint32_atomic_swap = pshmem_uint32_atomic_swap +#pragma weak shmem_uint64_atomic_swap = pshmem_uint64_atomic_swap +#pragma weak shmem_size_atomic_swap = pshmem_size_atomic_swap +#pragma weak shmem_ptrdiff_atomic_swap = pshmem_ptrdiff_atomic_swap #pragma weak shmem_int_swap = pshmem_int_swap #pragma weak shmem_long_swap = pshmem_long_swap @@ -88,7 +100,8 @@ #pragma weak shmemx_int32_swap = pshmemx_int32_swap #pragma weak shmemx_int64_swap = pshmemx_int64_swap -#include "oshmem/shmem/c/profile/defines.h" + +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_CTX_TYPE_ATOMIC_SWAP(_int, int, shmem) @@ -99,6 +112,12 @@ SHMEM_CTX_TYPE_ATOMIC_SWAP(_ulong, unsigned long, shmem) SHMEM_CTX_TYPE_ATOMIC_SWAP(_ulonglong, unsigned long long, shmem) SHMEM_CTX_TYPE_ATOMIC_SWAP(_float, float, shmem) SHMEM_CTX_TYPE_ATOMIC_SWAP(_double, double, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP(_ptrdiff, ptrdiff_t, shmem) SHMEM_TYPE_ATOMIC_SWAP(_int, int, shmem) SHMEM_TYPE_ATOMIC_SWAP(_long, long, shmem) SHMEM_TYPE_ATOMIC_SWAP(_longlong, long long, shmem) @@ -107,6 +126,12 @@ SHMEM_TYPE_ATOMIC_SWAP(_ulong, unsigned long, shmem) 
SHMEM_TYPE_ATOMIC_SWAP(_ulonglong, unsigned long long, shmem) SHMEM_TYPE_ATOMIC_SWAP(_float, float, shmem) SHMEM_TYPE_ATOMIC_SWAP(_double, double, shmem) +SHMEM_TYPE_ATOMIC_SWAP(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_SWAP(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_SWAP(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_SWAP(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_SWAP(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_SWAP(_ptrdiff, ptrdiff_t, shmem) /* deprecated APIs */ #define SHMEM_TYPE_SWAP(type_name, type, prefix) \ diff --git a/oshmem/shmem/c/shmem_swap_nb.c b/oshmem/shmem/c/shmem_swap_nb.c new file mode 100644 index 00000000000..a8d560ba6b6 --- /dev/null +++ b/oshmem/shmem/c/shmem_swap_nb.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/mca/atomic/atomic.h" + +/* + * It performs an atomic nonblocking swap operation. + * The atomic swap routines write value to address target on PE pe, and return the previous + * contents of target. The operation must be completed without the possibility of another + * process updating target between the time of the fetch and the update. 
+ */ +#define DO_SHMEM_TYPE_ATOMIC_SWAP_NBI(ctx, type, fetch, target, value, pe, out_value) do { \ + int rc = OSHMEM_SUCCESS; \ + size_t size = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + RUNTIME_CHECK_PE(pe); \ + RUNTIME_CHECK_ADDR(target); \ + \ + size = sizeof(out_value); \ + rc = MCA_ATOMIC_CALL(swap_nb( \ + ctx, \ + fetch, \ + (void*)target, \ + (void*)&out_value, \ + OSHMEM_ATOMIC_PTR_2_INT(&value, sizeof(value)), \ + size, \ + pe)); \ + RUNTIME_CHECK_RC(rc); \ + } while (0) + +#define SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(type_name, type, prefix) \ + void prefix##_ctx##type_name##_atomic_swap_nbi(shmem_ctx_t ctx, type *fetch, type *target, type value, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_SWAP_NBI(ctx, type, fetch, target, value, pe, \ + out_value); \ + return ; \ + } + +#define SHMEM_TYPE_ATOMIC_SWAP_NBI(type_name, type, prefix) \ + void prefix##type_name##_atomic_swap_nbi(type *fetch, type *target, type value, int pe)\ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_SWAP_NBI(oshmem_ctx_default, type, fetch, target, \ + value, pe, out_value); \ + return ; \ + } + +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" + +/* Nonblocking Atomic Swap */ +#pragma weak shmem_ctx_double_atomic_swap_nbi = pshmem_ctx_double_atomic_swap_nbi +#pragma weak shmem_ctx_float_atomic_swap_nbi = pshmem_ctx_float_atomic_swap_nbi +#pragma weak shmem_ctx_int_atomic_swap_nbi = pshmem_ctx_int_atomic_swap_nbi +#pragma weak shmem_ctx_long_atomic_swap_nbi = pshmem_ctx_long_atomic_swap_nbi +#pragma weak shmem_ctx_longlong_atomic_swap_nbi = pshmem_ctx_longlong_atomic_swap_nbi +#pragma weak shmem_ctx_uint_atomic_swap_nbi = pshmem_ctx_uint_atomic_swap_nbi +#pragma weak shmem_ctx_ulong_atomic_swap_nbi = pshmem_ctx_ulong_atomic_swap_nbi +#pragma weak shmem_ctx_ulonglong_atomic_swap_nbi = pshmem_ctx_ulonglong_atomic_swap_nbi +#pragma weak shmem_ctx_int32_atomic_swap_nbi = pshmem_ctx_int32_atomic_swap_nbi +#pragma weak shmem_ctx_int64_atomic_swap_nbi = 
pshmem_ctx_int64_atomic_swap_nbi +#pragma weak shmem_ctx_uint32_atomic_swap_nbi = pshmem_ctx_uint32_atomic_swap_nbi +#pragma weak shmem_ctx_uint64_atomic_swap_nbi = pshmem_ctx_uint64_atomic_swap_nbi +#pragma weak shmem_ctx_size_atomic_swap_nbi = pshmem_ctx_size_atomic_swap_nbi +#pragma weak shmem_ctx_ptrdiff_atomic_swap_nbi = pshmem_ctx_ptrdiff_atomic_swap_nbi + +#pragma weak shmem_double_atomic_swap_nbi = pshmem_double_atomic_swap_nbi +#pragma weak shmem_float_atomic_swap_nbi = pshmem_float_atomic_swap_nbi +#pragma weak shmem_int_atomic_swap_nbi = pshmem_int_atomic_swap_nbi +#pragma weak shmem_long_atomic_swap_nbi = pshmem_long_atomic_swap_nbi +#pragma weak shmem_longlong_atomic_swap_nbi = pshmem_longlong_atomic_swap_nbi +#pragma weak shmem_uint_atomic_swap_nbi = pshmem_uint_atomic_swap_nbi +#pragma weak shmem_ulong_atomic_swap_nbi = pshmem_ulong_atomic_swap_nbi +#pragma weak shmem_ulonglong_atomic_swap_nbi = pshmem_ulonglong_atomic_swap_nbi +#pragma weak shmem_int32_atomic_swap_nbi = pshmem_int32_atomic_swap_nbi +#pragma weak shmem_int64_atomic_swap_nbi = pshmem_int64_atomic_swap_nbi +#pragma weak shmem_uint32_atomic_swap_nbi = pshmem_uint32_atomic_swap_nbi +#pragma weak shmem_uint64_atomic_swap_nbi = pshmem_uint64_atomic_swap_nbi +#pragma weak shmem_size_atomic_swap_nbi = pshmem_size_atomic_swap_nbi +#pragma weak shmem_ptrdiff_atomic_swap_nbi = pshmem_ptrdiff_atomic_swap_nbi + +#include "oshmem/shmem/c/profile-defines.h" +#endif + +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_ulonglong, unsigned long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_float, float, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_double, double, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_int32, int32_t, shmem) 
+SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_size, size_t, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP_NBI(_ptrdiff, ptrdiff_t, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_int, int, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_long, long, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_uint, unsigned int, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_ulong, unsigned long, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_ulonglong, unsigned long long, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_float, float, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_double, double, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_int32, int32_t, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_int64, int64_t, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_uint32, uint32_t, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_uint64, uint64_t, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_size, size_t, shmem) +SHMEM_TYPE_ATOMIC_SWAP_NBI(_ptrdiff, ptrdiff_t, shmem) diff --git a/oshmem/shmem/c/shmem_sync.c b/oshmem/shmem/c/shmem_sync.c index 9f2b983aa19..1cd7266fc3a 100644 --- a/oshmem/shmem/c/shmem_sync.c +++ b/oshmem/shmem/c/shmem_sync.c @@ -22,12 +22,12 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" -#pragma weak shmem_sync = pshmem_sync #pragma weak shmem_sync_all = pshmem_sync_all -#include "oshmem/shmem/c/profile/defines.h" +#pragma weak shmem_sync_deprecated = pshmem_sync_deprecated +#include "oshmem/shmem/c/profile-defines.h" #endif -void shmem_sync(int PE_start, int logPE_stride, int PE_size, long *pSync) +void shmem_sync_deprecated(int PE_start, int logPE_stride, int PE_size, long *pSync) { int rc; oshmem_group_t* group; diff --git a/oshmem/shmem/c/shmem_team.c b/oshmem/shmem/c/shmem_team.c new file mode 100644 index 00000000000..7004080f869 --- /dev/null +++ b/oshmem/shmem/c/shmem_team.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. 
+ * Copyright (c) 2019 IBM Corporation. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/mca/spml/spml.h" + +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" +/* + * Team management routines + */ +#pragma weak shmem_team_sync = pshmem_team_sync +#pragma weak shmem_team_my_pe = pshmem_team_my_pe +#pragma weak shmem_team_n_pes = pshmem_team_n_pes +#pragma weak shmem_team_get_config = pshmem_team_get_config +#pragma weak shmem_team_translate_pe = pshmem_team_translate_pe +#pragma weak shmem_team_split_strided = pshmem_team_split_strided +#pragma weak shmem_team_split_2d = pshmem_team_split_2d +#pragma weak shmem_team_destroy = pshmem_team_destroy +#pragma weak shmem_ctx_get_team = pshmem_ctx_get_team +#pragma weak shmem_team_create_ctx = pshmem_team_create_ctx + +#include "oshmem/shmem/c/profile-defines.h" +#endif + +void shmem_team_sync(shmem_team_t team) +{ + int rc = 0; + + RUNTIME_CHECK_INIT(); + + rc = MCA_SPML_CALL(team_sync(team)); + RUNTIME_CHECK_IMPL_RC(rc); + + return ; +} + +int shmem_team_my_pe(shmem_team_t team) +{ + int rc = 0; + + RUNTIME_CHECK_INIT(); + + rc = MCA_SPML_CALL(team_my_pe(team)); + RUNTIME_CHECK_IMPL_RC(rc); + + return rc; +} + +int shmem_team_n_pes(shmem_team_t team) +{ + int rc = 0; + + RUNTIME_CHECK_INIT(); + + rc = MCA_SPML_CALL(team_n_pes(team)); + RUNTIME_CHECK_IMPL_RC(rc); + + return rc; +} +int shmem_team_get_config(shmem_team_t team, long config_mask, shmem_team_config_t *config) +{ + int rc = 0; + + RUNTIME_CHECK_INIT(); + + rc = MCA_SPML_CALL(team_get_config(team, config_mask, config)); + RUNTIME_CHECK_RC(rc); + + return rc; +} +int shmem_team_translate_pe(shmem_team_t src_team, int src_pe, shmem_team_t dest_team) +{ + int rc = 0; + + RUNTIME_CHECK_INIT(); + + rc = 
MCA_SPML_CALL(team_translate_pe(src_team, src_pe, dest_team)); + RUNTIME_CHECK_IMPL_RC(rc); + + return rc; +} +int shmem_team_split_strided (shmem_team_t parent_team, int start, int stride, + int size, const shmem_team_config_t *config, long config_mask, + shmem_team_t *new_team) +{ + int rc = 0; + + RUNTIME_CHECK_INIT(); + + rc = MCA_SPML_CALL(team_split_strided(parent_team, start, stride, size, + config, config_mask, new_team)); + RUNTIME_CHECK_RC(rc); + + return rc; +} + +int shmem_team_split_2d (shmem_team_t parent_team, int xrange, const + shmem_team_config_t *xaxis_config, long xaxis_mask, shmem_team_t + *xaxis_team, const shmem_team_config_t *yaxis_config, long yaxis_mask, + shmem_team_t *yaxis_team) +{ + int rc = 0; + + RUNTIME_CHECK_INIT(); + + rc = MCA_SPML_CALL(team_split_2d(parent_team, xrange, xaxis_config, + xaxis_mask, xaxis_team, yaxis_config, yaxis_mask, yaxis_team)); + RUNTIME_CHECK_RC(rc); + + return rc; +} + +void shmem_team_destroy(shmem_team_t team) +{ + int rc = 0; + + RUNTIME_CHECK_INIT(); + + rc = MCA_SPML_CALL(team_destroy(team)); + RUNTIME_CHECK_RC(rc); + + return ; +} + +int shmem_ctx_get_team(shmem_ctx_t ctx, shmem_team_t *team) +{ + int rc = 0; + + RUNTIME_CHECK_INIT(); + + rc = MCA_SPML_CALL(team_get(ctx, team)); + RUNTIME_CHECK_RC(rc); + + return rc; +} + +int shmem_team_create_ctx(shmem_team_t team, long options, shmem_ctx_t *ctx) +{ + int rc = 0; + + RUNTIME_CHECK_INIT(); + + rc = MCA_SPML_CALL(team_create_ctx(team, options, ctx)); + RUNTIME_CHECK_RC(rc); + + return rc; +} diff --git a/oshmem/shmem/c/shmem_test_ivars.c b/oshmem/shmem/c/shmem_test_ivars.c new file mode 100644 index 00000000000..b5ef86b18dc --- /dev/null +++ b/oshmem/shmem/c/shmem_test_ivars.c @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/mca/spml/spml.h" + + +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" + +#pragma weak shmem_short_test_all = pshmem_short_test_all +#pragma weak shmem_ushort_test_all = pshmem_ushort_test_all +#pragma weak shmem_int_test_all = pshmem_int_test_all +#pragma weak shmem_long_test_all = pshmem_long_test_all +#pragma weak shmem_longlong_test_all = pshmem_longlong_test_all +#pragma weak shmem_uint_test_all = pshmem_uint_test_all +#pragma weak shmem_ulong_test_all = pshmem_ulong_test_all +#pragma weak shmem_ulonglong_test_all = pshmem_ulonglong_test_all +#pragma weak shmem_int32_test_all = pshmem_int32_test_all +#pragma weak shmem_int64_test_all = pshmem_int64_test_all +#pragma weak shmem_uint32_test_all = pshmem_uint32_test_all +#pragma weak shmem_uint64_test_all = pshmem_uint64_test_all +#pragma weak shmem_size_test_all = pshmem_size_test_all +#pragma weak shmem_ptrdiff_test_all = pshmem_ptrdiff_test_all + + +#pragma weak shmem_short_test_any = pshmem_short_test_any +#pragma weak shmem_ushort_test_any = pshmem_ushort_test_any +#pragma weak shmem_int_test_any = pshmem_int_test_any +#pragma weak shmem_long_test_any = pshmem_long_test_any +#pragma weak shmem_longlong_test_any = pshmem_longlong_test_any +#pragma weak shmem_uint_test_any = pshmem_uint_test_any +#pragma weak shmem_ulong_test_any = pshmem_ulong_test_any +#pragma weak shmem_ulonglong_test_any = pshmem_ulonglong_test_any +#pragma weak shmem_int32_test_any = pshmem_int32_test_any +#pragma weak shmem_int64_test_any = pshmem_int64_test_any +#pragma weak shmem_uint32_test_any = pshmem_uint32_test_any +#pragma weak shmem_uint64_test_any = pshmem_uint64_test_any +#pragma weak shmem_size_test_any = pshmem_size_test_any +#pragma weak 
shmem_ptrdiff_test_any = pshmem_ptrdiff_test_any + +#pragma weak shmem_short_test_some = pshmem_short_test_some +#pragma weak shmem_ushort_test_some = pshmem_ushort_test_some +#pragma weak shmem_int_test_some = pshmem_int_test_some +#pragma weak shmem_long_test_some = pshmem_long_test_some +#pragma weak shmem_longlong_test_some = pshmem_longlong_test_some +#pragma weak shmem_uint_test_some = pshmem_uint_test_some +#pragma weak shmem_ulong_test_some = pshmem_ulong_test_some +#pragma weak shmem_ulonglong_test_some = pshmem_ulonglong_test_some +#pragma weak shmem_int32_test_some = pshmem_int32_test_some +#pragma weak shmem_int64_test_some = pshmem_int64_test_some +#pragma weak shmem_uint32_test_some = pshmem_uint32_test_some +#pragma weak shmem_uint64_test_some = pshmem_uint64_test_some +#pragma weak shmem_size_test_some = pshmem_size_test_some +#pragma weak shmem_ptrdiff_test_some = pshmem_ptrdiff_test_some + + + +#pragma weak shmem_short_test_all_vector = pshmem_short_test_all_vector +#pragma weak shmem_ushort_test_all_vector = pshmem_ushort_test_all_vector +#pragma weak shmem_int_test_all_vector = pshmem_int_test_all_vector +#pragma weak shmem_long_test_all_vector = pshmem_long_test_all_vector +#pragma weak shmem_longlong_test_all_vector = pshmem_longlong_test_all_vector +#pragma weak shmem_uint_test_all_vector = pshmem_uint_test_all_vector +#pragma weak shmem_ulong_test_all_vector = pshmem_ulong_test_all_vector +#pragma weak shmem_ulonglong_test_all_vector = pshmem_ulonglong_test_all_vector +#pragma weak shmem_int32_test_all_vector = pshmem_int32_test_all_vector +#pragma weak shmem_int64_test_all_vector = pshmem_int64_test_all_vector +#pragma weak shmem_uint32_test_all_vector = pshmem_uint32_test_all_vector +#pragma weak shmem_uint64_test_all_vector = pshmem_uint64_test_all_vector +#pragma weak shmem_size_test_all_vector = pshmem_size_test_all_vector +#pragma weak shmem_ptrdiff_test_all_vector = pshmem_ptrdiff_test_all_vector + + +#pragma weak 
shmem_short_test_any_vector = pshmem_short_test_any_vector +#pragma weak shmem_ushort_test_any_vector = pshmem_ushort_test_any_vector +#pragma weak shmem_int_test_any_vector = pshmem_int_test_any_vector +#pragma weak shmem_long_test_any_vector = pshmem_long_test_any_vector +#pragma weak shmem_longlong_test_any_vector = pshmem_longlong_test_any_vector +#pragma weak shmem_uint_test_any_vector = pshmem_uint_test_any_vector +#pragma weak shmem_ulong_test_any_vector = pshmem_ulong_test_any_vector +#pragma weak shmem_ulonglong_test_any_vector = pshmem_ulonglong_test_any_vector +#pragma weak shmem_int32_test_any_vector = pshmem_int32_test_any_vector +#pragma weak shmem_int64_test_any_vector = pshmem_int64_test_any_vector +#pragma weak shmem_uint32_test_any_vector = pshmem_uint32_test_any_vector +#pragma weak shmem_uint64_test_any_vector = pshmem_uint64_test_any_vector +#pragma weak shmem_size_test_any_vector = pshmem_size_test_any_vector +#pragma weak shmem_ptrdiff_test_any_vector = pshmem_ptrdiff_test_any_vector + + +#pragma weak shmem_short_test_some_vector = pshmem_short_test_some_vector +#pragma weak shmem_ushort_test_some_vector = pshmem_ushort_test_some_vector +#pragma weak shmem_int_test_some_vector = pshmem_int_test_some_vector +#pragma weak shmem_long_test_some_vector = pshmem_long_test_some_vector +#pragma weak shmem_longlong_test_some_vector = pshmem_longlong_test_some_vector +#pragma weak shmem_uint_test_some_vector = pshmem_uint_test_some_vector +#pragma weak shmem_ulong_test_some_vector = pshmem_ulong_test_some_vector +#pragma weak shmem_ulonglong_test_some_vector = pshmem_ulonglong_test_some_vector +#pragma weak shmem_int32_test_some_vector = pshmem_int32_test_some_vector +#pragma weak shmem_int64_test_some_vector = pshmem_int64_test_some_vector +#pragma weak shmem_uint32_test_some_vector = pshmem_uint32_test_some_vector +#pragma weak shmem_uint64_test_some_vector = pshmem_uint64_test_some_vector +#pragma weak shmem_size_test_some_vector = 
pshmem_size_test_some_vector +#pragma weak shmem_ptrdiff_test_some_vector = pshmem_ptrdiff_test_some_vector + + +#include "oshmem/shmem/c/profile-defines.h" +#endif +/* Generates prefix##type_name##_test_all(): invokes RUNTIME_CHECK_INIT(), passes ivars, cmp, &value, nelems, status and the type code to the SPML test_all call, runs RUNTIME_CHECK_IMPL_RC on the result and returns it as int. */ +#define SHMEM_TYPE_TEST_ALL(type_name, type, code, prefix) \ + int prefix##type_name##_test_all(volatile type *ivars, size_t nelems, const int *status, int cmp, type value) \ + { \ + int rc = OSHMEM_SUCCESS; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(test_all( \ + (void*)ivars, \ + cmp, \ + (void*)&value, \ + nelems, status, code)); \ + RUNTIME_CHECK_IMPL_RC(rc); \ + \ + return rc; \ + } + +/* Generates prefix##type_name##_test_any(): same argument forwarding as the test_all generator, but calls SPML test_any and returns the result as size_t. */ +#define SHMEM_TYPE_TEST_ANY(type_name, type, code, prefix) \ + size_t prefix##type_name##_test_any(volatile type *ivars, size_t nelems, const int *status, int cmp, type value) \ + { \ + size_t rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(test_any( \ + (void*)ivars, \ + cmp, \ + (void*)&value, \ + nelems, status, code)); \ + RUNTIME_CHECK_IMPL_RC(rc); \ + \ + return rc; \ + } + +/* Generates prefix##type_name##_test_some(): like the test_any generator, with an extra indices array forwarded to SPML test_some; returns size_t. */ +#define SHMEM_TYPE_TEST_SOME(type_name, type, code, prefix) \ + size_t prefix##type_name##_test_some(volatile type *ivars, size_t nelems, size_t *indices, const int *status, int cmp, type value) \ + { \ + size_t rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(test_some( \ + (void*)ivars, \ + cmp, \ + (void*)&value, \ + nelems, indices, status, code)); \ + RUNTIME_CHECK_IMPL_RC(rc); \ + \ + return rc; \ + } +/* Vector form of test_any: takes a values pointer, forwarded as is instead of the address of a single value; calls SPML test_any_vector and returns size_t. */ +#define SHMEM_TYPE_TEST_ANY_VECTOR(type_name, type, code, prefix) \ + size_t prefix##type_name##_test_any_vector(volatile type *ivars, size_t nelems, const int *status, int cmp, type *values) \ + { \ + size_t rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(test_any_vector( \ + (void*)ivars, \ + cmp, \ + (void*)values, \ + nelems, status, code)); \ + RUNTIME_CHECK_IMPL_RC(rc); \ + \ + return rc; \ + } +/* Vector form of test_some: forwards the values pointer and the indices array to SPML test_some_vector; returns size_t. */ +#define SHMEM_TYPE_TEST_SOME_VECTOR(type_name, type, code, prefix) \ + size_t prefix##type_name##_test_some_vector(volatile type *ivars, size_t nelems, size_t
*indices, const int *status, int cmp, type *values) \ + { \ + size_t rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(test_some_vector( \ + (void*)ivars, \ + cmp, \ + (void*)values, \ + nelems, indices, status, code)); \ + RUNTIME_CHECK_IMPL_RC(rc); \ + \ + return rc; \ + } + +/* Vector form of test_all: forwards the values pointer to SPML test_all_vector, checks the result with RUNTIME_CHECK_IMPL_RC and returns it as int. */ +#define SHMEM_TYPE_TEST_ALL_VECTOR(type_name, type, code, prefix) \ + int prefix##type_name##_test_all_vector(volatile type *ivars, size_t nelems, const int *status, int cmp, type *values) \ + { \ + int rc = OSHMEM_SUCCESS; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(test_all_vector( \ + (void*)ivars, \ + cmp, \ + (void*)values, \ + nelems, status, code)); \ + RUNTIME_CHECK_IMPL_RC(rc); \ + \ + return rc; \ + } + + +/* Instantiations: each line below expands one generator with prefix shmem for the given type name, C type and type code. */ +SHMEM_TYPE_TEST_ALL(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_TEST_ALL(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_TEST_ALL(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_TEST_ALL(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_TEST_ALL(_longlong, long long, SHMEM_LLONG, shmem) +SHMEM_TYPE_TEST_ALL(_uint, unsigned int, SHMEM_UINT, shmem) +SHMEM_TYPE_TEST_ALL(_ulong, unsigned long, SHMEM_ULONG, shmem) +SHMEM_TYPE_TEST_ALL(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_TEST_ALL(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_TEST_ALL(_int64, int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_TEST_ALL(_uint32, uint32_t, SHMEM_UINT32_T, shmem) +SHMEM_TYPE_TEST_ALL(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_TEST_ALL(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_TEST_ALL(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + +SHMEM_TYPE_TEST_ANY(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_TEST_ANY(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_TEST_ANY(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_TEST_ANY(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_TEST_ANY(_longlong, long long, SHMEM_LLONG, shmem) +SHMEM_TYPE_TEST_ANY(_uint, unsigned int, SHMEM_UINT, shmem) +SHMEM_TYPE_TEST_ANY(_ulong, unsigned long, SHMEM_ULONG,
shmem) +SHMEM_TYPE_TEST_ANY(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_TEST_ANY(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_TEST_ANY(_int64, int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_TEST_ANY(_uint32, uint32_t, SHMEM_UINT32_T, shmem) +SHMEM_TYPE_TEST_ANY(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_TEST_ANY(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_TEST_ANY(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + + +SHMEM_TYPE_TEST_SOME(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_TEST_SOME(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_TEST_SOME(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_TEST_SOME(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_TEST_SOME(_longlong, long long, SHMEM_LLONG, shmem) +SHMEM_TYPE_TEST_SOME(_uint, unsigned int, SHMEM_UINT, shmem) +SHMEM_TYPE_TEST_SOME(_ulong, unsigned long, SHMEM_ULONG, shmem) +SHMEM_TYPE_TEST_SOME(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_TEST_SOME(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_TEST_SOME(_int64, int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_TEST_SOME(_uint32, uint32_t, SHMEM_UINT32_T, shmem) +SHMEM_TYPE_TEST_SOME(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_TEST_SOME(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_TEST_SOME(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + + +SHMEM_TYPE_TEST_ALL_VECTOR(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_longlong, long long, SHMEM_LLONG, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_uint, unsigned int, SHMEM_UINT, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_ulong, unsigned long, SHMEM_ULONG, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_int64, int64_t, SHMEM_INT64_T, 
shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_uint32, uint32_t, SHMEM_UINT32_T, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_TEST_ALL_VECTOR(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + + + +SHMEM_TYPE_TEST_ANY_VECTOR(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_longlong, long long, SHMEM_LLONG, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_uint, unsigned int, SHMEM_UINT, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_ulong, unsigned long, SHMEM_ULONG, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_int64, int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_uint32, uint32_t, SHMEM_UINT32_T, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_TEST_ANY_VECTOR(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + + + +SHMEM_TYPE_TEST_SOME_VECTOR(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_longlong, long long, SHMEM_LLONG, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_uint, unsigned int, SHMEM_UINT, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_ulong, unsigned long, SHMEM_ULONG, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_int64, int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_uint32, uint32_t, 
SHMEM_UINT32_T, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_TEST_SOME_VECTOR(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + + diff --git a/oshmem/shmem/c/shmem_test_lock.c b/oshmem/shmem/c/shmem_test_lock.c index 0d587da3969..217b9afde02 100644 --- a/oshmem/shmem/c/shmem_test_lock.c +++ b/oshmem/shmem/c/shmem_test_lock.c @@ -23,7 +23,7 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_test_lock = pshmem_test_lock -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif int shmem_test_lock(volatile long *lock) diff --git a/oshmem/shmem/c/shmem_udcflush.c b/oshmem/shmem/c/shmem_udcflush.c index eb0fa227434..a60a7f54411 100644 --- a/oshmem/shmem/c/shmem_udcflush.c +++ b/oshmem/shmem/c/shmem_udcflush.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -17,17 +19,17 @@ #include "oshmem/mca/spml/spml.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_udcflush = pshmem_udcflush -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_udcflush(void) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_udcflush is not supported by the current CPU architecture");}while (0); #else /* another implementation */ diff --git a/oshmem/shmem/c/shmem_udcflush_line.c b/oshmem/shmem/c/shmem_udcflush_line.c index e6302e93db8..7603a93bd77 100644 --- a/oshmem/shmem/c/shmem_udcflush_line.c +++ b/oshmem/shmem/c/shmem_udcflush_line.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -16,17 +18,17 @@ #include "oshmem/include/shmem.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_udcflush_line = pshmem_udcflush_line -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif void shmem_udcflush_line(void *target) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_udcflush_line is not supported by the current CPU architecture");}while (0); #else /* another implementation */ diff --git a/oshmem/shmem/c/shmem_wait.c b/oshmem/shmem/c/shmem_wait.c index 32d0f53c4ba..ac9936e8ac0 100644 --- a/oshmem/shmem/c/shmem_wait.c +++ b/oshmem/shmem/c/shmem_wait.c @@ -68,21 +68,23 @@ #pragma weak shmemx_int32_wait_until = pshmemx_int32_wait_until #pragma weak shmemx_int64_wait_until = pshmemx_int64_wait_until -#pragma weak shmem_short_test = pshmem_short_test -#pragma weak shmem_int_test = pshmem_int_test -#pragma weak shmem_long_test = pshmem_long_test -#pragma weak shmem_longlong_test = pshmem_longlong_test -#pragma weak shmem_ushort_test = pshmem_ushort_test -#pragma weak shmem_uint_test = pshmem_uint_test -#pragma weak shmem_ulong_test = pshmem_ulong_test -#pragma weak shmem_ulonglong_test = pshmem_ulonglong_test -#pragma weak shmem_int32_test = pshmem_int32_test -#pragma weak shmem_int64_test = pshmem_int64_test -#pragma weak shmem_uint32_test = pshmem_uint32_test -#pragma weak shmem_uint64_test = pshmem_uint64_test -#pragma weak shmem_size_test = pshmem_size_test -#pragma weak shmem_ptrdiff_test = pshmem_ptrdiff_test -#include "oshmem/shmem/c/profile/defines.h" +#pragma weak shmem_short_test = pshmem_short_test +#pragma weak shmem_int_test = pshmem_int_test +#pragma weak shmem_long_test = 
pshmem_long_test +#pragma weak shmem_longlong_test = pshmem_longlong_test +#pragma weak shmem_ushort_test = pshmem_ushort_test +#pragma weak shmem_uint_test = pshmem_uint_test +#pragma weak shmem_ulong_test = pshmem_ulong_test +#pragma weak shmem_ulonglong_test = pshmem_ulonglong_test +#pragma weak shmem_int32_test = pshmem_int32_test +#pragma weak shmem_int64_test = pshmem_int64_test +#pragma weak shmem_uint32_test = pshmem_uint32_test +#pragma weak shmem_uint64_test = pshmem_uint64_test +#pragma weak shmem_size_test = pshmem_size_test +#pragma weak shmem_ptrdiff_test = pshmem_ptrdiff_test + + +#include "oshmem/shmem/c/profile-defines.h" #endif SHMEM_TYPE_WAIT(, volatile long, SHMEM_LONG, shmem) diff --git a/oshmem/shmem/c/shmem_wait_ivars.c b/oshmem/shmem/c/shmem_wait_ivars.c new file mode 100644 index 00000000000..e3d0ef6448f --- /dev/null +++ b/oshmem/shmem/c/shmem_wait_ivars.c @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. + * All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/mca/spml/spml.h" + + +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" + +#pragma weak shmem_short_wait_until_all = pshmem_short_wait_until_all +#pragma weak shmem_ushort_wait_until_all = pshmem_ushort_wait_until_all +#pragma weak shmem_int_wait_until_all = pshmem_int_wait_until_all +#pragma weak shmem_long_wait_until_all = pshmem_long_wait_until_all +#pragma weak shmem_longlong_wait_until_all = pshmem_longlong_wait_until_all +#pragma weak shmem_uint_wait_until_all = pshmem_uint_wait_until_all +#pragma weak shmem_ulong_wait_until_all = pshmem_ulong_wait_until_all +#pragma weak shmem_ulonglong_wait_until_all = pshmem_ulonglong_wait_until_all +#pragma weak shmem_int32_wait_until_all = pshmem_int32_wait_until_all +#pragma weak shmem_int64_wait_until_all = pshmem_int64_wait_until_all +#pragma weak shmem_uint32_wait_until_all = pshmem_uint32_wait_until_all +#pragma weak shmem_uint64_wait_until_all = pshmem_uint64_wait_until_all +#pragma weak shmem_size_wait_until_all = pshmem_size_wait_until_all +#pragma weak shmem_ptrdiff_wait_until_all = pshmem_ptrdiff_wait_until_all + + +#pragma weak shmem_short_wait_until_any = pshmem_short_wait_until_any +#pragma weak shmem_ushort_wait_until_any = pshmem_ushort_wait_until_any +#pragma weak shmem_int_wait_until_any = pshmem_int_wait_until_any +#pragma weak shmem_long_wait_until_any = pshmem_long_wait_until_any +#pragma weak shmem_longlong_wait_until_any = pshmem_longlong_wait_until_any +#pragma weak shmem_uint_wait_until_any = pshmem_uint_wait_until_any +#pragma weak shmem_ulong_wait_until_any = pshmem_ulong_wait_until_any +#pragma weak shmem_ulonglong_wait_until_any = pshmem_ulonglong_wait_until_any +#pragma weak shmem_int32_wait_until_any = 
pshmem_int32_wait_until_any +#pragma weak shmem_int64_wait_until_any = pshmem_int64_wait_until_any +#pragma weak shmem_uint32_wait_until_any = pshmem_uint32_wait_until_any +#pragma weak shmem_uint64_wait_until_any = pshmem_uint64_wait_until_any +#pragma weak shmem_size_wait_until_any = pshmem_size_wait_until_any +#pragma weak shmem_ptrdiff_wait_until_any = pshmem_ptrdiff_wait_until_any + + +#pragma weak shmem_short_wait_until_some = pshmem_short_wait_until_some +#pragma weak shmem_ushort_wait_until_some = pshmem_ushort_wait_until_some +#pragma weak shmem_int_wait_until_some = pshmem_int_wait_until_some +#pragma weak shmem_long_wait_until_some = pshmem_long_wait_until_some +#pragma weak shmem_longlong_wait_until_some = pshmem_longlong_wait_until_some +#pragma weak shmem_uint_wait_until_some = pshmem_uint_wait_until_some +#pragma weak shmem_ulong_wait_until_some = pshmem_ulong_wait_until_some +#pragma weak shmem_ulonglong_wait_until_some = pshmem_ulonglong_wait_until_some +#pragma weak shmem_int32_wait_until_some = pshmem_int32_wait_until_some +#pragma weak shmem_int64_wait_until_some = pshmem_int64_wait_until_some +#pragma weak shmem_uint32_wait_until_some = pshmem_uint32_wait_until_some +#pragma weak shmem_uint64_wait_until_some = pshmem_uint64_wait_until_some +#pragma weak shmem_size_wait_until_some = pshmem_size_wait_until_some +#pragma weak shmem_ptrdiff_wait_until_some = pshmem_ptrdiff_wait_until_some + + +#pragma weak shmem_short_wait_until_all_vector = pshmem_short_wait_until_all_vector +#pragma weak shmem_ushort_wait_until_all_vector = pshmem_ushort_wait_until_all_vector +#pragma weak shmem_int_wait_until_all_vector = pshmem_int_wait_until_all_vector +#pragma weak shmem_long_wait_until_all_vector = pshmem_long_wait_until_all_vector +#pragma weak shmem_longlong_wait_until_all_vector = pshmem_longlong_wait_until_all_vector +#pragma weak shmem_uint_wait_until_all_vector = pshmem_uint_wait_until_all_vector +#pragma weak shmem_ulong_wait_until_all_vector = 
pshmem_ulong_wait_until_all_vector +#pragma weak shmem_ulonglong_wait_until_all_vector = pshmem_ulonglong_wait_until_all_vector +#pragma weak shmem_int32_wait_until_all_vector = pshmem_int32_wait_until_all_vector +#pragma weak shmem_int64_wait_until_all_vector = pshmem_int64_wait_until_all_vector +#pragma weak shmem_uint32_wait_until_all_vector = pshmem_uint32_wait_until_all_vector +#pragma weak shmem_uint64_wait_until_all_vector = pshmem_uint64_wait_until_all_vector +#pragma weak shmem_size_wait_until_all_vector = pshmem_size_wait_until_all_vector +#pragma weak shmem_ptrdiff_wait_until_all_vector = pshmem_ptrdiff_wait_until_all_vector + + +#pragma weak shmem_short_wait_until_any_vector = pshmem_short_wait_until_any_vector +#pragma weak shmem_ushort_wait_until_any_vector = pshmem_ushort_wait_until_any_vector +#pragma weak shmem_int_wait_until_any_vector = pshmem_int_wait_until_any_vector +#pragma weak shmem_long_wait_until_any_vector = pshmem_long_wait_until_any_vector +#pragma weak shmem_longlong_wait_until_any_vector = pshmem_longlong_wait_until_any_vector +#pragma weak shmem_uint_wait_until_any_vector = pshmem_uint_wait_until_any_vector +#pragma weak shmem_ulong_wait_until_any_vector = pshmem_ulong_wait_until_any_vector +#pragma weak shmem_ulonglong_wait_until_any_vector = pshmem_ulonglong_wait_until_any_vector +#pragma weak shmem_int32_wait_until_any_vector = pshmem_int32_wait_until_any_vector +#pragma weak shmem_int64_wait_until_any_vector = pshmem_int64_wait_until_any_vector +#pragma weak shmem_uint32_wait_until_any_vector = pshmem_uint32_wait_until_any_vector +#pragma weak shmem_uint64_wait_until_any_vector = pshmem_uint64_wait_until_any_vector +#pragma weak shmem_size_wait_until_any_vector = pshmem_size_wait_until_any_vector +#pragma weak shmem_ptrdiff_wait_until_any_vector = pshmem_ptrdiff_wait_until_any_vector + + +#pragma weak shmem_short_wait_until_some_vector = pshmem_short_wait_until_some_vector +#pragma weak shmem_ushort_wait_until_some_vector = 
pshmem_ushort_wait_until_some_vector +#pragma weak shmem_int_wait_until_some_vector = pshmem_int_wait_until_some_vector +#pragma weak shmem_long_wait_until_some_vector = pshmem_long_wait_until_some_vector +#pragma weak shmem_longlong_wait_until_some_vector = pshmem_longlong_wait_until_some_vector +#pragma weak shmem_uint_wait_until_some_vector = pshmem_uint_wait_until_some_vector +#pragma weak shmem_ulong_wait_until_some_vector = pshmem_ulong_wait_until_some_vector +#pragma weak shmem_ulonglong_wait_until_some_vector = pshmem_ulonglong_wait_until_some_vector +#pragma weak shmem_int32_wait_until_some_vector = pshmem_int32_wait_until_some_vector +#pragma weak shmem_int64_wait_until_some_vector = pshmem_int64_wait_until_some_vector +#pragma weak shmem_uint32_wait_until_some_vector = pshmem_uint32_wait_until_some_vector +#pragma weak shmem_uint64_wait_until_some_vector = pshmem_uint64_wait_until_some_vector +#pragma weak shmem_size_wait_until_some_vector = pshmem_size_wait_until_some_vector +#pragma weak shmem_ptrdiff_wait_until_some_vector = pshmem_ptrdiff_wait_until_some_vector + + +#include "oshmem/shmem/c/profile-defines.h" +#endif +/* Generates prefix##type_name##_wait_until_all(): invokes RUNTIME_CHECK_INIT(), then passes ivars, cmp, &value, nelems, status and the type code to the SPML wait_until_all call; returns nothing and the call result is not inspected. */ +#define SHMEM_TYPE_WAIT_UNTIL_ALL(type_name, type, code, prefix) \ + void prefix##type_name##_wait_until_all(volatile type *ivars, size_t nelems, const int *status, int cmp, type value) \ + { \ + \ + RUNTIME_CHECK_INIT(); \ + \ + MCA_SPML_CALL(wait_until_all( \ + (void*)ivars, \ + cmp, \ + (void*)&value, \ + nelems, status, code)); \ + \ + return ; \ + } + +/* Generates prefix##type_name##_wait_until_any(): same argument forwarding, calls SPML wait_until_any, runs RUNTIME_CHECK_IMPL_RC on the result and returns it as size_t. */ +#define SHMEM_TYPE_WAIT_UNTIL_ANY(type_name, type, code, prefix) \ + size_t prefix##type_name##_wait_until_any(volatile type *ivars, size_t nelems, const int *status, int cmp, type value) \ + { \ + size_t rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(wait_until_any( \ + (void*)ivars, \ + cmp, \ + (void*)&value, \ + nelems, status, code)); \ + RUNTIME_CHECK_IMPL_RC(rc); \ + \ + return rc; \ + } + +/* Generates prefix##type_name##_wait_until_some(): like the wait_until_any generator, with an extra indices array forwarded to SPML wait_until_some; returns size_t. */ +#define SHMEM_TYPE_WAIT_UNTIL_SOME(type_name, type, code,
prefix) \ + size_t prefix##type_name##_wait_until_some(volatile type *ivars, size_t nelems, size_t *indices, const int *status, int cmp, type value) \ + { \ + size_t rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(wait_until_some( \ + (void*)ivars, \ + cmp, \ + (void*)&value, \ + nelems, indices, status, code)); \ + RUNTIME_CHECK_IMPL_RC(rc); \ + \ + return rc; \ + } +/* Generates prefix##type_name##_wait_until_any_vector(): invokes RUNTIME_CHECK_INIT(), forwards the values pointer as is to SPML wait_until_any_vector, checks the result with RUNTIME_CHECK_IMPL_RC and returns it as size_t. */ +#define SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(type_name, type, code, prefix) \ + size_t prefix##type_name##_wait_until_any_vector(volatile type *ivars, size_t nelems, const int *status, int cmp, type *values) \ + { \ + size_t rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(wait_until_any_vector( \ + (void*)ivars, \ + cmp, \ + (void*)values, \ + nelems, status, code)); \ + RUNTIME_CHECK_IMPL_RC(rc); \ + \ + return rc; \ + } +/* Generates prefix##type_name##_wait_until_some_vector(): as the any_vector generator, with an extra indices array forwarded to SPML wait_until_some_vector; returns size_t. */ +#define SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(type_name, type, code, prefix) \ + size_t prefix##type_name##_wait_until_some_vector(volatile type *ivars, size_t nelems, size_t *indices, const int *status, int cmp, type *values) \ + { \ + size_t rc = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(wait_until_some_vector( \ + (void*)ivars, \ + cmp, \ + (void*)values, \ + nelems, indices, status, code)); \ + RUNTIME_CHECK_IMPL_RC(rc); \ + \ + return rc; \ + } + +/* Generates prefix##type_name##_wait_until_all_vector(): invokes RUNTIME_CHECK_INIT() like the other generators in this file, then forwards ivars, cmp, values, nelems, status and the type code to SPML wait_until_all_vector; returns nothing, so no result variable is kept. */ +#define SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(type_name, type, code, prefix) \ + void prefix##type_name##_wait_until_all_vector(volatile type *ivars, size_t nelems, const int *status, int cmp, type *values) \ + { \ + \ + RUNTIME_CHECK_INIT(); \ + \ + MCA_SPML_CALL(wait_until_all_vector( \ + (void*)ivars, \ + cmp, \ + (void*)values, \ + nelems, status, code)); \ + \ + return ; \ + } + + +/* Instantiations: each line below expands one generator with prefix shmem for the given type name, C type and type code. */ +SHMEM_TYPE_WAIT_UNTIL_ALL(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_longlong, long long,
SHMEM_LLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_uint, unsigned int, SHMEM_UINT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_ulong, unsigned long, SHMEM_ULONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_int64, int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_uint32, uint32_t, SHMEM_UINT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + +SHMEM_TYPE_WAIT_UNTIL_ANY(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_longlong, long long, SHMEM_LLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_uint, unsigned int, SHMEM_UINT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_ulong, unsigned long, SHMEM_ULONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_int64, int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_uint32, uint32_t, SHMEM_UINT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + + +SHMEM_TYPE_WAIT_UNTIL_SOME(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_longlong, long long, SHMEM_LLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_uint, unsigned int, SHMEM_UINT, shmem) 
+SHMEM_TYPE_WAIT_UNTIL_SOME(_ulong, unsigned long, SHMEM_ULONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_int64, int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_uint32, uint32_t, SHMEM_UINT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + + +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_longlong, long long, SHMEM_LLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_uint, unsigned int, SHMEM_UINT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_ulong, unsigned long, SHMEM_ULONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_int64, int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_uint32, uint32_t, SHMEM_UINT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ALL_VECTOR(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + + + +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_longlong, long long, SHMEM_LLONG, shmem) 
+SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_uint, unsigned int, SHMEM_UINT, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_ulong, unsigned long, SHMEM_ULONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_int64, int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_uint32, uint32_t, SHMEM_UINT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_ANY_VECTOR(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + + + +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_short, short, SHMEM_SHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_ushort, unsigned short, SHMEM_USHORT, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_int, int, SHMEM_INT, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_long, long, SHMEM_LONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_longlong, long long, SHMEM_LLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_uint, unsigned int, SHMEM_UINT, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_ulong, unsigned long, SHMEM_ULONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_ulonglong, unsigned long long, SHMEM_ULLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_int32, int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_int64, int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_uint32, uint32_t, SHMEM_UINT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_uint64, uint64_t, SHMEM_UINT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_size, size_t, SHMEM_SIZE_T, shmem) +SHMEM_TYPE_WAIT_UNTIL_SOME_VECTOR(_ptrdiff, ptrdiff_t, SHMEM_PTRDIFF_T, shmem) + + diff --git a/oshmem/shmem/c/shmem_xor.c b/oshmem/shmem/c/shmem_xor.c index b2209ce6648..26a22083e50 100644 --- a/oshmem/shmem/c/shmem_xor.c +++ b/oshmem/shmem/c/shmem_xor.c @@ -51,7 +51,7 @@ #pragma weak shmemx_int64_atomic_xor = 
pshmemx_int64_atomic_xor #pragma weak shmemx_uint32_atomic_xor = pshmemx_uint32_atomic_xor #pragma weak shmemx_uint64_atomic_xor = pshmemx_uint64_atomic_xor -#include "oshmem/shmem/c/profile/defines.h" +#include "oshmem/shmem/c/profile-defines.h" #endif OSHMEM_TYPE_OP(int, int, shmem, xor) diff --git a/oshmem/shmem/fortran/Makefile.am b/oshmem/shmem/fortran/Makefile.am index 25610758f9f..83dcc0b996f 100644 --- a/oshmem/shmem/fortran/Makefile.am +++ b/oshmem/shmem/fortran/Makefile.am @@ -4,6 +4,8 @@ # Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2021 Amazon.com, Inc. or its affiliates. +# All Rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -11,30 +13,28 @@ # $HEADER$ # -# This is guaranteed to be false if we're not building OSHMEM at all -# -if OSHMEM_BUILD_FORTRAN_BINDINGS -if OSHMEM_PROFILING - SUBDIRS = profile -endif -endif - if OSHMEM_BUILD_FORTRAN_BINDINGS -oshmem_fortran_lib = liboshmem_fortran.la +# note that liboshmem_fortran_profile.la is built regardless of +# whether or not the profiling layer is built. It holds all the api +# functions that might have a profiling version. +oshmem_fortran_libs = liboshmem_fortran.la liboshmem_fortran_profile.la else -oshmem_fortran_lib = +oshmem_fortran_libs = endif -noinst_LTLIBRARIES = $(oshmem_fortran_lib) +noinst_LTLIBRARIES = $(oshmem_fortran_libs) headers = prototypes_shmem.h \ + prototypes_pshmem.h \ bindings.h \ + pbindings.h \ + profile-defines.h \ shmem_fortran_pointer.h liboshmem_fortran_la_SOURCES = shmem_finalize_f.c +liboshmem_fortran_la_LIBADD = liboshmem_fortran_profile.la -if ! 
OSHMEM_PROFILING -liboshmem_fortran_la_SOURCES += \ +liboshmem_fortran_profile_la_SOURCES = \ shmem_init_f.c \ shmem_global_exit_f.c \ shmem_n_pes_f.c \ @@ -138,7 +138,10 @@ liboshmem_fortran_la_SOURCES += \ shmem_fence_f.c \ shmem_info_f.c -AM_CPPFLAGS = -DOSHMEM_PROFILING=0 +if OSHMEM_PROFILING +liboshmem_fortran_profile_la_CPPFLAGS = -DOSHMEM_PROFILING=1 +else +liboshmem_fortran_profile_la_CPPFLAGS = -DOSHMEM_PROFILING=0 endif if PROJECT_OSHMEM diff --git a/oshmem/shmem/fortran/profile/pbindings.h b/oshmem/shmem/fortran/pbindings.h similarity index 100% rename from oshmem/shmem/fortran/profile/pbindings.h rename to oshmem/shmem/fortran/pbindings.h diff --git a/oshmem/shmem/fortran/profile/defines.h b/oshmem/shmem/fortran/profile-defines.h similarity index 100% rename from oshmem/shmem/fortran/profile/defines.h rename to oshmem/shmem/fortran/profile-defines.h diff --git a/oshmem/shmem/fortran/profile/Makefile.am b/oshmem/shmem/fortran/profile/Makefile.am deleted file mode 100644 index 546d56eac65..00000000000 --- a/oshmem/shmem/fortran/profile/Makefile.am +++ /dev/null @@ -1,159 +0,0 @@ -# -# Copyright (c) 2013 Mellanox Technologies, Inc. -# All rights reserved -# Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -AM_CPPFLAGS = -DOSHMEM_PROFILING=1 - -# This is guaranteed to be false if we're not building OSHMEM at all -if OSHMEM_BUILD_FORTRAN_BINDINGS -pshmem_fortran_lib = liboshmem_fortran_pshmem.la -else -pshmem_fortran_lib = -endif - -noinst_LTLIBRARIES = $(pshmem_fortran_lib) - -headers = prototypes_pshmem.h \ - pbindings.h \ - defines.h - -nodist_liboshmem_fortran_pshmem_la_SOURCES = \ - pshmem_init_f.c \ - pshmem_global_exit_f.c \ - pshmem_n_pes_f.c \ - pshmem_my_pe_f.c \ - pshmem_barrier_all_f.c \ - pshpalloc_f.c \ - pshpdeallc_f.c \ - pshpclmove_f.c \ - pshmem_ptr_f.c \ - pshmem_pe_accessible_f.c \ - pshmem_addr_accessible_f.c \ - pshmem_character_put_f.c \ - pshmem_double_put_f.c \ - pshmem_complex_put_f.c \ - pshmem_logical_put_f.c \ - pshmem_integer_put_f.c \ - pshmem_real_put_f.c \ - pshmem_put4_f.c \ - pshmem_put8_f.c \ - pshmem_put32_f.c \ - pshmem_put64_f.c \ - pshmem_put128_f.c \ - pshmem_putmem_f.c \ - pshmem_complex_iput_f.c \ - pshmem_double_iput_f.c \ - pshmem_integer_iput_f.c \ - pshmem_iput128_f.c \ - pshmem_iput32_f.c \ - pshmem_iput4_f.c \ - pshmem_iput64_f.c \ - pshmem_iput8_f.c \ - pshmem_logical_iput_f.c \ - pshmem_real_iput_f.c \ - pshmem_put_nb_f.c \ - pshmem_character_get_f.c \ - pshmem_complex_get_f.c \ - pshmem_double_get_f.c \ - pshmem_get128_f.c \ - pshmem_get32_f.c \ - pshmem_get4_f.c \ - pshmem_get64_f.c \ - pshmem_get8_f.c \ - pshmem_getmem_f.c \ - pshmem_integer_get_f.c \ - pshmem_logical_get_f.c \ - pshmem_real_get_f.c \ - pshmem_complex_iget_f.c \ - pshmem_double_iget_f.c \ - pshmem_iget128_f.c \ - pshmem_iget32_f.c \ - pshmem_iget4_f.c \ - pshmem_iget64_f.c \ - pshmem_iget8_f.c \ - pshmem_integer_iget_f.c \ - pshmem_logical_iget_f.c \ - pshmem_real_iget_f.c \ - pshmem_get_nb_f.c \ - pshmem_swap_f.c \ - pshmem_int4_swap_f.c \ - pshmem_int8_swap_f.c \ - pshmem_real4_swap_f.c \ - pshmem_real8_swap_f.c \ - 
pshmem_int4_set_f.c \ - pshmem_int8_set_f.c \ - pshmem_real4_set_f.c \ - pshmem_real8_set_f.c \ - pshmem_int4_cswap_f.c \ - pshmem_int8_cswap_f.c \ - pshmem_int4_fadd_f.c \ - pshmem_int8_fadd_f.c \ - pshmem_int4_fetch_f.c \ - pshmem_int8_fetch_f.c \ - pshmem_real4_fetch_f.c \ - pshmem_real8_fetch_f.c \ - pshmem_int4_finc_f.c \ - pshmem_int8_finc_f.c \ - pshmem_int4_add_f.c \ - pshmem_int8_add_f.c \ - pshmem_int4_wait_f.c \ - pshmem_int8_wait_f.c \ - pshmem_wait_f.c \ - pshmem_int4_wait_until_f.c \ - pshmem_int8_wait_until_f.c \ - pshmem_wait_until_f.c \ - pshmem_barrier_f.c \ - pshmem_and_to_all_f.c \ - pshmem_or_to_all_f.c \ - pshmem_xor_to_all_f.c \ - pshmem_max_to_all_f.c \ - pshmem_min_to_all_f.c \ - pshmem_sum_to_all_f.c \ - pshmem_prod_to_all_f.c \ - pshmem_collect_f.c \ - pshmem_broadcast_f.c \ - pshmem_alltoall_f.c \ - pshmem_lock_f.c \ - pshmem_cache_f.c \ - pshmem_int4_inc_f.c \ - pshmem_int8_inc_f.c \ - pshmem_quiet_f.c \ - pshmem_fence_f.c \ - pshmem_info_f.c - -# -# Sym link in the sources from the real OSHMEM directory -# -$(nodist_liboshmem_fortran_pshmem_la_SOURCES): - $(OMPI_V_LN_S) if test ! 
-r $@ ; then \ - pname=`echo $@ | cut -b '2-'` ; \ - $(LN_S) $(top_srcdir)/oshmem/shmem/fortran/$$pname $@ ; \ - fi - -if PROJECT_OSHMEM -if WANT_INSTALL_HEADERS -oshmemdir = $(oshmemincludedir)/$(subdir) -oshmem_HEADERS = $(headers) -endif -endif - -# These files were created by targets above - -MAINTAINERCLEANFILES = $(nodist_liboshmem_fortran_pshmem_la_SOURCES) - -# Don't want these targets in here - -tags-recursive: -tags: -TAGS: -GTAGS: -ID: diff --git a/oshmem/shmem/fortran/profile/prototypes_pshmem.h b/oshmem/shmem/fortran/prototypes_pshmem.h similarity index 100% rename from oshmem/shmem/fortran/profile/prototypes_pshmem.h rename to oshmem/shmem/fortran/prototypes_pshmem.h diff --git a/oshmem/shmem/fortran/shmem_addr_accessible_f.c b/oshmem/shmem/fortran/shmem_addr_accessible_f.c index d4aac33ecf3..dae10d869c5 100644 --- a/oshmem/shmem/fortran/shmem_addr_accessible_f.c +++ b/oshmem/shmem/fortran/shmem_addr_accessible_f.c @@ -16,9 +16,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ADDR_ACCESSIBLE, shmem_addr_accessible) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (MPI_Fint, diff --git a/oshmem/shmem/fortran/shmem_alltoall_f.c b/oshmem/shmem/fortran/shmem_alltoall_f.c index 6845edcf3f0..46c07257f43 100644 --- a/oshmem/shmem/fortran/shmem_alltoall_f.c +++ b/oshmem/shmem/fortran/shmem_alltoall_f.c @@ -18,12 +18,12 @@ #include "oshmem/op/op.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALL32, shmem_alltoall32) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALL64, shmem_alltoall64) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALLS32, shmem_alltoalls32) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALLS64, shmem_alltoalls64) -#include 
"oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_and_to_all_f.c b/oshmem/shmem/fortran/shmem_and_to_all_f.c index d653360dbf8..51099e8a473 100644 --- a/oshmem/shmem/fortran/shmem_and_to_all_f.c +++ b/oshmem/shmem/fortran/shmem_and_to_all_f.c @@ -18,11 +18,11 @@ #include "oshmem/op/op.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT2_AND_TO_ALL, shmem_int2_and_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_AND_TO_ALL, shmem_int4_and_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_AND_TO_ALL, shmem_int8_and_to_all) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_barrier_all_f.c b/oshmem/shmem/fortran/shmem_barrier_all_f.c index 1e63510ed27..9e2e160efa1 100644 --- a/oshmem/shmem/fortran/shmem_barrier_all_f.c +++ b/oshmem/shmem/fortran/shmem_barrier_all_f.c @@ -14,9 +14,9 @@ #include "oshmem/include/shmem.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_BARRIER_ALL, shmem_barrier_all) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_barrier_f.c b/oshmem/shmem/fortran/shmem_barrier_f.c index d7e4e4459c4..946109e5a73 100644 --- a/oshmem/shmem/fortran/shmem_barrier_f.c +++ b/oshmem/shmem/fortran/shmem_barrier_f.c @@ -14,9 +14,9 @@ #include "oshmem/include/shmem.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_BARRIER, shmem_barrier) 
-#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_broadcast_f.c b/oshmem/shmem/fortran/shmem_broadcast_f.c index af2a9b7185c..d36e37aa38b 100644 --- a/oshmem/shmem/fortran/shmem_broadcast_f.c +++ b/oshmem/shmem/fortran/shmem_broadcast_f.c @@ -18,12 +18,12 @@ #include "oshmem/op/op.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_BROADCAST4, shmem_broadcast4) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_BROADCAST8, shmem_broadcast8) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_BROADCAST32, shmem_broadcast32) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_BROADCAST64, shmem_broadcast64) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, @@ -85,7 +85,7 @@ SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, }\ \ /* Define actual PE using relative in active set */\ - rel_PE_root = oshmem_proc_pe(group->proc_array[OMPI_FINT_2_INT(*PE_root)]);\ + rel_PE_root = oshmem_proc_pe_vpid(group, OMPI_FINT_2_INT(*PE_root));\ \ /* Call collective broadcast operation */\ rc = group->g_scoll.scoll_broadcast( group, \ diff --git a/oshmem/shmem/fortran/shmem_cache_f.c b/oshmem/shmem/fortran/shmem_cache_f.c index a171c526c6e..5fc2643e1ab 100644 --- a/oshmem/shmem/fortran/shmem_cache_f.c +++ b/oshmem/shmem/fortran/shmem_cache_f.c @@ -16,14 +16,14 @@ #include "ompi/datatype/ompi_datatype.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_SET_CACHE_INV, shmem_set_cache_inv) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_SET_CACHE_LINE_INV, shmem_set_cache_line_inv) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_CLEAR_CACHE_INV, shmem_clear_cache_inv) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_CLEAR_CACHE_LINE_INV, 
shmem_clear_cache_line_inv) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_UDCFLUSH, shmem_udcflush) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_UDCFLUSH_LINE, shmem_udcflush_line) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_character_get_f.c b/oshmem/shmem/fortran/shmem_character_get_f.c index 7932e66af57..9e6322bbd4d 100644 --- a/oshmem/shmem/fortran/shmem_character_get_f.c +++ b/oshmem/shmem/fortran/shmem_character_get_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_CHARACTER_GET, shmem_character_get) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_character_put_f.c b/oshmem/shmem/fortran/shmem_character_put_f.c index babada9c499..13e5405dacf 100644 --- a/oshmem/shmem/fortran/shmem_character_put_f.c +++ b/oshmem/shmem/fortran/shmem_character_put_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_CHARACTER_PUT, shmem_character_put) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_collect_f.c b/oshmem/shmem/fortran/shmem_collect_f.c index d990a6c902f..b05f8b56764 100644 --- a/oshmem/shmem/fortran/shmem_collect_f.c +++ b/oshmem/shmem/fortran/shmem_collect_f.c @@ -18,7 +18,7 @@ #include "oshmem/op/op.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_COLLECT4, 
shmem_collect4) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_COLLECT8, shmem_collect8) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_COLLECT32, shmem_collect32) @@ -27,7 +27,7 @@ SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_FCOLLECT4, shmem_fcollect4) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_FCOLLECT8, shmem_fcollect8) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_FCOLLECT32, shmem_fcollect32) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_FCOLLECT64, shmem_fcollect64) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_complex_get_f.c b/oshmem/shmem/fortran/shmem_complex_get_f.c index 21465b5293d..fc6fb643722 100644 --- a/oshmem/shmem/fortran/shmem_complex_get_f.c +++ b/oshmem/shmem/fortran/shmem_complex_get_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_COMPLEX_GET, shmem_complex_get) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_complex_iget_f.c b/oshmem/shmem/fortran/shmem_complex_iget_f.c index 51e5a59dd27..9a5ef31a69e 100644 --- a/oshmem/shmem/fortran/shmem_complex_iget_f.c +++ b/oshmem/shmem/fortran/shmem_complex_iget_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_COMPLEX_IGET, shmem_complex_iget) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_complex_iput_f.c b/oshmem/shmem/fortran/shmem_complex_iput_f.c index 00b96ff71f7..ab610aabb02 100644 --- a/oshmem/shmem/fortran/shmem_complex_iput_f.c +++ 
b/oshmem/shmem/fortran/shmem_complex_iput_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_COMPLEX_IPUT, shmem_complex_iput) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_complex_put_f.c b/oshmem/shmem/fortran/shmem_complex_put_f.c index 1cff50e1fcc..d414bd177fa 100644 --- a/oshmem/shmem/fortran/shmem_complex_put_f.c +++ b/oshmem/shmem/fortran/shmem_complex_put_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_COMPLEX_PUT, shmem_complex_put) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_double_get_f.c b/oshmem/shmem/fortran/shmem_double_get_f.c index 0870f55823b..e507af7504d 100644 --- a/oshmem/shmem/fortran/shmem_double_get_f.c +++ b/oshmem/shmem/fortran/shmem_double_get_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_DOUBLE_GET, shmem_double_get) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_double_iget_f.c b/oshmem/shmem/fortran/shmem_double_iget_f.c index a5da3afdbca..53c6e6f0789 100644 --- a/oshmem/shmem/fortran/shmem_double_iget_f.c +++ b/oshmem/shmem/fortran/shmem_double_iget_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include 
"oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_DOUBLE_IGET, shmem_double_iget) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_double_iput_f.c b/oshmem/shmem/fortran/shmem_double_iput_f.c index ad3fc1d1f36..878137d7f10 100644 --- a/oshmem/shmem/fortran/shmem_double_iput_f.c +++ b/oshmem/shmem/fortran/shmem_double_iput_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_DOUBLE_IPUT, shmem_double_iput) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_double_put_f.c b/oshmem/shmem/fortran/shmem_double_put_f.c index 9ac5efd020b..a201e3df337 100644 --- a/oshmem/shmem/fortran/shmem_double_put_f.c +++ b/oshmem/shmem/fortran/shmem_double_put_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_DOUBLE_PUT, shmem_double_put) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_fence_f.c b/oshmem/shmem/fortran/shmem_fence_f.c index c1fdf9303a8..8413600d5b6 100644 --- a/oshmem/shmem/fortran/shmem_fence_f.c +++ b/oshmem/shmem/fortran/shmem_fence_f.c @@ -15,9 +15,9 @@ #include "oshmem/mca/spml/spml.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_FENCE, shmem_fence) -#include "oshmem/shmem/fortran/profile/defines.h" +#include 
"oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_get128_f.c b/oshmem/shmem/fortran/shmem_get128_f.c index f6c5e45af74..e396fd65fde 100644 --- a/oshmem/shmem/fortran/shmem_get128_f.c +++ b/oshmem/shmem/fortran/shmem_get128_f.c @@ -18,9 +18,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_GET128, shmem_get128) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_get32_f.c b/oshmem/shmem/fortran/shmem_get32_f.c index 5f73e8d8c17..302aa100a90 100644 --- a/oshmem/shmem/fortran/shmem_get32_f.c +++ b/oshmem/shmem/fortran/shmem_get32_f.c @@ -18,9 +18,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_GET32, shmem_get32) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_get4_f.c b/oshmem/shmem/fortran/shmem_get4_f.c index 20da34ca8fc..b9ff6d5756f 100644 --- a/oshmem/shmem/fortran/shmem_get4_f.c +++ b/oshmem/shmem/fortran/shmem_get4_f.c @@ -18,9 +18,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_GET4, shmem_get4) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_get64_f.c b/oshmem/shmem/fortran/shmem_get64_f.c index f585cfaa99f..c923ae040ed 100644 --- a/oshmem/shmem/fortran/shmem_get64_f.c +++ 
b/oshmem/shmem/fortran/shmem_get64_f.c @@ -18,9 +18,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_GET64, shmem_get64) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_get8_f.c b/oshmem/shmem/fortran/shmem_get8_f.c index 8ba75bcecd8..d5b3ea64f01 100644 --- a/oshmem/shmem/fortran/shmem_get8_f.c +++ b/oshmem/shmem/fortran/shmem_get8_f.c @@ -18,9 +18,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_GET8, shmem_get8) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_get_nb_f.c b/oshmem/shmem/fortran/shmem_get_nb_f.c index f5af43eca74..9c1a47cddf4 100644 --- a/oshmem/shmem/fortran/shmem_get_nb_f.c +++ b/oshmem/shmem/fortran/shmem_get_nb_f.c @@ -18,7 +18,7 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_GETMEM_NBI, shmem_getmem_nbi) @@ -35,7 +35,7 @@ SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_GET32_NBI, shmem_get32_nbi) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_GET64_NBI, shmem_get64_nbi) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_GET128_NBI, shmem_get128_nbi) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_getmem_f.c b/oshmem/shmem/fortran/shmem_getmem_f.c index dae32735982..eec5f05e8a5 100644 --- a/oshmem/shmem/fortran/shmem_getmem_f.c +++ b/oshmem/shmem/fortran/shmem_getmem_f.c @@ -18,9 
+18,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_GETMEM, shmem_getmem) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_global_exit_f.c b/oshmem/shmem/fortran/shmem_global_exit_f.c index 72ec888be59..a210d979eea 100644 --- a/oshmem/shmem/fortran/shmem_global_exit_f.c +++ b/oshmem/shmem/fortran/shmem_global_exit_f.c @@ -14,9 +14,9 @@ #include "oshmem/include/shmem.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_GLOBAL_EXIT, shmem_global_exit) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_iget128_f.c b/oshmem/shmem/fortran/shmem_iget128_f.c index 0a2737ae4cf..d371f84b11d 100644 --- a/oshmem/shmem/fortran/shmem_iget128_f.c +++ b/oshmem/shmem/fortran/shmem_iget128_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_IGET128, shmem_iget128) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_iget32_f.c b/oshmem/shmem/fortran/shmem_iget32_f.c index e1b8395ed25..17e90e9f677 100644 --- a/oshmem/shmem/fortran/shmem_iget32_f.c +++ b/oshmem/shmem/fortran/shmem_iget32_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_IGET32, 
shmem_iget32) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_iget4_f.c b/oshmem/shmem/fortran/shmem_iget4_f.c index 868d8a9b72f..56727817399 100644 --- a/oshmem/shmem/fortran/shmem_iget4_f.c +++ b/oshmem/shmem/fortran/shmem_iget4_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_IGET4, shmem_iget4) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_iget64_f.c b/oshmem/shmem/fortran/shmem_iget64_f.c index fafab50c50f..2f5f9405465 100644 --- a/oshmem/shmem/fortran/shmem_iget64_f.c +++ b/oshmem/shmem/fortran/shmem_iget64_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_IGET64, shmem_iget64) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_iget8_f.c b/oshmem/shmem/fortran/shmem_iget8_f.c index 2e9d76733a9..03b24d70025 100644 --- a/oshmem/shmem/fortran/shmem_iget8_f.c +++ b/oshmem/shmem/fortran/shmem_iget8_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_IGET8, shmem_iget8) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_init_f.c b/oshmem/shmem/fortran/shmem_init_f.c index 
2300284c8d4..c4531de8329 100644 --- a/oshmem/shmem/fortran/shmem_init_f.c +++ b/oshmem/shmem/fortran/shmem_init_f.c @@ -14,10 +14,10 @@ #include "oshmem/include/shmem.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INIT, shmem_init) SHMEM_GENERATE_WEAK_BINDINGS(START_PES, start_pes) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_int4_add_f.c b/oshmem/shmem/fortran/shmem_int4_add_f.c index 8a985a41cf2..6ff106601d6 100644 --- a/oshmem/shmem/fortran/shmem_int4_add_f.c +++ b/oshmem/shmem/fortran/shmem_int4_add_f.c @@ -20,9 +20,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_ADD, shmem_int4_add) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_int4_cswap_f.c b/oshmem/shmem/fortran/shmem_int4_cswap_f.c index a1e5fbfe924..18d5ff6396b 100644 --- a/oshmem/shmem/fortran/shmem_int4_cswap_f.c +++ b/oshmem/shmem/fortran/shmem_int4_cswap_f.c @@ -22,9 +22,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_CSWAP, shmem_int4_cswap) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer4_t, diff --git a/oshmem/shmem/fortran/shmem_int4_fadd_f.c b/oshmem/shmem/fortran/shmem_int4_fadd_f.c index e00d33f2f7f..03b37f9cd27 100644 --- a/oshmem/shmem/fortran/shmem_int4_fadd_f.c +++ b/oshmem/shmem/fortran/shmem_int4_fadd_f.c 
@@ -20,9 +20,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_FADD, shmem_int4_fadd) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer4_t, diff --git a/oshmem/shmem/fortran/shmem_int4_fetch_f.c b/oshmem/shmem/fortran/shmem_int4_fetch_f.c index 4e920f5b2a3..9de05b7a0f1 100644 --- a/oshmem/shmem/fortran/shmem_int4_fetch_f.c +++ b/oshmem/shmem/fortran/shmem_int4_fetch_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_FETCH, shmem_int4_fetch) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer4_t, diff --git a/oshmem/shmem/fortran/shmem_int4_finc_f.c b/oshmem/shmem/fortran/shmem_int4_finc_f.c index acc53355b28..cefcf506f44 100644 --- a/oshmem/shmem/fortran/shmem_int4_finc_f.c +++ b/oshmem/shmem/fortran/shmem_int4_finc_f.c @@ -20,9 +20,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_FINC, shmem_int4_finc) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer4_t, diff --git a/oshmem/shmem/fortran/shmem_int4_inc_f.c b/oshmem/shmem/fortran/shmem_int4_inc_f.c index ebf039f1d1d..0c3fe128c95 100644 --- a/oshmem/shmem/fortran/shmem_int4_inc_f.c +++ b/oshmem/shmem/fortran/shmem_int4_inc_f.c @@ -20,9 +20,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include 
"oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_INC, shmem_int4_inc) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_int4_set_f.c b/oshmem/shmem/fortran/shmem_int4_set_f.c index 5ef02b5967c..f76cbc80cc3 100644 --- a/oshmem/shmem/fortran/shmem_int4_set_f.c +++ b/oshmem/shmem/fortran/shmem_int4_set_f.c @@ -18,9 +18,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_SET, shmem_int4_set) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_int4_swap_f.c b/oshmem/shmem/fortran/shmem_int4_swap_f.c index f0de74e1540..d608961e835 100644 --- a/oshmem/shmem/fortran/shmem_int4_swap_f.c +++ b/oshmem/shmem/fortran/shmem_int4_swap_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_SWAP, shmem_int4_swap) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer4_t, diff --git a/oshmem/shmem/fortran/shmem_int4_wait_f.c b/oshmem/shmem/fortran/shmem_int4_wait_f.c index 42f766580f8..cdab0ed62ad 100644 --- a/oshmem/shmem/fortran/shmem_int4_wait_f.c +++ b/oshmem/shmem/fortran/shmem_int4_wait_f.c @@ -17,9 +17,9 @@ #include "ompi/datatype/ompi_datatype.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_WAIT, shmem_int4_wait) -#include "oshmem/shmem/fortran/profile/defines.h" +#include 
"oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_int4_wait_until_f.c b/oshmem/shmem/fortran/shmem_int4_wait_until_f.c index c2411315a33..10a6aa9ec57 100644 --- a/oshmem/shmem/fortran/shmem_int4_wait_until_f.c +++ b/oshmem/shmem/fortran/shmem_int4_wait_until_f.c @@ -17,9 +17,9 @@ #include "ompi/datatype/ompi_datatype.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_WAIT_UNTIL, shmem_int4_wait_until) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_int8_add_f.c b/oshmem/shmem/fortran/shmem_int8_add_f.c index 67dfbef61b1..b238e454f8c 100644 --- a/oshmem/shmem/fortran/shmem_int8_add_f.c +++ b/oshmem/shmem/fortran/shmem_int8_add_f.c @@ -20,9 +20,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_ADD, shmem_int8_add) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_int8_cswap_f.c b/oshmem/shmem/fortran/shmem_int8_cswap_f.c index e4e4637e1c1..d0ec61c495f 100644 --- a/oshmem/shmem/fortran/shmem_int8_cswap_f.c +++ b/oshmem/shmem/fortran/shmem_int8_cswap_f.c @@ -21,9 +21,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_CSWAP, shmem_int8_cswap) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer8_t, diff --git 
a/oshmem/shmem/fortran/shmem_int8_fadd_f.c b/oshmem/shmem/fortran/shmem_int8_fadd_f.c index 9c62a499a1d..e942b55eaf0 100644 --- a/oshmem/shmem/fortran/shmem_int8_fadd_f.c +++ b/oshmem/shmem/fortran/shmem_int8_fadd_f.c @@ -20,9 +20,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_FADD, shmem_int8_fadd) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer8_t, diff --git a/oshmem/shmem/fortran/shmem_int8_fetch_f.c b/oshmem/shmem/fortran/shmem_int8_fetch_f.c index 4795e27838b..0aee9fcc128 100644 --- a/oshmem/shmem/fortran/shmem_int8_fetch_f.c +++ b/oshmem/shmem/fortran/shmem_int8_fetch_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_FETCH, shmem_int8_fetch) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer8_t, diff --git a/oshmem/shmem/fortran/shmem_int8_finc_f.c b/oshmem/shmem/fortran/shmem_int8_finc_f.c index 5cc8f94bfac..99f7d23af92 100644 --- a/oshmem/shmem/fortran/shmem_int8_finc_f.c +++ b/oshmem/shmem/fortran/shmem_int8_finc_f.c @@ -20,9 +20,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_FINC, shmem_int8_finc) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer8_t, diff --git a/oshmem/shmem/fortran/shmem_int8_inc_f.c b/oshmem/shmem/fortran/shmem_int8_inc_f.c index 984a62c086c..1b7af91fee0 100644 
--- a/oshmem/shmem/fortran/shmem_int8_inc_f.c +++ b/oshmem/shmem/fortran/shmem_int8_inc_f.c @@ -20,9 +20,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_INC, shmem_int8_inc) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_int8_set_f.c b/oshmem/shmem/fortran/shmem_int8_set_f.c index 182680c306f..676df463a81 100644 --- a/oshmem/shmem/fortran/shmem_int8_set_f.c +++ b/oshmem/shmem/fortran/shmem_int8_set_f.c @@ -18,9 +18,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_SET, shmem_int8_set) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_int8_swap_f.c b/oshmem/shmem/fortran/shmem_int8_swap_f.c index 36dd76f67d1..b03cd4153b8 100644 --- a/oshmem/shmem/fortran/shmem_int8_swap_f.c +++ b/oshmem/shmem/fortran/shmem_int8_swap_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_SWAP, shmem_int8_swap) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer8_t, diff --git a/oshmem/shmem/fortran/shmem_int8_wait_f.c b/oshmem/shmem/fortran/shmem_int8_wait_f.c index be9fcf7d094..021ed612ce8 100644 --- a/oshmem/shmem/fortran/shmem_int8_wait_f.c +++ b/oshmem/shmem/fortran/shmem_int8_wait_f.c @@ -17,9 +17,9 @@ #include "ompi/datatype/ompi_datatype.h" #if OSHMEM_PROFILING -#include 
"oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_WAIT, shmem_int8_wait) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_int8_wait_until_f.c b/oshmem/shmem/fortran/shmem_int8_wait_until_f.c index a40f4b15359..65b523fa632 100644 --- a/oshmem/shmem/fortran/shmem_int8_wait_until_f.c +++ b/oshmem/shmem/fortran/shmem_int8_wait_until_f.c @@ -17,9 +17,9 @@ #include "ompi/datatype/ompi_datatype.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_WAIT_UNTIL, shmem_int8_wait_until) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_integer_get_f.c b/oshmem/shmem/fortran/shmem_integer_get_f.c index 1442fe43021..b88efcce2f6 100644 --- a/oshmem/shmem/fortran/shmem_integer_get_f.c +++ b/oshmem/shmem/fortran/shmem_integer_get_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INTEGER_GET, shmem_integer_get) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_integer_iget_f.c b/oshmem/shmem/fortran/shmem_integer_iget_f.c index 66350fde108..08b59fcbb79 100644 --- a/oshmem/shmem/fortran/shmem_integer_iget_f.c +++ b/oshmem/shmem/fortran/shmem_integer_iget_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" 
SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INTEGER_IGET, shmem_integer_iget) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_integer_iput_f.c b/oshmem/shmem/fortran/shmem_integer_iput_f.c index 393bc053870..730a5e99890 100644 --- a/oshmem/shmem/fortran/shmem_integer_iput_f.c +++ b/oshmem/shmem/fortran/shmem_integer_iput_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INTEGER_IPUT, shmem_integer_iput) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_integer_put_f.c b/oshmem/shmem/fortran/shmem_integer_put_f.c index 1505d22edff..bea4371ddc5 100644 --- a/oshmem/shmem/fortran/shmem_integer_put_f.c +++ b/oshmem/shmem/fortran/shmem_integer_put_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INTEGER_PUT, shmem_integer_put) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_iput128_f.c b/oshmem/shmem/fortran/shmem_iput128_f.c index 4428bf7af64..1f7105f4896 100644 --- a/oshmem/shmem/fortran/shmem_iput128_f.c +++ b/oshmem/shmem/fortran/shmem_iput128_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_IPUT128, shmem_iput128) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif 
SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_iput32_f.c b/oshmem/shmem/fortran/shmem_iput32_f.c index 3d57fcc691c..6167a5085f4 100644 --- a/oshmem/shmem/fortran/shmem_iput32_f.c +++ b/oshmem/shmem/fortran/shmem_iput32_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_IPUT32, shmem_iput32) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_iput4_f.c b/oshmem/shmem/fortran/shmem_iput4_f.c index 4e89c33bdb3..aef7a6670a6 100644 --- a/oshmem/shmem/fortran/shmem_iput4_f.c +++ b/oshmem/shmem/fortran/shmem_iput4_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_IPUT4, shmem_iput4) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_iput64_f.c b/oshmem/shmem/fortran/shmem_iput64_f.c index 0f4fb3e6d0e..35a4392bdce 100644 --- a/oshmem/shmem/fortran/shmem_iput64_f.c +++ b/oshmem/shmem/fortran/shmem_iput64_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_IPUT64, shmem_iput64) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_iput8_f.c b/oshmem/shmem/fortran/shmem_iput8_f.c index 0c5e0b2fcaa..434488b399a 100644 --- a/oshmem/shmem/fortran/shmem_iput8_f.c +++ b/oshmem/shmem/fortran/shmem_iput8_f.c @@ 
-19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_IPUT8, shmem_iput8) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_lock_f.c b/oshmem/shmem/fortran/shmem_lock_f.c index ce5bd48b4fa..563535f48be 100644 --- a/oshmem/shmem/fortran/shmem_lock_f.c +++ b/oshmem/shmem/fortran/shmem_lock_f.c @@ -17,11 +17,11 @@ #include "oshmem/shmem/shmem_lock.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_SET_LOCK, shmem_set_lock) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_CLEAR_LOCK, shmem_clear_lock) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_TEST_LOCK, shmem_test_lock) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_logical_get_f.c b/oshmem/shmem/fortran/shmem_logical_get_f.c index 68b0188f1ae..ed9cf4327e2 100644 --- a/oshmem/shmem/fortran/shmem_logical_get_f.c +++ b/oshmem/shmem/fortran/shmem_logical_get_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_LOGICAL_GET, shmem_logical_get) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_logical_iget_f.c b/oshmem/shmem/fortran/shmem_logical_iget_f.c index 9f6e54ae9ac..e3ab0b79dc5 100644 --- a/oshmem/shmem/fortran/shmem_logical_iget_f.c +++ b/oshmem/shmem/fortran/shmem_logical_iget_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING 
-#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_LOGICAL_IGET, shmem_logical_iget) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_logical_iput_f.c b/oshmem/shmem/fortran/shmem_logical_iput_f.c index f0f704fbc20..dc22fcd3133 100644 --- a/oshmem/shmem/fortran/shmem_logical_iput_f.c +++ b/oshmem/shmem/fortran/shmem_logical_iput_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_LOGICAL_IPUT, shmem_logical_iput) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_logical_put_f.c b/oshmem/shmem/fortran/shmem_logical_put_f.c index d1c2cf39b15..b41ac40a020 100644 --- a/oshmem/shmem/fortran/shmem_logical_put_f.c +++ b/oshmem/shmem/fortran/shmem_logical_put_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_LOGICAL_PUT, shmem_logical_put) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_max_to_all_f.c b/oshmem/shmem/fortran/shmem_max_to_all_f.c index 8b3b4465317..9c3d2c9df73 100644 --- a/oshmem/shmem/fortran/shmem_max_to_all_f.c +++ b/oshmem/shmem/fortran/shmem_max_to_all_f.c @@ -18,14 +18,14 @@ #include "oshmem/op/op.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT2_MAX_TO_ALL, 
shmem_int2_max_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_MAX_TO_ALL, shmem_int4_max_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_MAX_TO_ALL, shmem_int8_max_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL4_MAX_TO_ALL, shmem_real4_max_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL8_MAX_TO_ALL, shmem_real8_max_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL16_MAX_TO_ALL, shmem_real16_max_to_all) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_min_to_all_f.c b/oshmem/shmem/fortran/shmem_min_to_all_f.c index 22201286f8e..9f73d80f663 100644 --- a/oshmem/shmem/fortran/shmem_min_to_all_f.c +++ b/oshmem/shmem/fortran/shmem_min_to_all_f.c @@ -18,14 +18,14 @@ #include "oshmem/op/op.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT2_MIN_TO_ALL, shmem_int2_min_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_MIN_TO_ALL, shmem_int4_min_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_MIN_TO_ALL, shmem_int8_min_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL4_MIN_TO_ALL, shmem_real4_min_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL8_MIN_TO_ALL, shmem_real8_min_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL16_MIN_TO_ALL, shmem_real16_min_to_all) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_my_pe_f.c b/oshmem/shmem/fortran/shmem_my_pe_f.c index f8823896bb9..d7c844233df 100644 --- a/oshmem/shmem/fortran/shmem_my_pe_f.c +++ b/oshmem/shmem/fortran/shmem_my_pe_f.c @@ -14,11 +14,11 @@ #include "oshmem/include/shmem.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_MY_PE, 
shmem_my_pe) SHMEM_GENERATE_WEAK_BINDINGS(MY_PE, my_pe) #pragma weak _my_pe_ = p_my_pe_ -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (MPI_Fint, diff --git a/oshmem/shmem/fortran/shmem_n_pes_f.c b/oshmem/shmem/fortran/shmem_n_pes_f.c index 3647625055e..ae39d46903a 100644 --- a/oshmem/shmem/fortran/shmem_n_pes_f.c +++ b/oshmem/shmem/fortran/shmem_n_pes_f.c @@ -14,10 +14,10 @@ #include "oshmem/include/shmem.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_N_PES, shmem_n_pes) SHMEM_GENERATE_WEAK_BINDINGS(NUM_PES, num_pes) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (MPI_Fint, diff --git a/oshmem/shmem/fortran/shmem_or_to_all_f.c b/oshmem/shmem/fortran/shmem_or_to_all_f.c index 7bca154b606..f800d3a8062 100644 --- a/oshmem/shmem/fortran/shmem_or_to_all_f.c +++ b/oshmem/shmem/fortran/shmem_or_to_all_f.c @@ -18,11 +18,11 @@ #include "oshmem/op/op.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT2_OR_TO_ALL, shmem_int2_or_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_OR_TO_ALL, shmem_int4_or_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_OR_TO_ALL, shmem_int8_or_to_all) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_pe_accessible_f.c b/oshmem/shmem/fortran/shmem_pe_accessible_f.c index 191633897d5..cc90e1c01f9 100644 --- a/oshmem/shmem/fortran/shmem_pe_accessible_f.c +++ b/oshmem/shmem/fortran/shmem_pe_accessible_f.c @@ -16,9 +16,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include 
"oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PE_ACCESSIBLE, shmem_pe_accessible) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_logical_t, diff --git a/oshmem/shmem/fortran/shmem_prod_to_all_f.c b/oshmem/shmem/fortran/shmem_prod_to_all_f.c index c093bb00573..e1732dbf2cb 100644 --- a/oshmem/shmem/fortran/shmem_prod_to_all_f.c +++ b/oshmem/shmem/fortran/shmem_prod_to_all_f.c @@ -18,7 +18,7 @@ #include "oshmem/op/op.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT2_PROD_TO_ALL, shmem_int2_prod_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_PROD_TO_ALL, shmem_int4_prod_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_PROD_TO_ALL, shmem_int8_prod_to_all) @@ -27,7 +27,7 @@ SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_COMP8_PROD_TO_ALL, shmem_comp8_prod_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL4_PROD_TO_ALL, shmem_real4_prod_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL8_PROD_TO_ALL, shmem_real8_prod_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL16_PROD_TO_ALL, shmem_real16_prod_to_all) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_ptr_f.c b/oshmem/shmem/fortran/shmem_ptr_f.c index 74df2655695..78669cfa53d 100644 --- a/oshmem/shmem/fortran/shmem_ptr_f.c +++ b/oshmem/shmem/fortran/shmem_ptr_f.c @@ -16,9 +16,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PTR, shmem_ptr) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif 
SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (FORTRAN_POINTER_T *, diff --git a/oshmem/shmem/fortran/shmem_put128_f.c b/oshmem/shmem/fortran/shmem_put128_f.c index cabb0ded118..a77e82f16a9 100644 --- a/oshmem/shmem/fortran/shmem_put128_f.c +++ b/oshmem/shmem/fortran/shmem_put128_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PUT128, shmem_put128) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_put32_f.c b/oshmem/shmem/fortran/shmem_put32_f.c index 2f2c2510ef1..f7e5add52fb 100644 --- a/oshmem/shmem/fortran/shmem_put32_f.c +++ b/oshmem/shmem/fortran/shmem_put32_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PUT32, shmem_put32) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_put4_f.c b/oshmem/shmem/fortran/shmem_put4_f.c index 6fba537602a..7546a7905e5 100644 --- a/oshmem/shmem/fortran/shmem_put4_f.c +++ b/oshmem/shmem/fortran/shmem_put4_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PUT4, shmem_put4) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_put64_f.c b/oshmem/shmem/fortran/shmem_put64_f.c index caad85c91a1..dd28c09dc80 100644 --- a/oshmem/shmem/fortran/shmem_put64_f.c +++ 
b/oshmem/shmem/fortran/shmem_put64_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PUT64, shmem_put64) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_put8_f.c b/oshmem/shmem/fortran/shmem_put8_f.c index ba484ecaf19..b9a0b1a7b04 100644 --- a/oshmem/shmem/fortran/shmem_put8_f.c +++ b/oshmem/shmem/fortran/shmem_put8_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PUT8, shmem_put8) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_put_nb_f.c b/oshmem/shmem/fortran/shmem_put_nb_f.c index b1cabcbfa18..fb43c6b1e07 100644 --- a/oshmem/shmem/fortran/shmem_put_nb_f.c +++ b/oshmem/shmem/fortran/shmem_put_nb_f.c @@ -18,7 +18,7 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PUTMEM_NBI, shmem_putmem_nbi) @@ -35,7 +35,7 @@ SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PUT32_NBI, shmem_put32_nbi) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PUT64_NBI, shmem_put64_nbi) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PUT128_NBI, shmem_put128_nbi) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_putmem_f.c b/oshmem/shmem/fortran/shmem_putmem_f.c index 663540fea27..be93d6eec50 100644 --- a/oshmem/shmem/fortran/shmem_putmem_f.c +++ b/oshmem/shmem/fortran/shmem_putmem_f.c @@ -19,9 
+19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_PUTMEM, shmem_putmem) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_quiet_f.c b/oshmem/shmem/fortran/shmem_quiet_f.c index 24c03e6ae15..ee42c72890e 100644 --- a/oshmem/shmem/fortran/shmem_quiet_f.c +++ b/oshmem/shmem/fortran/shmem_quiet_f.c @@ -15,9 +15,9 @@ #include "oshmem/mca/spml/spml.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_QUIET, shmem_quiet) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_real4_fetch_f.c b/oshmem/shmem/fortran/shmem_real4_fetch_f.c index 66c3b84b9c6..56d5330c477 100644 --- a/oshmem/shmem/fortran/shmem_real4_fetch_f.c +++ b/oshmem/shmem/fortran/shmem_real4_fetch_f.c @@ -18,9 +18,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL4_FETCH, shmem_real4_fetch) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_real4_t, diff --git a/oshmem/shmem/fortran/shmem_real4_set_f.c b/oshmem/shmem/fortran/shmem_real4_set_f.c index 1a4e222cc2f..e61e584afb4 100644 --- a/oshmem/shmem/fortran/shmem_real4_set_f.c +++ b/oshmem/shmem/fortran/shmem_real4_set_f.c @@ -18,9 +18,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" 
SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL4_SET, shmem_real4_set) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_real4_swap_f.c b/oshmem/shmem/fortran/shmem_real4_swap_f.c index 57f21d55ba1..4ac9407f5f5 100644 --- a/oshmem/shmem/fortran/shmem_real4_swap_f.c +++ b/oshmem/shmem/fortran/shmem_real4_swap_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL4_SWAP, shmem_real4_swap) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_real4_t, diff --git a/oshmem/shmem/fortran/shmem_real8_fetch_f.c b/oshmem/shmem/fortran/shmem_real8_fetch_f.c index d7a2381df6a..37e0fc09d5a 100644 --- a/oshmem/shmem/fortran/shmem_real8_fetch_f.c +++ b/oshmem/shmem/fortran/shmem_real8_fetch_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL8_FETCH, shmem_real8_fetch) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_real8_t, diff --git a/oshmem/shmem/fortran/shmem_real8_set_f.c b/oshmem/shmem/fortran/shmem_real8_set_f.c index 7c7384e43af..7d29dac429b 100644 --- a/oshmem/shmem/fortran/shmem_real8_set_f.c +++ b/oshmem/shmem/fortran/shmem_real8_set_f.c @@ -18,9 +18,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL8_SET, shmem_real8_set) -#include "oshmem/shmem/fortran/profile/defines.h" +#include 
"oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_real8_swap_f.c b/oshmem/shmem/fortran/shmem_real8_swap_f.c index e08822c2f82..4d084283629 100644 --- a/oshmem/shmem/fortran/shmem_real8_swap_f.c +++ b/oshmem/shmem/fortran/shmem_real8_swap_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL8_SWAP, shmem_real8_swap) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_real8_t, diff --git a/oshmem/shmem/fortran/shmem_real_get_f.c b/oshmem/shmem/fortran/shmem_real_get_f.c index ef0c337ccd6..8d2370a0d8a 100644 --- a/oshmem/shmem/fortran/shmem_real_get_f.c +++ b/oshmem/shmem/fortran/shmem_real_get_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL_GET, shmem_real_get) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_real_iget_f.c b/oshmem/shmem/fortran/shmem_real_iget_f.c index dd6d4567b23..f1a54d2dc22 100644 --- a/oshmem/shmem/fortran/shmem_real_iget_f.c +++ b/oshmem/shmem/fortran/shmem_real_iget_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL_IGET, shmem_real_iget) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_real_iput_f.c 
b/oshmem/shmem/fortran/shmem_real_iput_f.c index 7383dc74e3b..13f1a56397b 100644 --- a/oshmem/shmem/fortran/shmem_real_iput_f.c +++ b/oshmem/shmem/fortran/shmem_real_iput_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL_IPUT, shmem_real_iput) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_real_put_f.c b/oshmem/shmem/fortran/shmem_real_put_f.c index 3ef8582069b..26e2baff19c 100644 --- a/oshmem/shmem/fortran/shmem_real_put_f.c +++ b/oshmem/shmem/fortran/shmem_real_put_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL_PUT, shmem_real_put) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_sum_to_all_f.c b/oshmem/shmem/fortran/shmem_sum_to_all_f.c index ca48f484407..aca69a06b76 100644 --- a/oshmem/shmem/fortran/shmem_sum_to_all_f.c +++ b/oshmem/shmem/fortran/shmem_sum_to_all_f.c @@ -18,7 +18,7 @@ #include "oshmem/op/op.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT2_SUM_TO_ALL, shmem_int2_sum_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_SUM_TO_ALL, shmem_int4_sum_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_SUM_TO_ALL, shmem_int8_sum_to_all) @@ -27,7 +27,7 @@ SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_COMP8_SUM_TO_ALL, shmem_comp8_sum_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL4_SUM_TO_ALL, shmem_real4_sum_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL8_SUM_TO_ALL, 
shmem_real8_sum_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_REAL16_SUM_TO_ALL, shmem_real16_sum_to_all) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_swap_f.c b/oshmem/shmem/fortran/shmem_swap_f.c index 4378d72908c..aa03abcbfa4 100644 --- a/oshmem/shmem/fortran/shmem_swap_f.c +++ b/oshmem/shmem/fortran/shmem_swap_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_SWAP, shmem_swap) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (MPI_Fint, diff --git a/oshmem/shmem/fortran/shmem_wait_f.c b/oshmem/shmem/fortran/shmem_wait_f.c index 5cd12f5487f..e188a35b2ce 100644 --- a/oshmem/shmem/fortran/shmem_wait_f.c +++ b/oshmem/shmem/fortran/shmem_wait_f.c @@ -17,9 +17,9 @@ #include "ompi/datatype/ompi_datatype.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_WAIT, shmem_wait) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_wait_until_f.c b/oshmem/shmem/fortran/shmem_wait_until_f.c index c3f47c2946d..574475ce4ee 100644 --- a/oshmem/shmem/fortran/shmem_wait_until_f.c +++ b/oshmem/shmem/fortran/shmem_wait_until_f.c @@ -17,9 +17,9 @@ #include "ompi/datatype/ompi_datatype.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_WAIT_UNTIL, shmem_wait_until) -#include "oshmem/shmem/fortran/profile/defines.h" +#include 
"oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shmem_xor_to_all_f.c b/oshmem/shmem/fortran/shmem_xor_to_all_f.c index f85d62b92b0..25dd043a2da 100644 --- a/oshmem/shmem/fortran/shmem_xor_to_all_f.c +++ b/oshmem/shmem/fortran/shmem_xor_to_all_f.c @@ -18,13 +18,13 @@ #include "oshmem/op/op.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT2_XOR_TO_ALL, shmem_int2_xor_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT4_XOR_TO_ALL, shmem_int4_xor_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_INT8_XOR_TO_ALL, shmem_int8_xor_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_COMP4_XOR_TO_ALL, shmem_comp4_xor_to_all) SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_COMP8_XOR_TO_ALL, shmem_comp8_xor_to_all) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shpalloc_f.c b/oshmem/shmem/fortran/shpalloc_f.c index 424ae85fa0c..b2104712978 100644 --- a/oshmem/shmem/fortran/shpalloc_f.c +++ b/oshmem/shmem/fortran/shpalloc_f.c @@ -19,9 +19,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHPALLOC, shpalloc) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shpclmove_f.c b/oshmem/shmem/fortran/shpclmove_f.c index 10266a2aba9..c39168fce62 100644 --- a/oshmem/shmem/fortran/shpclmove_f.c +++ b/oshmem/shmem/fortran/shpclmove_f.c @@ -17,9 +17,9 @@ #include "oshmem/runtime/runtime.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHPCLMOVE, 
shpclmove) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/shmem/fortran/shpdeallc_f.c b/oshmem/shmem/fortran/shpdeallc_f.c index fd529a6d8c8..9eef7901965 100644 --- a/oshmem/shmem/fortran/shpdeallc_f.c +++ b/oshmem/shmem/fortran/shpdeallc_f.c @@ -16,9 +16,9 @@ #include "stdio.h" #if OSHMEM_PROFILING -#include "oshmem/shmem/fortran/profile/pbindings.h" +#include "oshmem/shmem/fortran/pbindings.h" SHMEM_GENERATE_WEAK_BINDINGS(SHPDEALLC, shpdeallc) -#include "oshmem/shmem/fortran/profile/defines.h" +#include "oshmem/shmem/fortran/profile-defines.h" #endif SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void, diff --git a/oshmem/tools/oshmem_info/param.c b/oshmem/tools/oshmem_info/param.c index bcc85693742..f9ebc94197e 100644 --- a/oshmem/tools/oshmem_info/param.c +++ b/oshmem/tools/oshmem_info/param.c @@ -40,7 +40,7 @@ #include "opal/runtime/opal_info_support.h" #include "ompi/tools/ompi_info/ompi_info.h" -#include "ompi/include/mpi_portable_platform.h" +#include "opal/opal_portable_platform.h" #include "oshmem/tools/oshmem_info/oshmem_info.h" @@ -147,9 +147,9 @@ void oshmem_info_do_config(bool want_all) opal_info_out("C compiler absolute", "compiler:c:absolute", OPAL_CC_ABSOLUTE); opal_info_out("C compiler family name", "compiler:c:familyname", - _STRINGIFY(OPAL_BUILD_PLATFORM_COMPILER_FAMILYNAME)); + PLATFORM_STRINGIFY(PLATFORM_COMPILER_FAMILYNAME)); opal_info_out("C compiler version", "compiler:c:version", - _STRINGIFY(OPAL_BUILD_PLATFORM_COMPILER_VERSION_STR)); + PLATFORM_COMPILER_VERSION_STR); if (want_all) { opal_info_out_int("C char size", "compiler:c:sizeof:char", sizeof(char)); diff --git a/oshmem/tools/wrappers/shmemc++-wrapper-data.txt.in b/oshmem/tools/wrappers/shmemc++-wrapper-data.txt.in index 31cdebc8d04..8f57729e2d8 100644 --- a/oshmem/tools/wrappers/shmemc++-wrapper-data.txt.in +++ b/oshmem/tools/wrappers/shmemc++-wrapper-data.txt.in @@ 
-2,6 +2,7 @@ # All rights reserved. # Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2020 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2021 IBM Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -29,8 +30,8 @@ linker_flags=@OMPI_WRAPPER_EXTRA_LDFLAGS@ # intentionally only link in the SHMEM and MPI libraries (OPAL, # etc. are pulled in implicitly) because we intend SHMEM/MPI # applications to only use the SHMEM and MPI APIs. -libs=-loshmem -lmpi -libs_static=-loshmem -lmpi -l@OPAL_LIB_NAME@ @OMPI_WRAPPER_EXTRA_LIBS@ +libs=-loshmem -l@OMPI_LIBMPI_NAME@ +libs_static=-loshmem -l@OMPI_LIBMPI_NAME@ -l@OPAL_LIB_NAME@ @OMPI_WRAPPER_EXTRA_LIBS@ dyn_lib_file=liboshmem.@OPAL_DYN_LIB_SUFFIX@ static_lib_file=liboshmem.a required_file= diff --git a/test/asm/Makefile.am b/test/asm/Makefile.am index d8f0e4ccc3b..53359a4beaf 100644 --- a/test/asm/Makefile.am +++ b/test/asm/Makefile.am @@ -90,5 +90,5 @@ maintainer-clean-local: atomic_math_noinline.c \ atomic_cmpset_noinline.c -distclean: +distclean-local: rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/asm/atomic_barrier.c b/test/asm/atomic_barrier.c index 065e116a283..264951b45dd 100644 --- a/test/asm/atomic_barrier.c +++ b/test/asm/atomic_barrier.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,8 +26,6 @@ int main(int argc, char *argv[]) { -#if OPAL_HAVE_ATOMIC_MEM_BARRIER - /* there really isn't a great way to test that the barriers actually barrier, but at least make sure they don't kill the machine.*/ @@ -35,7 +35,4 @@ int main(int argc, char *argv[]) opal_atomic_wmb(); return 0; -#else - return 77; -#endif } diff --git a/test/asm/atomic_cmpset.c b/test/asm/atomic_cmpset.c index 8a948608bf0..9f6d84588da 100644 --- a/test/asm/atomic_cmpset.c +++ b/test/asm/atomic_cmpset.c @@ -187,42 +187,6 @@ int main(int argc, char *argv[]) assert(old128 == 42); #endif - /* -- cmpset int tests -- */ - - volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_compare_exchange_strong(&volint, &oldint, newint) == true); - opal_atomic_rmb(); - assert(volint == newint); - assert(oldint == 42); - - volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_compare_exchange_strong(&volint, &oldint, newint) == false); - opal_atomic_rmb(); - assert(volint == 42); - assert(oldint == 42); - - volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_compare_exchange_strong_acq(&volint, &oldint, newint) == true); - assert(volint == newint); - assert(oldint == 42); - - volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_compare_exchange_strong_acq(&volint, &oldint, newint) == false); - assert(volint == 42); - assert(oldint == 42); - - volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_compare_exchange_strong_rel(&volint, &oldint, newint) == true); - opal_atomic_rmb(); - assert(volint == newint); - assert(oldint == 42); - - volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_compare_exchange_strong_rel(&volint, &oldint, newint) == false); - opal_atomic_rmb(); - assert(volint == 42); - assert(oldint == 42); - /* -- cmpset ptr tests -- */ volptr = 42, oldptr = 42, newptr = 50; diff --git a/test/class/Makefile.am b/test/class/Makefile.am index 37392e6a01f..ee37245f909 100644 --- 
a/test/class/Makefile.am +++ b/test/class/Makefile.am @@ -97,5 +97,5 @@ opal_fifo_DEPENDENCIES = $(opal_fifo_LDADD) clean-local: rm -f opal_bitmap_test_out.txt opal_hash_table_test_out.txt opal_proc_table_test_out.txt -distclean: +distclean-local: rm -rf *.dSYM .deps .libs *.log *.txt *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/datatype/Makefile.am b/test/datatype/Makefile.am index 9d2584368e8..3d6fd3289b5 100644 --- a/test/datatype/Makefile.am +++ b/test/datatype/Makefile.am @@ -108,5 +108,5 @@ partial_LDADD = \ $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la -distclean: +distclean-local: rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/datatype/checksum.c b/test/datatype/checksum.c index 4c8a60bf915..bda063f9580 100644 --- a/test/datatype/checksum.c +++ b/test/datatype/checksum.c @@ -151,7 +151,6 @@ int main(int argc, char *argv[]) free(packed); /* clean-ups all data allocations */ - ompi_datatype_finalize(); opal_finalize_util(); return 0; diff --git a/test/datatype/ddt_pack.c b/test/datatype/ddt_pack.c index 7d0f0461baf..59f88290ab3 100644 --- a/test/datatype/ddt_pack.c +++ b/test/datatype/ddt_pack.c @@ -500,7 +500,6 @@ int main(int argc, char *argv[]) ompi_datatype_destroy(&dup_type); cleanup: - ompi_datatype_finalize(); opal_finalize_util(); return ret; diff --git a/test/datatype/ddt_raw.c b/test/datatype/ddt_raw.c index 0dcc5e13a3b..769580ca0b5 100644 --- a/test/datatype/ddt_raw.c +++ b/test/datatype/ddt_raw.c @@ -342,7 +342,6 @@ int main(int argc, char *argv[]) assert(pdt1 == NULL); /* clean-ups all data allocations */ - ompi_datatype_finalize(); opal_finalize_util(); return OMPI_SUCCESS; diff --git a/test/datatype/ddt_test.c b/test/datatype/ddt_test.c index a61019cc4e7..214c47df3f6 100644 --- a/test/datatype/ddt_test.c +++ b/test/datatype/ddt_test.c @@ -579,7 +579,7 @@ int main(int argc, char *argv[]) assert(pdt2 == NULL); /* clean-ups all data allocations */ - 
ompi_datatype_finalize(); + opal_finalize_util(); return OMPI_SUCCESS; } diff --git a/test/datatype/external32.c b/test/datatype/external32.c index 397c5b5e21b..b96d000f20e 100644 --- a/test/datatype/external32.c +++ b/test/datatype/external32.c @@ -260,7 +260,7 @@ int main(int argc, char *argv[]) } } - ompi_datatype_finalize(); + opal_finalize_util(); return 0; } diff --git a/test/datatype/partial.c b/test/datatype/partial.c index bfb1c5d59dc..a4b537aa395 100644 --- a/test/datatype/partial.c +++ b/test/datatype/partial.c @@ -173,7 +173,6 @@ int main(int argc, char *argv[]) free(packed); /* clean-ups all data allocations */ - ompi_datatype_finalize(); opal_finalize_util(); return 0; diff --git a/test/datatype/position.c b/test/datatype/position.c index d4ec8ccab57..a4a9f212644 100644 --- a/test/datatype/position.c +++ b/test/datatype/position.c @@ -267,7 +267,6 @@ int main(int argc, char *argv[]) } free(segments); - ompi_datatype_finalize(); opal_finalize_util(); return (0 == errors ? 0 : -1); diff --git a/test/datatype/position_noncontig.c b/test/datatype/position_noncontig.c index 87412c2c773..e700906a6ee 100644 --- a/test/datatype/position_noncontig.c +++ b/test/datatype/position_noncontig.c @@ -235,7 +235,6 @@ int main(int argc, char *argv[]) } free(segments); - ompi_datatype_finalize(); opal_finalize_util(); return (0 == errors ? 
0 : -1); diff --git a/test/datatype/reduce_local.c b/test/datatype/reduce_local.c index 1fbc12c1705..cc0063be2f9 100644 --- a/test/datatype/reduce_local.c +++ b/test/datatype/reduce_local.c @@ -161,7 +161,7 @@ do { \ for(int _r = repeats; _r > 0; _r--) { \ memcpy(_p2, _p3, sizeof(TYPE) * (COUNT)); \ tstart = MPI_Wtime(); \ - MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT), (MPITYPE), (MPIOP)); \ + MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT)-_k, (MPITYPE), (MPIOP)); \ tend = MPI_Wtime(); \ duration[_k] += (tend - tstart); \ if( check ) { \ @@ -281,6 +281,7 @@ int main(int argc, char **argv) " -1 : (mis)alignment in elements for the first op\n" " -2 : (mis)alignment in elements for the result\n" " -v: increase the verbosity level\n" + " -f: turn off correctness checks\n" " -h: this help message\n", argv[0]); exit(0); diff --git a/test/event/Makefile.am b/test/event/Makefile.am index 7664bd24681..ffba4a0f224 100644 --- a/test/event/Makefile.am +++ b/test/event/Makefile.am @@ -44,5 +44,5 @@ event_test_LDADD = \ $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la event_test_DEPENDENCIES = $(event_test_LDADD) -distclean: +distclean-local: rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/memchecker/Makefile.am b/test/memchecker/Makefile.am index 338d56c6ef7..b2cfd5575ad 100644 --- a/test/memchecker/Makefile.am +++ b/test/memchecker/Makefile.am @@ -60,5 +60,5 @@ non_blocking_recv_test_LDADD = \ $(top_builddir)/ompi/lib@OPAL_LIB_NAME@mpi.la non_blocking_recv_test_DEPENDENCIES = $(non_blocking_recv_test_LDADD) -distclean: +distclean-local: rm -rf *.dSYM .deps *.log *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/monitoring/Makefile.am b/test/monitoring/Makefile.am index 9be8248ba0d..ed7b257d8e7 100644 --- a/test/monitoring/Makefile.am +++ b/test/monitoring/Makefile.am @@ -45,5 +45,5 @@ if PROJECT_OMPI $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la endif # PROJECT_OMPI -distclean: +distclean-local: rm -rf *.dSYM .deps .libs *.la *.lo monitoring_test 
test_pvar_access test_overhead check_monitoring example_reduce_count prof *.log *.o *.trs Makefile diff --git a/test/mpi/environment/Makefile.am b/test/mpi/environment/Makefile.am index 885c15b09d6..820856a3d51 100644 --- a/test/mpi/environment/Makefile.am +++ b/test/mpi/environment/Makefile.am @@ -29,5 +29,5 @@ chello_LDADD = \ chello_DEPENDENCIES = $(chello_LDADD) -distclean: +distclean-local: rm -rf *.dSYM .deps *.log *.o *.trs $(noinst_PROGRAMS) Makefile diff --git a/test/mpool/Makefile.am b/test/mpool/Makefile.am index 83fe60f2019..11ad7e19e76 100644 --- a/test/mpool/Makefile.am +++ b/test/mpool/Makefile.am @@ -16,6 +16,6 @@ mpool_memkind_SOURCES = mpool_memkind.c LDFLAGS = $(OPAL_PKG_CONFIG_LDFLAGS) LDADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la -distclean: +distclean-local: rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/runtime/Makefile.am b/test/runtime/Makefile.am index a632cbb1d54..676bfc21de9 100644 --- a/test/runtime/Makefile.am +++ b/test/runtime/Makefile.am @@ -56,5 +56,5 @@ opal_init_finalize_LDADD = \ $(top_builddir)/test/support/libsupport.a opal_init_finalize_DEPENDENCIES = $(opal_init_finalize_LDADD) -distclean: +distclean-local: rm -rf *.dSYM .deps *.log *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/spc/Makefile.am b/test/spc/Makefile.am index 420420f8dea..4652e3c4440 100644 --- a/test/spc/Makefile.am +++ b/test/spc/Makefile.am @@ -20,5 +20,5 @@ if PROJECT_OMPI $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la endif # PROJECT_OMPI -distclean: +distclean-local: rm -rf *.dSYM .deps .libs *.la *.lo spc_test prof *.log *.o *.trs Makefile diff --git a/test/support/Makefile.am b/test/support/Makefile.am index 85f3d3f8a1e..e8eadefc969 100644 --- a/test/support/Makefile.am +++ b/test/support/Makefile.am @@ -32,5 +32,5 @@ libsupport_a_SOURCES = \ support.c \ support.h -distclean: +distclean-local: rm -rf *.dSYM .deps *.log *.o *.trs $(check_LIBRARIES) Makefile diff --git a/test/threads/Makefile.am 
b/test/threads/Makefile.am index f0a9a57798e..b8d1923006d 100644 --- a/test/threads/Makefile.am +++ b/test/threads/Makefile.am @@ -28,9 +28,7 @@ check_PROGRAMS = \ opal_condition \ opal_atomic_thread_bench -# JMS possibly to be re-added when #1232 is fixed -#TESTS = $(check_PROGRAMS) -TESTS = +TESTS = $(check_PROGRAMS) opal_thread_SOURCES = opal_thread.c opal_thread_LDADD = \ @@ -50,5 +48,5 @@ opal_atomic_thread_bench_LDADD = \ $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la opal_atomic_thread_bench_DEPENDENCIES = $(opal_atomic_thread_bench_LDADD) -distclean: +distclean-local: rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/threads/opal_condition.c b/test/threads/opal_condition.c index fb5e2a2a41c..78970632f10 100644 --- a/test/threads/opal_condition.c +++ b/test/threads/opal_condition.c @@ -40,38 +40,46 @@ static volatile int thr2_count = 0; static void *thr1_run(opal_object_t *obj) { - int i; clock_t c1, c2; - opal_mutex_lock(&mutex); c1 = clock(); - for (i = 0; i < TEST_COUNT; i++) { - opal_condition_wait(&thr1_cond, &mutex); - opal_condition_signal(&thr2_cond); + opal_mutex_lock(&mutex); + while (TEST_COUNT != thr1_count) { thr1_count++; + opal_condition_signal(&thr2_cond); + opal_condition_wait(&thr1_cond, &mutex); } - c2 = clock(); + + // Whoever gets here first needs to alert the other + // thread for their last iteration. 
+ opal_condition_signal(&thr2_cond); opal_mutex_unlock(&mutex); - fprintf(stderr, "thr1: time per iteration: %ld usec\n", (long) ((c2 - c1) / TEST_COUNT)); + c2 = clock(); + fprintf(stderr, "thr1: time per iteration: %ld usec\n", (long)((c2 - c1) / TEST_COUNT)); return NULL; } static void *thr2_run(opal_object_t *obj) { - int i; clock_t c1, c2; - opal_mutex_lock(&mutex); c1 = clock(); - for (i = 0; i < TEST_COUNT; i++) { + opal_mutex_lock(&mutex); + while(TEST_COUNT != thr2_count) { + thr2_count++; opal_condition_signal(&thr1_cond); opal_condition_wait(&thr2_cond, &mutex); - thr2_count++; } - c2 = clock(); + + // Whoever gets here first needs to alert the other + // thread for the last iteration. + opal_condition_signal(&thr1_cond); opal_mutex_unlock(&mutex); - fprintf(stderr, "thr2: time per iteration: %ld usec\n", (long) ((c2 - c1) / TEST_COUNT)); + + c2 = clock(); + fprintf(stderr, "thr2: time per iteration: %ld usec\n", (long)((c2 - c1) / TEST_COUNT)); return NULL; } + int main(int argc, char **argv) { int rc; diff --git a/test/util/Makefile.am b/test/util/Makefile.am index bfee3bea200..f6186f3a912 100644 --- a/test/util/Makefile.am +++ b/test/util/Makefile.am @@ -131,6 +131,6 @@ bipartite_graph_DEPENDENCIES = $(bipartite_graph_LDADD) clean-local: rm -f test_session_dir_out test-file opal_path_nfs.out -distclean: +distclean-local: rm -rf *.dSYM .deps .libs *.out *.log *.o *.trs $(check_PROGRAMS) Makefile