Skip to content

Commit

Permalink
Merge pull request #2310 from edwardhartnett/ejh_pnetcdf
Browse files Browse the repository at this point in the history
fix for inq_dimlen() bug with unlimited dimensions on parallel I/O builds
  • Loading branch information
WardF authored Apr 26, 2022
2 parents ec89d88 + 5400018 commit 8714b51
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 12 deletions.
18 changes: 11 additions & 7 deletions libhdf5/hdf5dim.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,20 +161,24 @@ HDF5_inq_dim(int ncid, int dimid, char *name, size_t *lenp)
{
if (dim->unlimited)
{
*lenp = 0;

#ifndef USE_PARALLEL
/* Shortcut for non-parallel operation: if the dim->len is
* non-zero, it will be set to the correct size. */
if (dim->len)
*lenp = dim->len;
#endif

/* Since this is an unlimited dimension, go to the file
and see how many records there are. Take the max number
of records from all the vars that share this
dimension. */
*lenp = 0;
if (dim->len == 0) {
if (*lenp == 0)
{
if ((ret = nc4_find_dim_len(dim_grp, dimid, &lenp)))
return ret;
if (h5->no_write == NC_TRUE) {
dim->len = *lenp;
}
}
else {
*lenp = dim->len;
}
}
else
Expand Down
26 changes: 22 additions & 4 deletions libhdf5/hdf5internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ find_var_dim_max_length(NC_GRP_INFO_T *grp, int varid, int dimid,

*maxlen = 0;

LOG((3, "find_var_dim_max_length varid %d dimid %d", varid, dimid));

/* Find this var. */
var = (NC_VAR_INFO_T*)ncindexith(grp->vars,varid);
if (!var) return NC_ENOTVAR;
Expand Down Expand Up @@ -157,11 +159,27 @@ find_var_dim_max_length(NC_GRP_INFO_T *grp, int varid, int dimid,
BAIL(NC_EHDFERR);
LOG((5, "find_var_dim_max_length: varid %d len %d max: %d",
varid, (int)h5dimlen[0], (int)h5dimlenmax[0]));
for (d=0; d<dataset_ndims; d++) {
if (var->dimids[d] == dimid) {
for (d=0; d<dataset_ndims; d++)
if (var->dimids[d] == dimid)
*maxlen = *maxlen > h5dimlen[d] ? *maxlen : h5dimlen[d];
}
}

#ifdef USE_PARALLEL
/* If we are doing parallel I/O in collective mode (with
* either pnetcdf or HDF5), then communicate with all
* other tasks in the collective and find out which has
* the max value for the dimension size. */
assert(grp->nc4_info);
LOG((3, "before Allreduce *maxlen %ld grp->nc4_info->parallel %d var->parallel_access %d",
*maxlen, grp->nc4_info->parallel, var->parallel_access));
if (grp->nc4_info->parallel && var->parallel_access == NC_COLLECTIVE)
{
if ((MPI_SUCCESS != MPI_Allreduce(MPI_IN_PLACE, maxlen, 1,
MPI_UNSIGNED_LONG_LONG, MPI_MAX,
grp->nc4_info->comm)))
BAIL(NC_EMPI);
LOG((3, "after Allreduce *maxlen %ld", *maxlen));
}
#endif /* USE_PARALLEL */
}
}

Expand Down
3 changes: 2 additions & 1 deletion nc_test4/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ endif # BUILD_UTILITIES
if TEST_PARALLEL4
check_PROGRAMS += tst_mpi_parallel tst_parallel tst_parallel3 \
tst_parallel4 tst_parallel5 tst_nc4perf tst_mode tst_simplerw_coll_r \
tst_mode tst_parallel_zlib tst_parallel_compress tst_quantize_par
tst_mode tst_parallel_zlib tst_parallel_compress tst_quantize_par \
tst_parallel6
TESTS += run_par_test.sh
endif # TEST_PARALLEL4

Expand Down
4 changes: 4 additions & 0 deletions nc_test4/run_par_test.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,7 @@ echo
echo "Parallel I/O test for quantize feature."
@MPIEXEC@ -n 4 ./tst_quantize_par

echo
echo "Parallel I/O test contributed by wkliao from pnetcdf."
@MPIEXEC@ -n 4 ./tst_parallel6

74 changes: 74 additions & 0 deletions nc_test4/tst_parallel6.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/* Copyright 2022, UCAR/Unidata See COPYRIGHT file for copying and
* redistribution conditions.
*
* This parallel I/O test checks the behavior of nc_inq_dimlen() after
* parallel I/O writes.
*
* This program is taken from a PNetCDF issue:
* https://github.com/Parallel-NetCDF/PnetCDF/issues/72, thanks
* wkliao!
*
* wkliao, Ed Hartnett, 4/11/22
*/

#include <nc_tests.h>
#include "err_macros.h"
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <netcdf.h>
#include <netcdf_par.h>

#define FILENAME "tst_parallel6.nc"

/*
 * Parallel regression test for nc_inq_dimlen() on an unlimited dimension.
 *
 * Every MPI rank collectively writes exactly one record (at index == rank)
 * to a 1-D variable defined on an unlimited dimension, then immediately
 * queries the dimension length while the file is still open. The test
 * passes only if every rank sees a length equal to the number of ranks.
 *
 * Returns 0 on success. Failures are reported through the ERR macro
 * from err_macros.h.
 */
int main(int argc, char **argv)
{
    /* 'err' is kept as a local; the error macros in err_macros.h are
     * presumed to reference a variable of this name -- verify before
     * removing it. */
    int err = NC_NOERR, rank, nprocs;
    int ncid, varid, dimid;
    size_t start[1], count[1], nrecs;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (!rank)
        printf("\n*** Testing parallel I/O.\n");

    if (!rank)
        printf("*** testing record lenth with multiple processes writing records...");

    /* nc_set_log_level(4); */
    if (nc_create_par(FILENAME, NC_CLOBBER | NC_NETCDF4, MPI_COMM_WORLD,
                      MPI_INFO_NULL, &ncid)) ERR;

    if (nc_def_dim(ncid, "time", NC_UNLIMITED, &dimid)) ERR;
    if (nc_def_var(ncid, "var", NC_INT, 1, &dimid, &varid)) ERR;
    if (nc_var_par_access(ncid, varid, NC_COLLECTIVE)) ERR;
    if (nc_enddef(ncid)) ERR;

    /* Each rank writes a single record at its own index, so after the
     * collective write the unlimited dimension should hold nprocs
     * records. */
    start[0] = rank;
    count[0] = 1;
    if (nc_put_vara_int(ncid, varid, start, count, &rank)) ERR;

    /* Query the length before closing the file: this is the call whose
     * result is being tested. */
    if (nc_inq_dimlen(ncid, dimid, &nrecs)) ERR;
    if (nc_close(ncid)) ERR;
    /* nc_set_log_level(-1); */

    /* nprocs is a positive int; convert explicitly so the comparison
     * with the size_t record count is not an implicit signed/unsigned
     * mix. */
    if (nrecs != (size_t)nprocs)
    {
        printf("Rank %d error at line %d of file %s:\n", rank, __LINE__, __FILE__);
        /* size_t must be printed with %zu; %zd and %ld are mismatched
         * conversions for size_t. */
        printf("\tafter writing start=%zu count=%zu\n", start[0], count[0]);
        printf("\texpecting number of records = %d but got %zu\n",
               nprocs, nrecs);
        ERR;
    }

    if (!rank)
        SUMMARIZE_ERR;

    MPI_Finalize();

    if (!rank)
        FINAL_RESULTS;

    return 0;
}

0 comments on commit 8714b51

Please sign in to comment.