Skip to content

Commit 8714b51

Browse files
authored
Merge pull request #2310 from edwardhartnett/ejh_pnetcdf
fix for inq_dimlen() bug with unlimited dimensions on parallel I/O builds
2 parents ec89d88 + 5400018 commit 8714b51

File tree

5 files changed

+113
-12
lines changed

5 files changed

+113
-12
lines changed

libhdf5/hdf5dim.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -161,20 +161,24 @@ HDF5_inq_dim(int ncid, int dimid, char *name, size_t *lenp)
161161
{
162162
if (dim->unlimited)
163163
{
164+
*lenp = 0;
165+
166+
#ifndef USE_PARALLEL
167+
/* Shortcut for non-parallel operation: if the dim->len is
168+
* non-zero, it will be set to the correct size. */
169+
if (dim->len)
170+
*lenp = dim->len;
171+
#endif
172+
164173
/* Since this is an unlimited dimension, go to the file
165174
and see how many records there are. Take the max number
166175
of records from all the vars that share this
167176
dimension. */
168-
*lenp = 0;
169-
if (dim->len == 0) {
177+
if (*lenp == 0)
178+
{
170179
if ((ret = nc4_find_dim_len(dim_grp, dimid, &lenp)))
171180
return ret;
172-
if (h5->no_write == NC_TRUE) {
173181
dim->len = *lenp;
174-
}
175-
}
176-
else {
177-
*lenp = dim->len;
178182
}
179183
}
180184
else

libhdf5/hdf5internal.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ find_var_dim_max_length(NC_GRP_INFO_T *grp, int varid, int dimid,
117117

118118
*maxlen = 0;
119119

120+
LOG((3, "find_var_dim_max_length varid %d dimid %d", varid, dimid));
121+
120122
/* Find this var. */
121123
var = (NC_VAR_INFO_T*)ncindexith(grp->vars,varid);
122124
if (!var) return NC_ENOTVAR;
@@ -157,11 +159,27 @@ find_var_dim_max_length(NC_GRP_INFO_T *grp, int varid, int dimid,
157159
BAIL(NC_EHDFERR);
158160
LOG((5, "find_var_dim_max_length: varid %d len %d max: %d",
159161
varid, (int)h5dimlen[0], (int)h5dimlenmax[0]));
160-
for (d=0; d<dataset_ndims; d++) {
161-
if (var->dimids[d] == dimid) {
162+
for (d=0; d<dataset_ndims; d++)
163+
if (var->dimids[d] == dimid)
162164
*maxlen = *maxlen > h5dimlen[d] ? *maxlen : h5dimlen[d];
163-
}
164-
}
165+
166+
#ifdef USE_PARALLEL
167+
/* If we are doing parallel I/O in collective mode (with
168+
* either pnetcdf or HDF5), then communicate with all
169+
* other tasks in the collective and find out which has
170+
* the max value for the dimension size. */
171+
assert(grp->nc4_info);
172+
LOG((3, "before Allreduce *maxlen %ld grp->nc4_info->parallel %d var->parallel_access %d",
173+
*maxlen, grp->nc4_info->parallel, var->parallel_access));
174+
if (grp->nc4_info->parallel && var->parallel_access == NC_COLLECTIVE)
175+
{
176+
if ((MPI_SUCCESS != MPI_Allreduce(MPI_IN_PLACE, maxlen, 1,
177+
MPI_UNSIGNED_LONG_LONG, MPI_MAX,
178+
grp->nc4_info->comm)))
179+
BAIL(NC_EMPI);
180+
LOG((3, "after Allreduce *maxlen %ld", *maxlen));
181+
}
182+
#endif /* USE_PARALLEL */
165183
}
166184
}
167185

nc_test4/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ endif # BUILD_UTILITIES
9393
if TEST_PARALLEL4
9494
check_PROGRAMS += tst_mpi_parallel tst_parallel tst_parallel3 \
9595
tst_parallel4 tst_parallel5 tst_nc4perf tst_mode tst_simplerw_coll_r \
96-
tst_mode tst_parallel_zlib tst_parallel_compress tst_quantize_par
96+
tst_mode tst_parallel_zlib tst_parallel_compress tst_quantize_par \
97+
tst_parallel6
9798
TESTS += run_par_test.sh
9899
endif # TEST_PARALLEL4
99100

nc_test4/run_par_test.sh.in

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,7 @@ echo
6565
echo "Parallel I/O test for quantize feature."
6666
@MPIEXEC@ -n 4 ./tst_quantize_par
6767

68+
echo
69+
echo "Parallel I/O test contributed by wkliao from pnetcdf."
70+
@MPIEXEC@ -n 4 ./tst_parallel6
71+

nc_test4/tst_parallel6.c

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/* Copyright 2022, UCAR/Unidata See COPYRIGHT file for copying and
2+
* redistribution conditions.
3+
*
4+
* This parallel I/O test checks the behavior of nc_inq_dimlen() after
5+
* parallel I/O writes.
6+
*
7+
* This program is taken from a PnetCDF issue:
8+
* https://github.com/Parallel-NetCDF/PnetCDF/issues/72, thanks
9+
* wkliao!
10+
*
11+
* wkliao, Ed Hartnett, 4/11/22
12+
*/
13+
14+
#include <nc_tests.h>
15+
#include "err_macros.h"
16+
#include <stdio.h>
17+
#include <stdlib.h>
18+
#include <mpi.h>
19+
#include <netcdf.h>
20+
#include <netcdf_par.h>
21+
22+
#define FILENAME "tst_parallel6.nc"
23+
24+
int main(int argc, char** argv)
25+
{
26+
int err = NC_NOERR, rank, nprocs;
27+
int ncid, varid, dimid;
28+
size_t start[1], count[1], nrecs;
29+
30+
MPI_Init(&argc, &argv);
31+
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
32+
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
33+
34+
if (!rank)
35+
printf("\n*** Testing parallel I/O.\n");
36+
37+
if (!rank)
38+
printf("*** testing record lenth with multiple processes writing records...");
39+
40+
/* nc_set_log_level(4); */
41+
if (nc_create_par(FILENAME, NC_CLOBBER | NC_NETCDF4, MPI_COMM_WORLD,
42+
MPI_INFO_NULL, &ncid)) ERR;
43+
44+
if (nc_def_dim(ncid, "time", NC_UNLIMITED, &dimid)) ERR;
45+
if (nc_def_var(ncid, "var", NC_INT, 1, &dimid, &varid)) ERR;
46+
if (nc_var_par_access(ncid, varid, NC_COLLECTIVE)) ERR;
47+
if (nc_enddef(ncid)) ERR;
48+
49+
start[0] = rank;
50+
count[0] = 1;
51+
if (nc_put_vara_int(ncid, varid, start, count, &rank)) ERR;
52+
if (nc_inq_dimlen(ncid, dimid, &nrecs)) ERR;
53+
if (nc_close(ncid)) ERR;
54+
/* nc_set_log_level(-1); */
55+
56+
if (nrecs != nprocs)
57+
{
58+
printf("Rank %d error at line %d of file %s:\n",rank,__LINE__,__FILE__);
59+
printf("\tafter writing start=%zd count=%zd\n", start[0], count[0]);
60+
printf("\texpecting number of records = %d but got %ld\n",
61+
nprocs, nrecs);
62+
ERR;
63+
}
64+
65+
if (!rank)
66+
SUMMARIZE_ERR;
67+
68+
MPI_Finalize();
69+
70+
if (!rank)
71+
FINAL_RESULTS;
72+
73+
return 0;
74+
}

0 commit comments

Comments
 (0)