Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize worker behavior for object store writes #650

Merged
Merged
2 changes: 1 addition & 1 deletion data/serialized_dataset_examples/ngen-output-1.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"type" : "OBJECT_STORE",
"data_domain" : {
"continuous" : [],
"data_format" : "NGEN_OUTPUT",
"data_format" : "NGEN_CSV_OUTPUT",
"discrete" : [
{
"values" : [
Expand Down
1 change: 1 addition & 0 deletions docker/main/ngen/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,7 @@ USER ${USER}
COPY --chown=${USER} noah_owp_parameters /dmod/bmi_module_data/noah_owp/parameters/
COPY --chown=${USER} ngen_entrypoint.sh ${WORKDIR}/entrypoint.sh
COPY --chown=${USER} funcs.sh ${WORKDIR}
COPY --chown=${USER} --chmod=744 py_funcs.py /dmod/bin/py_funcs

ENV HYDRA_PROXY_RETRY_COUNT=5

Expand Down
2 changes: 1 addition & 1 deletion docker/main/ngen/funcs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -212,4 +212,4 @@ run_secondary_mpi_ssh_worker_node()
while [ -e ${RUN_SENTINEL} ] && kill -s 0 "${_SSH_D_PID}" 2>/dev/null ; do
sleep 5
done
}
}
61 changes: 42 additions & 19 deletions docker/main/ngen/ngen_cal_entrypoint.sh
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to @christophertubbs's above comment about using Python, for example. While out of scope for this PR, we should really consider rewriting this entry point and related helper functions in something other than POSIX-compliant shell.

Original file line number Diff line number Diff line change
Expand Up @@ -4,54 +4,65 @@
while [ ${#} -gt 0 ]; do
case "${1}" in
--config-dataset)
CONFIG_DATASET_NAME="${2:?}"
declare -x CONFIG_DATASET_NAME="${2:?}"
shift
;;
--host-string)
MPI_HOST_STRING="${2:?}"
declare -x MPI_HOST_STRING="${2:?}"
shift
;;
--hydrofabric-dataset)
HYDROFABRIC_DATASET_NAME="${2:?}"
declare -x HYDROFABRIC_DATASET_NAME="${2:?}"
shift
;;
--job-id)
JOB_ID="${2:?}"
declare -x JOB_ID="${2:?}"
shift
;;
--node-count)
MPI_NODE_COUNT="${2:?}"
declare -x MPI_NODE_COUNT="${2:?}"
shift
;;
--output-dataset)
OUTPUT_DATASET_NAME="${2:?}"
declare -x OUTPUT_DATASET_NAME="${2:?}"
shift
;;
--partition-dataset)
PARTITION_DATASET_NAME="${2:?}"
declare -x PARTITION_DATASET_NAME="${2:?}"
shift
;;
--worker-index)
WORKER_INDEX="${2:?}"
declare -x WORKER_INDEX="${2:?}"
shift
;;
--calibration-config-file)
CALIBRATION_CONFIG_BASENAME="${2:?}"
declare -x CALIBRATION_CONFIG_BASENAME="${2:?}"
shift
;;
esac
shift
done

# TODO: (later) in both ngen and ngen-cal entrypoints, add controls for whether this is temp dir or output dataset dir
declare -x JOB_OUTPUT_WRITE_DIR="/tmp/job_output"

# Get some universally applicable functions and constants
source ./funcs.sh

ngen_sanity_checks_and_derived_init
init_script_mpi_vars
init_ngen_executable_paths

# Move to the output dataset mounted directory
cd ${OUTPUT_DATASET_DIR:?Output dataset directory not defined}
# Move to the output write directory
# TODO: (later) in both ngen and ngen-cal entrypoints, control whether this is needed, based on if write dir is output dataset dir
#cd ${OUTPUT_DATASET_DIR:?Output dataset directory not defined}
mkdir ${JOB_OUTPUT_WRITE_DIR:?}
chown ${MPI_USER}:${MPI_USER} ${JOB_OUTPUT_WRITE_DIR}
cd ${JOB_OUTPUT_WRITE_DIR}
#Needed for routing
if [ ! -e /dmod/datasets/linked_job_output ]; then
ln -s ${JOB_OUTPUT_WRITE_DIR} /dmod/datasets/linked_job_output
fi

start_calibration() {
# Start ngen calibration
Expand All @@ -61,24 +72,31 @@ start_calibration() {
echo "$(print_date) Starting ngen calibration with serial ngen execution"
fi

# Find and use copy of config in output dataset
# Find calibration config, then copy to output dataset and use that
if [ -n "${CALIBRATION_CONFIG_BASENAME:-}" ]; then
CALIBRATION_CONFIG_FILE=$(find ${OUTPUT_DATASET_DIR:?} -type f -name "${CALIBRATION_CONFIG_BASENAME}" -maxdepth 1 | head -1)
#CALIBRATION_CONFIG_FILE=$(find ${OUTPUT_DATASET_DIR:?} -type f -name "${CALIBRATION_CONFIG_BASENAME}" -maxdepth 1 | head -1)
_ORIG_CAL_CONFIG_FILE=$(find ${CONFIG_DATASET_DIR:?} -type f -name "${CALIBRATION_CONFIG_BASENAME}" -maxdepth 1 | head -1)
else
CALIBRATION_CONFIG_FILE=$(find ${OUTPUT_DATASET_DIR:?} -type f -iname "*.yaml" -o -iname "*.yml" -maxdepth 1 | head -1)
#CALIBRATION_CONFIG_FILE=$(find ${OUTPUT_DATASET_DIR:?} -type f -iname "*.yaml" -o -iname "*.yml" -maxdepth 1 | head -1)
_ORIG_CAL_CONFIG_FILE=$(find ${CONFIG_DATASET_DIR:?} -type f -iname "*.yaml" -o -iname "*.yml" -maxdepth 1 | head -1)
fi

if [ -z "${CALIBRATION_CONFIG_FILE}" ]; then
if [ -z "${_ORIG_CAL_CONFIG_FILE}" ]; then
echo "Error: NGEN calibration yaml file not found" 2>&1
exit 1
fi
cp -a ${_ORIG_CAL_CONFIG_FILE:?} ${OUTPUT_DATASET_DIR:?}/.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Marking this as related to #407. I am not sure where we should document the expectations / spec for a worker image's data requirements, but this feels like a prime candidate for that sort of documentation. Just want to track this because it will affect how we need to normalize paths.

CALIBRATION_CONFIG_FILE="${OUTPUT_DATASET_DIR:?}/$(basename ${_ORIG_CAL_CONFIG_FILE})"

python3 -m ngen.cal "${CALIBRATION_CONFIG_FILE}"

#Capture the return value to use as service exit code
NGEN_RETURN=$?

echo "$(print_date) ngen calibration finished with return value: ${NGEN_RETURN}"

# TODO: (later) make sure outputs are handled properly, and that eventually we support toggling whether written to
# TODO: output dataset dir directly or somewhere else

# Exit with the model's exit code
return ${NGEN_RETURN}
}
Expand All @@ -89,11 +107,16 @@ if [ "${WORKER_INDEX:-0}" = "0" ]; then
# This will only have an effect when running with multiple MPI nodes, so its safe to have even in serial exec
trap close_remote_workers EXIT
# Have "main" (potentially only) worker copy config files to output dataset for record keeping
# TODO: perform copy of configs to output dataset outside of image (in service) for better performance
cp -a ${CONFIG_DATASET_DIR:?Config dataset directory not defined}/. ${OUTPUT_DATASET_DIR:?}
# TODO: (later) in ngen and ngen-cal entrypoints, consider adding controls for whether this is done or a simpler
# TODO: 'cp' call, based on whether we write directly to output dataset dir or some other output write dir
# Do a dry run first to sanity check directory and fail if needed before backgrounding process
py_funcs tar_and_copy --dry-run --compress ${CONFIG_DATASET_DIR:?Config dataset directory not defined} config_dataset.tgz ${OUTPUT_DATASET_DIR:?}
# Then actually run the archive and copy function in the background
py_funcs tar_and_copy --compress ${CONFIG_DATASET_DIR:?} config_dataset.tgz ${OUTPUT_DATASET_DIR:?} &
_CONFIG_COPY_PROC=$!
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we wait on this after start_calibration?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am mostly kicking that can down the road, but practically speaking I don't think we will have to. This image is going to need extensive testing regardless, and we can sort out any issues related to this then.

# If there is partitioning, which implies multi-processing job ...
if [ -n "${PARTITION_DATASET_DIR:-}" ]; then
# Include partition config dataset too if appropriate
# TODO: perform copy of configs to output dataset outside of image (in service) for better performance
cp -a ${PARTITION_DATASET_DIR}/. ${OUTPUT_DATASET_DIR:?}
fi

Expand Down
53 changes: 46 additions & 7 deletions docker/main/ngen/ngen_entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,25 @@ while [ ${#} -gt 0 ]; do
shift
done

# TODO: (later) in both ngen and ngen-cal entrypoints, add controls for whether this is temp dir or output dataset dir
declare -x JOB_OUTPUT_WRITE_DIR="/tmp/job_output"

# Get some universally applicable functions and constants
source /ngen/funcs.sh

ngen_sanity_checks_and_derived_init
init_script_mpi_vars
init_ngen_executable_paths

# Move to the output dataset mounted directory
cd ${OUTPUT_DATASET_DIR:?Output dataset directory not defined}
# Move to the output write directory
# TODO: (later) in both ngen and ngen-cal entrypoints, control whether this is needed, based on if write dir is output dataset dir
#cd ${OUTPUT_DATASET_DIR:?Output dataset directory not defined}
mkdir ${JOB_OUTPUT_WRITE_DIR:?}
chown ${MPI_USER}:${MPI_USER} ${JOB_OUTPUT_WRITE_DIR}
cd ${JOB_OUTPUT_WRITE_DIR}
#Needed for routing
if [ ! -e /dmod/datasets/linked_job_output ]; then
ln -s $(pwd) /dmod/datasets/linked_job_output
ln -s ${JOB_OUTPUT_WRITE_DIR} /dmod/datasets/linked_job_output
fi

# We can allow worker index to not be supplied when executing serially
Expand All @@ -59,15 +66,47 @@ if [ "${WORKER_INDEX:-0}" = "0" ]; then
# This will only have an effect when running with multiple MPI nodes, so its safe to have even in serial exec
trap close_remote_workers EXIT
# Have "main" (potentially only) worker copy config files to output dataset for record keeping
# TODO: perform copy of configs to output dataset outside of image (in service) for better performance
cp -a ${CONFIG_DATASET_DIR:?Config dataset directory not defined}/. ${OUTPUT_DATASET_DIR:?}
# TODO: (later) in ngen and ngen-cal entrypoints, consider adding controls for whether this is done or a simpler
# TODO: 'cp' call, based on whether we write directly to output dataset dir or some other output write dir
# Do a dry run first to sanity check directory and fail if needed before backgrounding process
py_funcs tar_and_copy --dry-run --compress ${CONFIG_DATASET_DIR:?Config dataset directory not defined} config_dataset.tgz ${OUTPUT_DATASET_DIR:?}
# Then actually run the archive and copy function in the background
py_funcs tar_and_copy --compress ${CONFIG_DATASET_DIR:?} config_dataset.tgz ${OUTPUT_DATASET_DIR:?} &
_CONFIG_COPY_PROC=$!
# If there is partitioning, which implies multi-processing job ...
if [ -n "${PARTITION_DATASET_DIR:-}" ]; then
# Include partition config dataset too if appropriate
# TODO: perform copy of configs to output dataset outside of image (in service) for better performance
# Include partition config dataset too if appropriate, though for simplicity, just copy directly
cp -a ${PARTITION_DATASET_DIR}/. ${OUTPUT_DATASET_DIR:?}
# Then run execution
exec_main_worker_ngen_run

# TODO: (later) in ngen and ngen-cal entrypoints, add controls for whether this is done base on whether we
# TODO: are writing directly to output dataset dir or some other output write dir; this will be
# TODO: important once netcdf output works
# Then gather output from all worker hosts
py_funcs gather_output ${MPI_HOST_STRING:?} ${JOB_OUTPUT_WRITE_DIR:?}
# Then wait at this point (if necessary) for our background config copy to avoid taxing things
echo "$(print_date) Waiting for compression and copying of configuration files to output dataset"
wait ${_CONFIG_COPY_PROC}
echo "$(print_date) Compression/copying of config data to output dataset complete"
echo "$(print_date) Copying results to output dataset"
py_funcs move_job_output --job_id ${JOB_ID:?} ${JOB_OUTPUT_WRITE_DIR} to_directory ${OUTPUT_DATASET_DIR:?}
echo "$(print_date) Results copied to output dataset"
# Otherwise, we just have a serial job ...
else
# Execute it first
exec_serial_ngen_run

# TODO: (later) in ngen and ngen-cal entrypoints, add controls for whether this is done base on whether we
# TODO: are writing directly to output dataset dir or some other output write dir; this will be
# TODO: important once netcdf output works
echo "$(print_date) Waiting for compression and copying of configuration files to output dataset"
wait ${_CONFIG_COPY_PROC}
echo "$(print_date) Compression/copying of config data to output dataset complete"

echo "$(print_date) Copying results to output dataset"
py_funcs move_job_output --job_id ${JOB_ID:?} ${JOB_OUTPUT_WRITE_DIR} to_directory ${OUTPUT_DATASET_DIR:?}
echo "$(print_date) Results copied to output dataset"
fi
else
# Start SSHD on the main worker if have an MPI job
Expand Down
Loading
Loading