From 99f16010cb36e245f005fda93d8b029ad3f21203 Mon Sep 17 00:00:00 2001
From: Rob Hanna - NOAA <90854818+RobHanna-NOAA@users.noreply.github.com>
Date: Fri, 23 Dec 2022 14:03:40 -0600
Subject: [PATCH] Make GMS HAND post-processing independent

---
 docs/CHANGELOG.md                             |  54 +++++-
 gms_pipeline.sh                               |  52 ++---
 gms_run_branch.sh                             | 159 +++-------
 gms_run_post_processing.sh                    | 182 ++++++++++++++++++
 gms_run_unit.sh                               |  75 ++------
 src/bash_functions.env                        |  10 +
 src/filter_catchments_and_add_attributes.py   |  22 +--
 src/gms/delineate_hydros_and_produce_HAND.sh  |   4 +-
 src/gms/derive_level_paths.py                 |  56 +++---
 src/gms/run_by_unit.sh                        |  19 +-
 src/split_flows.py                            |  25 +--
 src/usgs_gage_unit_setup.py                   |   9 +-
 unit_tests/gms/derive_level_paths_params.json |   6 +-
 .../gms/derive_level_paths_unittests.py       |  94 +--------
 14 files changed, 354 insertions(+), 413 deletions(-)
 create mode 100755 gms_run_post_processing.sh

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 401a91b75..90abdebc9 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,6 +1,56 @@
 All notable changes to this project will be documented in this file.
 We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
 
+## v4.0.16.0 - 2022-12-20 - [PR #768](https://github.com/NOAA-OWP/inundation-mapping/pull/768)
+
+`gms_run_branch.sh` previously processed all of the branches iteratively and then continued on to a large post-processing portion of code. That logic has now been split into two files: one for branch iteration and one for post-processing only.
+
+Other minor changes include:
+- Removing the system where a user could override `DropStreamOrders` to process stream order 1 and 2 streams as their own GMS branches. With that option removed, only stream orders 3 and higher become GMS branches, and stream orders 1 and 2 are always handled in branch zero.
+
+- The `retry` flag on the three gms*.sh files has been removed. It did not work correctly, was not being used, and would have produced unreliable results.
+
+### Additions
+
+- `gms_run_post_processing.sh`
+    - Handles all tasks that previously ran in `gms_run_branch.sh` after branch iteration, except for output cleanup, which stays in `gms_run_branch.sh`.
+    - Can be run completely independently of `gms_run_unit.sh` or `gms_run_branch.sh` as long as all of the required files are in place, and can be re-run if desired.
+
+### Changes
+
+- `gms_pipeline.sh`
+    - Remove "retry" system.
+    - Remove "dropLowStreamOrders" system.
+    - Updated for the newer reusable output date/time/duration system.
+    - Add call to the new `gms_run_post_processing.sh` file.
+
+- `gms_run_branch.sh`
+    - Remove "retry" system.
+    - Remove "dropLowStreamOrders" system.
+    - Updated for the newer reusable output date/time/duration system.
+    - Moved most of the code below the branch iterator to the new `gms_run_post_processing.sh` file, but kept the branch file output cleanup and non-zero exit code checking.
+
+- `gms_run_unit.sh`
+    - Remove "retry" system.
+    - Remove "dropLowStreamOrders" system.
+    - Updated for the newer reusable output date/time/duration system.
+
+- `src`
+    - `bash_functions.env`: Added a new function to make it simpler to calculate and display duration times (see the usage sketch below).
+    - `filter_catchments_and_add_attributes.py`: Remove "dropLowStreamOrders" system.
+    - `split_flows.py`: Remove "dropLowStreamOrders" system.
+    - `usgs_gage_unit_setup.py`: Remove "dropLowStreamOrders" system.
+
+- `gms`
+    - `delineate_hydros_and_produce_HAND.sh`: Remove "dropLowStreamOrders" system.
+ - `derive_level_paths.py`: Remove "dropLowStreamOrders" system and some small style updates. + - `run_by_unit.sh`: Remove "dropLowStreamOrders" system. + +- `unit_tests/gms` + - `derive_level_paths_params.json` and `derive_level_paths_unittests.py`: Remove "dropLowStreamOrders" system. + +
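As an illustration of the decoupled workflow described above, the three scripts can now be called back to back, or individually and re-run, against the same run name. The run name, HUC number, and job limit below are placeholder values; the flags come from each script's usage text.

```bash
# Each step can be run on its own, or re-run, as long as the previous step's
# outputs are present under the run's output folder.
/foss_fim/gms_run_unit.sh -u "05030104" -n fim_test_run -j 6
/foss_fim/gms_run_branch.sh -n fim_test_run -j 6
/foss_fim/gms_run_post_processing.sh -n fim_test_run -j 6
```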

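A short usage sketch of the new `Calc_Duration` helper added to `src/bash_functions.env` (its definition appears in the diff below); the variable name and the `sleep` stand-in for real work are illustrative only.

```bash
source /foss_fim/src/bash_functions.env

my_step_start_time=`date +%s`
sleep 65                            # stand-in for a long-running processing step
Calc_Duration $my_step_start_time   # prints: Duration = 1 min(s) and 5 sec
```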
+ ## v4.0.15.0 - 2022-12-20 - [PR #758](https://github.com/NOAA-OWP/inundation-mapping/pull/758) This merge addresses feedback received from field users regarding CatFIM. Users wanted a Stage-Based version of CatFIM, they wanted maps created for multiple intervals between flood categories, and they wanted documentation as to why many sites are absent from the Stage-Based CatFIM service. This merge seeks to address this feedback. CatFIM will continue to evolve with more feedback over time. @@ -80,13 +130,11 @@ Fixes inundation of nodata areas of REM. - `tools/inundation.py`: Assigns depth a value of `0` if REM is less than `0` - - ## v4.0.13.1 - 2022-12-09 - [PR #743](https://github.com/NOAA-OWP/inundation-mapping/pull/743) This merge adds the tools required to generate Alpha metrics by hydroid. It summarizes the Apha metrics by branch 0 catchment for use in the Hydrovis "FIM Performance" service. -## Additions +### Additions - `pixel_counter.py`: A script to perform zonal statistics against raster data and geometries - `pixel_counter_functions.py`: Supporting functions diff --git a/gms_pipeline.sh b/gms_pipeline.sh index 4aa9c3216..2907ce04e 100755 --- a/gms_pipeline.sh +++ b/gms_pipeline.sh @@ -4,10 +4,11 @@ usage () { echo echo 'Produce GMS hydrofabric datasets for unit and branch scale.' - echo 'Usage : gms_pipeline.sh [REQ: -u - -n ]' - echo ' [OPT: -h -c -j ] -o -r' - echo ' -ud -bd ' - echo ' -zd -a ]' + echo 'Usage : gms_pipeline.sh [REQ: -u -n ]' + echo ' [OPT: -h -c -j ] -o' + echo ' -ud ' + echo ' -bd ' + echo ' -zd ]' echo '' echo 'REQUIRED:' echo ' -u/--hucList : HUC8s to run or multiple passed in quotes (space delimited) file.' @@ -38,10 +39,6 @@ usage () echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time.' echo ' stdout and stderr to terminal and logs. With >1 outputs progress and logs the rest' echo ' -o/--overwrite : overwrite outputs if already exist' - echo ' -r/--retry : retries failed jobs' - echo ' -a/--UseAllStreamOrders : If this flag is included, the system will INCLUDE stream orders 1 and 2' - echo ' at the initial load of the nwm_subset_streams.' - echo ' Default (if arg not added) is false and stream orders 1 and 2 will be dropped' echo exit } @@ -74,10 +71,6 @@ in -o|--overwrite) overwrite=1 ;; - -r|--retry) - retry="--retry-failed" - overwrite=1 - ;; -ud|--unitDenylist) shift deny_unit_list=$1 @@ -90,9 +83,6 @@ in shift deny_branch_zero_list=$1 ;; - -a|--useAllStreamOrders) - useAllStreamOrders=1 - ;; *) ;; esac shift @@ -120,10 +110,6 @@ then # default is false (0) overwrite=0 fi -if [ -z "$retry" ] -then - retry="" -fi # The tests for the deny lists are duplicated here on to help catch # them earlier (ie.. don't have to wait to process units to find an @@ -154,18 +140,6 @@ then usage fi -# invert useAllStreamOrders boolean (to make it historically compatiable -# with other files like gms/run_unit.sh and gms/run_branch.sh). -# Yet help user understand that the inclusion of the -a flag means -# to include the stream order (and not get mixed up with older versions -# where -s mean drop stream orders) -# This will encourage leaving stream orders 1 and 2 out. 
-if [ "$useAllStreamOrders" == "1" ]; then - export dropLowStreamOrders=0 -else - export dropLowStreamOrders=1 -fi - ## SOURCE ENV FILE AND FUNCTIONS ## source $envFile source $srcDir/bash_functions.env @@ -202,8 +176,7 @@ run_cmd+=" -c $envFile" run_cmd+=" -j $jobLimit" if [ $overwrite -eq 1 ]; then run_cmd+=" -o" ; fi -if [ "$retry" == "--retry-failed" ]; then run_cmd+=" -r" ; fi -if [ $dropLowStreamOrders -eq 1 ]; then run_cmd+=" -s" ; fi + #echo "$run_cmd" . /foss_fim/gms_run_unit.sh -u "$hucList" $run_cmd -ud "$deny_unit_list" -zd "$deny_branch_zero_list" @@ -217,18 +190,19 @@ if [ $dropLowStreamOrders -eq 1 ]; then run_cmd+=" -s" ; fi # if this has too many errors, it will return a sys.exit code (like 62 as per fim_enums) # and we will stop the rest of the process. We have to catch stnerr as well. +# This stops the run from continuing to run, drastically filing drive and killing disk space. python3 $srcDir/check_unit_errors.py -f $outputRunDataDir -n $num_hucs - ## Produce level path or branch level datasets . /foss_fim/gms_run_branch.sh $run_cmd -bd "$deny_branches_list" -zd "$deny_branch_zero_list" +## continue on to post processing +. /foss_fim/gms_run_post_processing.sh $run_cmd + +echo echo "======================== End of gms_pipeline.sh ==========================" -pipeline_end_time=`date +%s` -total_sec=$(expr $pipeline_end_time - $pipeline_start_time) -dur_min=$((total_sec / 60)) -dur_remainder_sec=$((total_sec % 60)) -echo "Total Run Time = $dur_min min(s) and $dur_remainder_sec sec" +date -u +Calc_Duration $pipeline_start_time echo diff --git a/gms_run_branch.sh b/gms_run_branch.sh index c313bcc4f..c8029af9b 100755 --- a/gms_run_branch.sh +++ b/gms_run_branch.sh @@ -4,19 +4,19 @@ usage () { echo 'Produce GMS hydrofabric at levelpath/branch scale. Execute gms_run_unit.sh prior to.' echo 'Usage : gms_run_branch.sh [REQ: -n ]' - echo ' [OPT: -h -j -o -r ' - echo ' -bd -zd ' - echo ' -u -a ]' + echo ' [OPT: -h -u -c -j ] -o' + echo ' -bd ' + echo ' -zd ]' echo '' echo 'REQUIRED:' echo ' -n/--runName : A name to tag the output directories and log files as. could be a version tag.' echo '' echo 'OPTIONS:' echo ' -h/--help : help file' - echo ' -c/--config : configuration file with bash environment variables to export' - echo ' default (if arg not added) : /foss_fim/config/params_template.env' echo ' -u/--hucList : HUC8s to run or multiple passed in quotes (space delimited).' echo ' A line delimited file also acceptable. HUCs must present in inputs directory.' + echo ' -c/--config : configuration file with bash environment variables to export' + echo ' default (if arg not added) : /foss_fim/config/params_template.env' echo ' -bd/--branchDenylist : A file with a line delimited list of files in BRANCHES directories to be removed' echo ' upon completion of branch processing.' echo ' (see config/deny_gms_branches_prod.lst for a starting point)' @@ -31,11 +31,7 @@ usage () echo ' use the word NONE as this value for this parameter.' echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time. 1 outputs' echo ' stdout and stderr to terminal and logs. With >1 outputs progress and logs the rest' - echo ' -r/--retry : retries failed jobs' echo ' -o/--overwrite : overwrite outputs if already exist' - echo ' -a/--UseAllStreamOrders : If this flag is included, the system will INCLUDE stream orders 1 and 2' - echo ' at the initial load of the nwm_subset_streams.' 
- echo ' Default (if arg not added) is false and stream orders 1 and 2 will be dropped' echo exit } @@ -66,10 +62,6 @@ in -o|--overwrite) overwrite=1 ;; - -r|--retry) - retry="--retry-failed" - overwrite=1 - ;; -bd|--branchDenylist) shift deny_branches_list=$1 @@ -78,9 +70,6 @@ in shift deny_branch_zero_list_for_branches=$1 ;; - -a|--useAllStreamOrders) - useAllStreamOrders=1 - ;; *) ;; esac shift @@ -133,22 +122,6 @@ if [ "$overwrite" = "" ] then overwrite=0 fi -if [ -z "$retry" ] -then - retry="" -fi - -# invert useAllStreamOrders boolean (to make it historically compatiable -# with other files like gms/run_unit.sh and gms/run_branch.sh). -# Yet help user understand that the inclusion of the -a flag means -# to include the stream order (and not get mixed up with older versions -# where -s mean drop stream orders) -# This will encourage leaving stream orders 1 and 2 out. -if [ "$useAllStreamOrders" == "1" ]; then - export dropLowStreamOrders=0 -else - export dropLowStreamOrders=1 -fi ## SOURCE ENV FILE AND FUNCTIONS ## source $envFile @@ -163,12 +136,11 @@ fi export outputRunDataDir=$outputDataDir/$runName export deny_branches_list=$deny_branches_list logFile=$outputRunDataDir/logs/branch/summary_gms_branch.log -export extent=GMS export overwrite=$overwrite +export extent=GMS - -## Check for run data directory ## -if [ ! -d "$outputRunDataDir" ]; then +## Check for run data directory and the file. If gms_run_unit failed, the file will not be there ## +if [ ! -f "$outputRunDataDir/gms_inputs.csv" ]; then echo "Depends on output from gms_run_unit.sh. Please produce data with gms_run_unit.sh first." exit 1 fi @@ -181,11 +153,6 @@ else gms_inputs=$outputRunDataDir/gms_inputs_filtered.csv fi -# Echo intent to retry -if [ "$retry" = "--retry-failed" ]; then - echo "Retrying failed unit level jobs for $runName" -fi - # make log dir if [ ! -d "$outputRunDataDir/logs/branch" ]; then mkdir -p $outputRunDataDir/logs/branch @@ -195,6 +162,9 @@ elif [ $overwrite -eq 1 ]; then mkdir -p $outputRunDataDir/logs/branch fi +# Note: Other parts of the program will check for the existance of the file +# /branch_errors/non_zero_exit_codes.log. It has to be removed no matter +# what on each run of gms_run_branch if [ ! 
-d "$outputRunDataDir/branch_errors" ]; then mkdir -p "$outputRunDataDir/branch_errors" elif [ $overwrite -eq 1 ]; then @@ -202,97 +172,25 @@ elif [ $overwrite -eq 1 ]; then mkdir -p $outputRunDataDir/branch_errors fi -## Track total time of the overall run -T_total_start - ## RUN GMS BY BRANCH ## -echo "==========================================================================" -echo "Start of branch processing" -echo "Started: `date -u`" +echo +echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echo "---- Start of branch processing" +echo "---- Started: `date -u`" +T_total_start Tstart +all_branches_start_time=`date +%s` if [ "$jobLimit" -eq 1 ]; then - parallel $retry --verbose --timeout $branch_timeout --lb -j $jobLimit --joblog $logFile --colsep ',' -- $srcDir/gms/time_and_tee_run_by_branch.sh :::: $gms_inputs + parallel --verbose --timeout $branch_timeout --lb -j $jobLimit --joblog $logFile --colsep ',' -- $srcDir/gms/time_and_tee_run_by_branch.sh :::: $gms_inputs else - parallel $retry --eta --timeout $branch_timeout -j $jobLimit --joblog $logFile --colsep ',' -- $srcDir/gms/time_and_tee_run_by_branch.sh :::: $gms_inputs + parallel --eta --timeout $branch_timeout -j $jobLimit --joblog $logFile --colsep ',' -- $srcDir/gms/time_and_tee_run_by_branch.sh :::: $gms_inputs fi echo "Branch processing is complete" Tcount date -u -## RUN AGGREGATE BRANCH ELEV TABLES ## -# TODO: How do we skip aggregation if there is a branch error -# maybe against the non_zero logs above -echo -echo "Processing usgs gage aggregation" -python3 $srcDir/usgs_gage_aggregate.py -fim $outputRunDataDir -gms $gms_inputs - -## RUN SYNTHETIC RATING CURVE BANKFULL ESTIMATION ROUTINE ## -if [ "$src_bankfull_toggle" = "True" ]; then - echo -e $startDiv"Estimating bankfull stage in SRCs"$stopDiv - # Run SRC bankfull estimation routine routine - Tstart - time python3 /foss_fim/src/identify_src_bankfull.py -fim_dir $outputRunDataDir -flows $bankfull_flows_file -j $jobLimit - Tcount -fi - -## RUN SYNTHETIC RATING SUBDIVISION ROUTINE ## -if [ "$src_subdiv_toggle" = "True" ]; then - echo -e $startDiv"Performing SRC channel/overbank subdivision routine"$stopDiv - # Run SRC Subdivision & Variable Roughness routine - Tstart - time python3 /foss_fim/src/subdiv_chan_obank_src.py -fim_dir $outputRunDataDir -mann $vmann_input_file -j $jobLimit - Tcount -fi - -## CONNECT TO CALIBRATION POSTGRESQL DATABASE (OPTIONAL) ## -if [ "$src_adjust_spatial" = "True" ]; then - if [ ! -f $CALB_DB_KEYS_FILE ]; then - echo "ERROR! - the src_adjust_spatial parameter in the params_template.env (or equiv) is set to "True" (see parameter file), but the provided calibration database access keys file does not exist: $CALB_DB_KEYS_FILE" - exit 1 - else - source $CALB_DB_KEYS_FILE - : ' - This makes the local variables from the calb_db_keys files - into global variables that can be used in other files, including python. - - Why not just leave the word export in front of each of the keys in the - calb_db_keys.env? Becuase that file is used against docker-compose - when we start up that part of the sytem and it does not like the word - export. 
- ' - export CALIBRATION_DB_HOST=$CALIBRATION_DB_HOST - export CALIBRATION_DB_NAME=$CALIBRATION_DB_NAME - export CALIBRATION_DB_USER_NAME=$CALIBRATION_DB_USER_NAME - export CALIBRATION_DB_PASS=$CALIBRATION_DB_PASS - echo "Populate PostgrSQL database with benchmark FIM extent points and HUC attributes (the calibration database)" - echo "Loading HUC Data" - time ogr2ogr -overwrite -nln hucs -a_srs ESRI:102039 -f PostgreSQL PG:"host=$CALIBRATION_DB_HOST dbname=$CALIBRATION_DB_NAME user=$CALIBRATION_DB_USER_NAME password=$CALIBRATION_DB_PASS" $inputDataDir/wbd/WBD_National.gpkg WBDHU8 - echo "Loading Point Data" - time ogr2ogr -overwrite -f PostgreSQL PG:"host=$CALIBRATION_DB_HOST dbname=$CALIBRATION_DB_NAME user=$CALIBRATION_DB_USER_NAME password=$CALIBRATION_DB_PASS" $fim_obs_pnt_data usgs_nws_benchmark_points -nln points - fi -fi - -## RUN SYNTHETIC RATING CURVE CALIBRATION W/ USGS GAGE RATING CURVES ## -if [ "$src_adjust_usgs" = "True" ] && [ "$src_subdiv_toggle" = "True" ]; then - Tstart - echo -e $startDiv"Performing SRC adjustments using USGS rating curve database"$stopDiv - # Run SRC Optimization routine using USGS rating curve data (WSE and flow @ NWM recur flow thresholds) - python3 $srcDir/src_adjust_usgs_rating.py -run_dir $outputRunDataDir -usgs_rc $inputDataDir/usgs_gages/usgs_rating_curves.csv -nwm_recur $nwm_recur_file -j $jobLimit - Tcount - date -u -fi - -## RUN SYNTHETIC RATING CURVE CALIBRATION W/ BENCHMARK POINT DATABASE (POSTGRESQL) ## -if [ "$src_adjust_spatial" = "True" ] && [ "$src_subdiv_toggle" = "True" ]; then - Tstart - echo -e $startDiv"Performing SRC adjustments using benchmark point database"$stopDiv - python3 $srcDir/src_adjust_spatial_obs.py -fim_dir $outputRunDataDir -j $jobLimit - Tcount - date -u -fi - # ------------------- ## REMOVE FILES FROM DENY LIST FOR BRANCH ZERO (but using normal branch deny) ## ## but also do not remove if branch zero deny is NONE (any case) @@ -317,9 +215,13 @@ else $srcDir/gms/outputs_cleanup.py -d $outputRunDataDir -l $deny_branches_list -b 0 fi + # ------------------- ## GET NON ZERO EXIT CODES ## # Needed in case aggregation fails, we will need the logs +# Note: Other parts of the program (gms_run_post_processing.sh) check to see +# if the branch_errors/non_zero_exit_codes.log exists. If it does not, it assumes +# that gms_run_branch did not complete (or was not run) echo echo -e $startDiv"Start non-zero exit code checking"$stopDiv find $outputRunDataDir/logs/branch -name "*_branch_*.log" -type f | xargs grep -E "Exit status: ([1-9][0-9]{0,2})" >"$outputRunDataDir/branch_errors/non_zero_exit_codes.log" & @@ -335,15 +237,8 @@ Tcount date -u echo -echo -e $startDiv"Combining crosswalk tables"$stopDiv -# aggregate outputs -Tstart -python3 /foss_fim/tools/gms_tools/combine_crosswalk_tables.py -d $outputRunDataDir -o $outputRunDataDir/crosswalk_table.csv -Tcount -date -u - -echo "==========================================================================" -echo "GMS_run_branch complete" -Tcount -echo "Ended: `date -u`" +echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echo "---- gms_run_branch complete" +echo "---- Ended: `date -u`" +Calc_Duration $all_branches_start_time echo diff --git a/gms_run_post_processing.sh b/gms_run_post_processing.sh new file mode 100755 index 000000000..8397d44b2 --- /dev/null +++ b/gms_run_post_processing.sh @@ -0,0 +1,182 @@ +#!/bin/bash -e +: +usage () +{ + echo 'Produce GMS hydrofabric at levelpath/branch scale. Execute gms_run_unit.sh and gms_run_branch prior to.' 
+ echo 'Usage : gms_run_post_processing.sh [REQ: -n ]' + echo ' [OPT: -h -c -j ]' + echo '' + echo 'REQUIRED:' + echo ' -n/--runName : A name to tag the output directories and log files as. could be a version tag.' + echo '' + echo 'OPTIONS:' + echo ' -h/--help : help file' + echo ' -c/--config : configuration file with bash environment variables to export' + echo ' default (if arg not added) : /foss_fim/config/params_template.env' + echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time. 1 outputs' + echo ' stdout and stderr to terminal and logs. With >1 outputs progress and logs the rest' + echo + exit +} + +while [ "$1" != "" ]; do +case $1 +in + -c|--configFile ) + shift + envFile=$1 + ;; + -n|--runName) + shift + runName=$1 + ;; + -j|--jobLimit) + shift + jobLimit=$1 + ;; + -h|--help) + shift + usage + ;; + *) ;; + esac + shift +done + +# print usage if arguments empty +if [ "$runName" = "" ] +then + echo "ERROR: Missing -n run time name argument" + usage +fi + +if [ "$envFile" = "" ] +then + envFile=/foss_fim/config/params_template.env +fi + +## SOURCE ENV FILE AND FUNCTIONS ## +source $envFile +source $srcDir/bash_functions.env + +# default values +if [ "$jobLimit" = "" ] ; then + jobLimit=$default_max_jobs +fi + +## Define Outputs Data Dir & Log File## +export outputRunDataDir=$outputDataDir/$runName +export extent=GMS + +## Check for run data directory ## +if [ ! -d "$outputRunDataDir" ]; then + echo "Depends on output from gms_run_unit.sh. Please produce data with gms_run_unit.sh first." + exit 1 +fi + +## Check to ensure gms_run_branch completed ## +if [ ! -f "$outputRunDataDir/branch_errors/non_zero_exit_codes.log" ]; then + echo "Depends on output from gms_run_branch.sh. Please run gms_run_branch.sh or check if it failed." + exit 1 +fi + +# Clean out the other post processing files before starting +rm -rdf $outputRunDataDir/logs/src_optimization +rm -f $outputRunDataDir/logs/log_bankfull_indentify.log +rm -f $outputRunDataDir/logs/subdiv_src_.log + +gms_inputs=$outputRunDataDir/gms_inputs.csv + +echo +echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echo "---- Start of gms_run_post_processing" +echo "---- Started: `date -u`" +T_total_start +post_proc_start_time=`date +%s` + +## RUN AGGREGATE BRANCH ELEV TABLES ## +# TODO: How do we skip aggregation if there is a branch error +# maybe against the non_zero logs above +echo +echo "Processing usgs gage aggregation" +python3 $srcDir/usgs_gage_aggregate.py -fim $outputRunDataDir -gms $gms_inputs + +## RUN SYNTHETIC RATING CURVE BANKFULL ESTIMATION ROUTINE ## +if [ "$src_bankfull_toggle" = "True" ]; then + echo -e $startDiv"Estimating bankfull stage in SRCs"$stopDiv + # Run SRC bankfull estimation routine routine + Tstart + time python3 /foss_fim/src/identify_src_bankfull.py -fim_dir $outputRunDataDir -flows $bankfull_flows_file -j $jobLimit + Tcount +fi + +## RUN SYNTHETIC RATING SUBDIVISION ROUTINE ## +if [ "$src_subdiv_toggle" = "True" ]; then + echo -e $startDiv"Performing SRC channel/overbank subdivision routine"$stopDiv + # Run SRC Subdivision & Variable Roughness routine + Tstart + time python3 /foss_fim/src/subdiv_chan_obank_src.py -fim_dir $outputRunDataDir -mann $vmann_input_file -j $jobLimit + Tcount +fi + +## CONNECT TO CALIBRATION POSTGRESQL DATABASE (OPTIONAL) ## +if [ "$src_adjust_spatial" = "True" ]; then + if [ ! -f $CALB_DB_KEYS_FILE ]; then + echo "ERROR! 
- the src_adjust_spatial parameter in the params_template.env (or equiv) is set to "True" (see parameter file), but the provided calibration database access keys file does not exist: $CALB_DB_KEYS_FILE" + exit 1 + else + source $CALB_DB_KEYS_FILE + : ' + This makes the local variables from the calb_db_keys files + into global variables that can be used in other files, including python. + + Why not just leave the word export in front of each of the keys in the + calb_db_keys.env? Becuase that file is used against docker-compose + when we start up that part of the sytem and it does not like the word + export. + ' + export CALIBRATION_DB_HOST=$CALIBRATION_DB_HOST + export CALIBRATION_DB_NAME=$CALIBRATION_DB_NAME + export CALIBRATION_DB_USER_NAME=$CALIBRATION_DB_USER_NAME + export CALIBRATION_DB_PASS=$CALIBRATION_DB_PASS + echo "Populate PostgrSQL database with benchmark FIM extent points and HUC attributes (the calibration database)" + echo "Loading HUC Data" + time ogr2ogr -overwrite -nln hucs -a_srs ESRI:102039 -f PostgreSQL PG:"host=$CALIBRATION_DB_HOST dbname=$CALIBRATION_DB_NAME user=$CALIBRATION_DB_USER_NAME password=$CALIBRATION_DB_PASS" $inputDataDir/wbd/WBD_National.gpkg WBDHU8 + echo "Loading Point Data" + time ogr2ogr -overwrite -f PostgreSQL PG:"host=$CALIBRATION_DB_HOST dbname=$CALIBRATION_DB_NAME user=$CALIBRATION_DB_USER_NAME password=$CALIBRATION_DB_PASS" $fim_obs_pnt_data usgs_nws_benchmark_points -nln points + fi +fi + +## RUN SYNTHETIC RATING CURVE CALIBRATION W/ USGS GAGE RATING CURVES ## +if [ "$src_adjust_usgs" = "True" ] && [ "$src_subdiv_toggle" = "True" ]; then + Tstart + echo -e $startDiv"Performing SRC adjustments using USGS rating curve database"$stopDiv + # Run SRC Optimization routine using USGS rating curve data (WSE and flow @ NWM recur flow thresholds) + python3 $srcDir/src_adjust_usgs_rating.py -run_dir $outputRunDataDir -usgs_rc $inputDataDir/usgs_gages/usgs_rating_curves.csv -nwm_recur $nwm_recur_file -j $jobLimit + Tcount + date -u +fi + +## RUN SYNTHETIC RATING CURVE CALIBRATION W/ BENCHMARK POINT DATABASE (POSTGRESQL) ## +if [ "$src_adjust_spatial" = "True" ] && [ "$src_subdiv_toggle" = "True" ]; then + Tstart + echo -e $startDiv"Performing SRC adjustments using benchmark point database"$stopDiv + python3 $srcDir/src_adjust_spatial_obs.py -fim_dir $outputRunDataDir -j $jobLimit + Tcount + date -u +fi + +echo +echo -e $startDiv"Combining crosswalk tables"$stopDiv +# aggregate outputs +Tstart +python3 /foss_fim/tools/gms_tools/combine_crosswalk_tables.py -d $outputRunDataDir -o $outputRunDataDir/crosswalk_table.csv +Tcount +date -u + +echo +echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echo "---- gms_run_post_processing complete" +echo "---- Ended: `date -u`" +Calc_Duration $post_proc_start_time +echo diff --git a/gms_run_unit.sh b/gms_run_unit.sh index c64030e5c..bc2361e01 100755 --- a/gms_run_unit.sh +++ b/gms_run_unit.sh @@ -4,9 +4,9 @@ usage () { echo 'Produce GMS hydrofabric datasets for unit scale.' echo 'Usage : gms_run_unit.sh [REQ: -u -n ]' - echo ' [OPT: -h -j ] -c ' - echo ' -o -r -ud ' - echo ' -zd -a ]' + echo ' [OPT: -h -c -j -o' + echo ' -ud ' + echo ' -zd ]' echo '' echo 'REQUIRED:' echo ' -u/--hucList : HUC8s to run or multiple passed in quotes (space delimited) file.' @@ -31,10 +31,6 @@ usage () echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time.' echo ' stdout and stderr to terminal and logs. 
With >1 outputs progress and logs the rest' echo ' -o/--overwrite : overwrite outputs if already exist' - echo ' -r/--retry : retries failed jobs' - echo ' -a/--UseAllStreamOrders : If this flag is included, the system will INCLUDE stream orders 1 and 2' - echo ' at the initial load of the nwm_subset_streams.' - echo ' Default (if arg not added) is false and stream orders 1 and 2 will be dropped' echo exit } @@ -65,10 +61,6 @@ in -o|--overwrite) overwrite=1 ;; - -r|--retry) - retry="--retry-failed" - overwrite=1 - ;; -ud|--unitDenylist) shift deny_unit_list=$1 @@ -77,9 +69,6 @@ in shift deny_branch_zero_list_for_units=$1 ;; - -a|--useAllStreamOrders) - useAllStreamOrders=1 - ;; *) ;; esac shift @@ -128,22 +117,6 @@ if [ -z "$overwrite" ] then overwrite=0 fi -if [ -z "$retry" ] -then - retry="" -fi - -# invert useAllStreamOrders boolean (to make it historically compatiable -# with other files like gms/run_unit.sh and gms/run_branch.sh). -# Yet help user understand that the inclusion of the -a flag means -# to include the stream order (and not get mixed up with older versions -# where -s mean drop stream orders) -# This will encourage leaving stream orders 1 and 2 out. -if [ "$useAllStreamOrders" == "1" ]; then - export dropLowStreamOrders=0 -else - export dropLowStreamOrders=1 -fi ## SOURCE ENV FILE AND FUNCTIONS ## source $envFile @@ -167,7 +140,6 @@ fi ## Set misc global variables export overwrite=$overwrite -export dropLowStreamOrders=$dropLowStreamOrders ## Define inputs export input_WBD_gdb=$inputDataDir/wbd/WBD_National.gpkg @@ -183,15 +155,9 @@ export extent=GMS export deny_unit_list=$deny_unit_list export deny_branch_zero_list_for_units=$deny_branch_zero_list_for_units - # we are not using the variable output at this time, but keep it anways num_hucs=$(python3 $srcDir/check_huc_inputs.py -u $hucList) -## Make output and data directories ## -if [ "$retry" = "--retry-failed" ]; then - echo "Retrying failed unit level jobs for $runName" -fi - # make dirs if [ ! 
-d $outputRunDataDir ]; then mkdir -p $outputRunDataDir @@ -202,39 +168,37 @@ rm -rdf $outputRunDataDir/logs rm -rdf $outputRunDataDir/branch_errors rm -rdf $outputRunDataDir/unit_errors -# we need to clean out the all log files overwrite or not +# we need to clean out the all log files and some other files overwrite or not mkdir -p $outputRunDataDir/logs/unit mkdir -p $outputRunDataDir/unit_errors +rm -f $outputRunDataDir/gms_inputs* # copy over config file cp -a $envFile $outputRunDataDir -## RUN GMS BY BRANCH ## -echo "==========================================================================" -echo "Start of unit processing" -echo "Started: `date -u`" - -## Track total time of the overall run -T_total_start -Tstart +## RUN GMS BY UNIT ## +echo +echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echo "---- Start of gms_run_unit" +echo "---- Started: `date -u`" +all_units_start_time=`date +%s` ## GMS BY UNIT## if [ -f "$hucList" ]; then if [ "$jobLimit" -eq 1 ]; then - parallel $retry --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh :::: $hucList + parallel --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh :::: $hucList else - parallel $retry --eta -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh :::: $hucList + parallel --eta -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh :::: $hucList fi else if [ "$jobLimit" -eq 1 ]; then - parallel $retry --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh ::: $hucList + parallel --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh ::: $hucList else - parallel $retry --eta -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh ::: $hucList + parallel --eta -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh ::: $hucList fi fi echo "Unit (HUC) processing is complete" -Tcount date -u ## GET NON ZERO EXIT CODES ## @@ -246,8 +210,9 @@ find $outputRunDataDir/logs/ -name "*_unit.log" -type f | xargs grep -E "Exit st echo -e $startDiv"Start branch aggregation"$stopDiv python3 $srcDir/gms/aggregate_branch_lists.py -d $outputRunDataDir -f "gms_inputs.csv" -l $hucList -echo "==========================================================================" -echo "gms_run_unit processing is complete" -Tcount -date -u +echo +echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echo "---- gms_run_unit is complete" +echo "---- Ended: `date -u`" +Calc_Duration $all_units_start_time echo \ No newline at end of file diff --git a/src/bash_functions.env b/src/bash_functions.env index 17eb08274..f5975995a 100644 --- a/src/bash_functions.env +++ b/src/bash_functions.env @@ -21,6 +21,16 @@ Tcount () { echo "Cumulative Time = $dur_min min(s) and $dur_remainder_sec sec" } +Calc_Duration() { + start_time=$1 + end_time=`date +%s` + + total_sec=$(expr $end_time - $start_time) + dur_min=$((total_sec / 60)) + dur_remainder_sec=$((total_sec % 60)) + echo "Duration = $dur_min min(s) and $dur_remainder_sec sec" +} + export -f T_total_start export -f Tstart export -f Tcount diff --git a/src/filter_catchments_and_add_attributes.py b/src/filter_catchments_and_add_attributes.py index 00c11c9ce..7801b613d 100755 --- a/src/filter_catchments_and_add_attributes.py +++ b/src/filter_catchments_and_add_attributes.py @@ -15,8 +15,7 @@ def 
filter_catchments_and_add_attributes(input_catchments_filename, output_catchments_filename, output_flows_filename, wbd_filename, - huc_code, - drop_stream_orders=False): + huc_code): input_catchments = gpd.read_file(input_catchments_filename) wbd = gpd.read_file(wbd_filename) @@ -56,21 +55,13 @@ def filter_catchments_and_add_attributes(input_catchments_filename, output_catchments.to_file(output_catchments_filename, driver="GPKG",index=False) output_flows.to_file(output_flows_filename, driver="GPKG", index=False) except ValueError: - if (drop_stream_orders): - # this is not an exception, but a custom exit code that can be trapped - print("There are no flowlines in the HUC after stream order filtering.") - sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back - else: - # if we are not dropping stream orders, then something is wrong - raise Exception("There are no flowlines in the HUC.") - else: - if (drop_stream_orders): # this is not an exception, but a custom exit code that can be trapped print("There are no flowlines in the HUC after stream order filtering.") sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back - else: - # if we are not dropping stream orders, then something is wrong - raise Exception("There are no flowlines in the HUC.") + else: + # this is not an exception, but a custom exit code that can be trapped + print("There are no flowlines in the HUC after stream order filtering.") + sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back if __name__ == '__main__': @@ -83,8 +74,7 @@ def filter_catchments_and_add_attributes(input_catchments_filename, parser.add_argument('-o', '--output-flows-filename', help='output-flows-filename', required=True) parser.add_argument('-w', '--wbd-filename', help='wbd-filename', required=True) parser.add_argument('-u', '--huc-code', help='huc-code', required=True) - parser.add_argument('-s', '--drop-stream-orders', help='Drop stream orders 1 and 2', type=int, required=False, default=False) - + # Extract to dictionary and assign to variables. 
args = vars(parser.parse_args()) diff --git a/src/gms/delineate_hydros_and_produce_HAND.sh b/src/gms/delineate_hydros_and_produce_HAND.sh index 96e9ae5bd..1d3759a67 100755 --- a/src/gms/delineate_hydros_and_produce_HAND.sh +++ b/src/gms/delineate_hydros_and_produce_HAND.sh @@ -75,7 +75,7 @@ Tcount echo -e $startDiv"Split Derived Reaches $hucNumber $current_branch_id"$stopDiv date -u Tstart -$srcDir/split_flows.py -f $outputCurrentBranchDataDir/demDerived_reaches_$current_branch_id.shp -d $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif -s $outputCurrentBranchDataDir/demDerived_reaches_split_$current_branch_id.gpkg -p $outputCurrentBranchDataDir/demDerived_reaches_split_points_$current_branch_id.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -l $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_$current_branch_id.gpkg -ds $dropLowStreamOrders +$srcDir/split_flows.py -f $outputCurrentBranchDataDir/demDerived_reaches_$current_branch_id.shp -d $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif -s $outputCurrentBranchDataDir/demDerived_reaches_split_$current_branch_id.gpkg -p $outputCurrentBranchDataDir/demDerived_reaches_split_points_$current_branch_id.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -l $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_$current_branch_id.gpkg Tcount ## GAGE WATERSHED FOR REACHES ## @@ -145,7 +145,7 @@ Tcount echo -e $startDiv"Process catchments and model streams $hucNumber $current_branch_id"$stopDiv date -u Tstart -python3 -m memory_profiler $srcDir/filter_catchments_and_add_attributes.py -i $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.gpkg -f $outputCurrentBranchDataDir/demDerived_reaches_split_$current_branch_id.gpkg -c $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -o $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -u $hucNumber -s $dropLowStreamOrders +python3 -m memory_profiler $srcDir/filter_catchments_and_add_attributes.py -i $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.gpkg -f $outputCurrentBranchDataDir/demDerived_reaches_split_$current_branch_id.gpkg -c $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -o $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -u $hucNumber Tcount ## RASTERIZE NEW CATCHMENTS AGAIN ## diff --git a/src/gms/derive_level_paths.py b/src/gms/derive_level_paths.py index 90971d55c..aaf2eeb19 100755 --- a/src/gms/derive_level_paths.py +++ b/src/gms/derive_level_paths.py @@ -13,8 +13,7 @@ def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribut catchments_outfile=None, branch_inlets_outfile=None, toNode_attribute='To_Node', fromNode_attribute='From_Node', - reach_id_attribute='HydroID', verbose=False, - drop_low_stream_orders=False ): + reach_id_attribute='HydroID', verbose=False): if verbose: @@ -37,15 +36,15 @@ def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribut print("Sorry, no branches exist and processing can not continue. 
This could be an empty file.") sys.exit(FIM_exit_codes.GMS_UNIT_NO_BRANCHES.value) # will send a 60 back - if (drop_low_stream_orders): - stream_network = stream_network.exclude_attribute_values(branch_id_attribute="order_", - values_excluded=[1,2] - ) + # values_exluded of 1 and 2 mean where are dropping stream orders 1 and 2. We are leaving those + # for branch zero. + stream_network = stream_network.exclude_attribute_values(branch_id_attribute="order_", + values_excluded=[1,2] ) - # if there are no reaches at this point (due to filtering) - if (len(stream_network) == 0): - print("No branches exist but branch zero processing will continue. This could be due to stream order filtering.") - return + # if there are no reaches at this point (due to filtering) + if (len(stream_network) == 0): + print("No branches exist but branch zero processing will continue. This could be due to stream order filtering.") + return inlets_attribute = 'inlet_id' outlets_attribute = 'outlet_id' @@ -66,19 +65,18 @@ def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribut stream_network = stream_network.derive_outlets(toNode_attribute, fromNode_attribute, outlets_attribute=outlets_attribute, - verbose=verbose - ) + verbose=verbose) + stream_network = stream_network.derive_inlets(toNode_attribute, fromNode_attribute, inlets_attribute=inlets_attribute, verbose=verbose - ) # derive up and downstream networks + ) # derive up and downstream networks upstreams, downstreams = stream_network.make_up_and_downstream_dictionaries( reach_id_attribute=reach_id_attribute, toNode_attribute=toNode_attribute, fromNode_attribute=fromNode_attribute, - verbose=True - ) + verbose=True) # derive arbolate sum stream_network = stream_network.get_arbolate_sum(arbolate_sum_attribute='arbolate_sum', @@ -87,8 +85,7 @@ def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribut upstreams=upstreams, downstreams=downstreams, length_conversion_factor_to_km = 0.001, - verbose=verbose - ) + verbose=verbose) # derive stream branches stream_network = stream_network.derive_stream_branches(toNode_attribute=toNode_attribute, @@ -98,8 +95,7 @@ def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribut reach_id_attribute=reach_id_attribute, comparison_attributes=['arbolate_sum', 'order_'], comparison_function=max, - verbose=verbose - ) + verbose=verbose) # filter out streams without catchments if (catchments is not None) & (catchments_outfile is not None): @@ -110,18 +106,15 @@ def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribut reach_id_attribute=reach_id_attribute, branch_id_attribute=branch_id_attribute, reach_id_attribute_in_catchments=reach_id_attribute, - verbose=verbose - ) + verbose=verbose) # subset which columns to merge stream_network_to_merge = stream_network.filter(items = [reach_id_attribute,inlets_attribute, - outlets_attribute,branch_id_attribute] - ) + outlets_attribute,branch_id_attribute]) catchments = catchments.merge(stream_network_to_merge,how='inner', left_on=reach_id_attribute, - right_on=reach_id_attribute - ) + right_on=reach_id_attribute) catchments.reset_index(drop=True, inplace=True) @@ -132,8 +125,7 @@ def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribut headwaters = stream_network.derive_headwater_points_with_inlets( fromNode_attribute=fromNode_attribute, inlets_attribute=inlets_attribute, - outlet_linestring_index=outlet_linestring_index - ) + outlet_linestring_index=outlet_linestring_index) # 
headwaters write headwaters.to_file(headwaters_outfile, index=False, driver='GPKG') @@ -144,8 +136,7 @@ def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribut if out_stream_network_dissolved is not None: stream_network = stream_network.trim_branches_in_waterbodies(branch_id_attribute=branch_id_attribute, - verbose=verbose - ) + verbose=verbose) # dissolve by levelpath stream_network = stream_network.dissolve_by_branch(branch_id_attribute=branch_id_attribute, @@ -156,13 +147,11 @@ def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribut stream_network = stream_network.remove_branches_in_waterbodies(waterbodies=waterbodies, out_vector_files=out_stream_network_dissolved, - verbose=False - ) + verbose=False) if branch_inlets_outfile is not None: branch_inlets = stream_network.derive_inlet_points_by_feature(feature_attribute=branch_id_attribute, - outlet_linestring_index=outlet_linestring_index - ) + outlet_linestring_index=outlet_linestring_index) branch_inlets.to_file(branch_inlets_outfile, index=False, driver='GPKG') @@ -185,7 +174,6 @@ def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribut parser.add_argument('-e','--headwaters-outfile', help='Output stream network headwater points', required=False, default=None) parser.add_argument('-d','--out-stream-network-dissolved', help='Dissolved output stream network', required=False, default=None) parser.add_argument('-v','--verbose', help='Verbose output', required=False, default=False, action='store_true') - parser.add_argument('-s','--drop-low-stream-orders', help='Drop stream orders 1 and 2', type=int, required=False, default=False) args = vars(parser.parse_args()) diff --git a/src/gms/run_by_unit.sh b/src/gms/run_by_unit.sh index c20e81d13..38a70d6ac 100755 --- a/src/gms/run_by_unit.sh +++ b/src/gms/run_by_unit.sh @@ -116,7 +116,7 @@ Tcount echo -e $startDiv"Generating Level Paths for $hucNumber"$stopDiv date -u Tstart -$srcDir/gms/derive_level_paths.py -i $outputHucDataDir/nwm_subset_streams.gpkg -b $branch_id_attribute -r "ID" -o $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -e $outputHucDataDir/nwm_headwaters.gpkg -c $outputHucDataDir/nwm_catchments_proj_subset.gpkg -t $outputHucDataDir/nwm_catchments_proj_subset_levelPaths.gpkg -n $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg -v -s $dropLowStreamOrders -w $outputHucDataDir/nwm_lakes_proj_subset.gpkg +$srcDir/gms/derive_level_paths.py -i $outputHucDataDir/nwm_subset_streams.gpkg -b $branch_id_attribute -r "ID" -o $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -e $outputHucDataDir/nwm_headwaters.gpkg -c $outputHucDataDir/nwm_catchments_proj_subset.gpkg -t $outputHucDataDir/nwm_catchments_proj_subset_levelPaths.gpkg -n $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg -v -w $outputHucDataDir/nwm_lakes_proj_subset.gpkg # test if we received a non-zero code back from derive_level_paths.py @@ -136,12 +136,7 @@ Tcount echo -e $startDiv"Create file of branch ids for $hucNumber"$stopDiv date -u Tstart -if [ $dropLowStreamOrders != 0 ]; then # only add branch zero to branch list if low stream orders are dropped - $srcDir/gms/generate_branch_list.py -o $outputHucDataDir/branch_id.lst -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -b $branch_id_attribute -z $branch_zero_id -else - 
$srcDir/gms/generate_branch_list.py -o $outputHucDataDir/branch_id.lst -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -b $branch_id_attribute -fi - +$srcDir/gms/generate_branch_list.py -o $outputHucDataDir/branch_id.lst -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -b $branch_id_attribute -z $branch_zero_id Tcount ## CREATE BRANCH ZERO ## @@ -243,18 +238,16 @@ export xmax=$xmax export ymax=$ymax export ncols=$ncols export nrows=$nrows -if [ $dropLowStreamOrders != 0 ]; then # only produce branch zero HAND if low stream orders are dropped - $srcDir/gms/delineate_hydros_and_produce_HAND.sh "unit" -else - echo -e $startDiv"Skipping branch zero processing because there are no stream orders being dropped $hucNumber"$stopDiv -fi + +## PRODUCE BRANCH ZERO HAND +$srcDir/gms/delineate_hydros_and_produce_HAND.sh "unit" ## CREATE USGS GAGES FILE if [ -f $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg ]; then echo -e $startDiv"Assigning USGS gages to branches for $hucNumber"$stopDiv date -u Tstart - python3 -m memory_profiler $srcDir/usgs_gage_unit_setup.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -nwm $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -o $outputHucDataDir/usgs_subset_gages.gpkg -huc $hucNumber -ahps $inputDataDir/ahps_sites/nws_lid.gpkg -bzero_id $branch_zero_id -bzero $dropLowStreamOrders + python3 -m memory_profiler $srcDir/usgs_gage_unit_setup.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -nwm $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -o $outputHucDataDir/usgs_subset_gages.gpkg -huc $hucNumber -ahps $inputDataDir/ahps_sites/nws_lid.gpkg -bzero_id $branch_zero_id Tcount fi diff --git a/src/split_flows.py b/src/split_flows.py index daf4e757b..9d83122d0 100755 --- a/src/split_flows.py +++ b/src/split_flows.py @@ -40,8 +40,7 @@ def split_flows(max_length, split_points_filename, wbd8_clp_filename, lakes_filename, - nwm_streams_filename, - drop_stream_orders=False): + nwm_streams_filename): toMetersConversion = 1e-3 @@ -49,13 +48,9 @@ def split_flows(max_length, flows = gpd.read_file(flows_filename) if (len(flows) == 0): - if (drop_stream_orders): - # this is not an exception, but a custom exit code that can be trapped - print("No relevant streams within HUC boundaries.") - sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back - else: - # if we are not dropping stream orders, then something is wrong - raise Exception("No flowlines exist.") + # this is not an exception, but a custom exit code that can be trapped + print("No relevant streams within HUC boundaries.") + sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back wbd8 = gpd.read_file(wbd8_clp_filename) dem = rasterio.open(dem_filename,'r') @@ -256,13 +251,10 @@ def split_flows(max_length, remove(split_points_filename) if (len(split_flows_gdf) == 0): - if (drop_stream_orders): - # this is not an exception, but a custom exit code that can be trapped - print("There are no flowlines after stream order filtering.") - sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back - else: - # if we are not dropping stream orders, then something is wrong - raise Exception("No flowlines exist.") + # this is not an exception, but a custom exit code that can be trapped + print("There are no flowlines after stream order filtering.") + sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back + split_flows_gdf.to_file(split_flows_filename,driver=getDriver(split_flows_filename),index=False) if len(split_points_gdf) == 
0: @@ -284,7 +276,6 @@ def split_flows(max_length, parser.add_argument('-w', '--wbd8-clp-filename', help='wbd8-clp-filename',required=True) parser.add_argument('-l', '--lakes-filename', help='lakes-filename',required=True) parser.add_argument('-n', '--nwm-streams-filename', help='nwm-streams-filename',required=True) - parser.add_argument('-ds', '--drop-stream-orders', help='Drop stream orders 1 and 2', type=int, required=False, default=False) # Extract to dictionary and assign to variables. args = vars(parser.parse_args()) diff --git a/src/usgs_gage_unit_setup.py b/src/usgs_gage_unit_setup.py index 476777da0..d3efc61c2 100755 --- a/src/usgs_gage_unit_setup.py +++ b/src/usgs_gage_unit_setup.py @@ -104,7 +104,6 @@ def filter_gage_branches(gms_inputs_filename): parser.add_argument('-nwm','--input-nwm-filename', help='NWM stream subset', required=True) parser.add_argument('-o','--output-filename', help='Table to append data', required=True) parser.add_argument('-huc','--huc8-id', help='HUC8 ID (to verify gage location huc)', type=str, required=True) - parser.add_argument('-bzero','--branch-zero-check', help='Check for determining if branch zero is created', type=int, required=True) parser.add_argument('-bzero_id','--branch-zero-id', help='Branch zero ID value', type=str, required=True) parser.add_argument('-ff','--filter-gms-inputs', help='WARNING: only run this parameter if you know exactly what you are doing', required=False) @@ -115,7 +114,6 @@ def filter_gage_branches(gms_inputs_filename): input_nwm_filename = args['input_nwm_filename'] output_filename = args['output_filename'] huc8 = args['huc8_id'] - bzero_check = args['branch_zero_check'] bzero_id = args['branch_zero_id'] filter_gms_inputs = args['filter_gms_inputs'] @@ -129,10 +127,9 @@ def filter_gage_branches(gms_inputs_filename): usgs_gage_subset.write(output_filename) # Create seperate output for branch zero - if bzero_check != 0: - output_filename_zero = os.path.splitext(output_filename)[0] + '_' + bzero_id + os.path.splitext(output_filename)[-1] - usgs_gage_subset.branch_zero(bzero_id) - usgs_gage_subset.write(output_filename_zero) + output_filename_zero = os.path.splitext(output_filename)[0] + '_' + bzero_id + os.path.splitext(output_filename)[-1] + usgs_gage_subset.branch_zero(bzero_id) + usgs_gage_subset.write(output_filename_zero) else: ''' diff --git a/unit_tests/gms/derive_level_paths_params.json b/unit_tests/gms/derive_level_paths_params.json index 36b5a6cf9..97e5e1a8b 100644 --- a/unit_tests/gms/derive_level_paths_params.json +++ b/unit_tests/gms/derive_level_paths_params.json @@ -10,8 +10,7 @@ "catchments_outfile": "/data/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_catchments_proj_subset_levelPaths.gpkg", "branch_inlets_outfile": "/data/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg", "reach_id_attribute": "ID", - "verbose": true, - "drop_low_stream_orders": false + "verbose": true }, "dropped_stream_orders_no_branches_remaining": @@ -25,8 +24,7 @@ "catchments_outfile": "/data/outputs/fim_unit_test_data_do_not_remove/02030201/nwm_catchments_proj_subset_levelPaths.gpkg", "branch_inlets_outfile": "/data/outputs/fim_unit_test_data_do_not_remove/02030201/nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg", "reach_id_attribute": "ID", - "verbose": true, - "drop_low_stream_orders": true + "verbose": true } } diff --git a/unit_tests/gms/derive_level_paths_unittests.py b/unit_tests/gms/derive_level_paths_unittests.py index be613204a..7027aa51c 100644 --- 
a/unit_tests/gms/derive_level_paths_unittests.py +++ b/unit_tests/gms/derive_level_paths_unittests.py @@ -56,8 +56,7 @@ def test_Derive_level_paths_success_all_params(self): catchments_outfile = params["catchments_outfile"], branch_inlets_outfile = params["branch_inlets_outfile"], reach_id_attribute = params["reach_id_attribute"], - verbose = params["verbose"], - drop_low_stream_orders=params["drop_low_stream_orders"]) + verbose = params["verbose"]) # ----------- # test data type being return is as expected. Downstream code might to know that type @@ -94,94 +93,6 @@ def test_Derive_level_paths_success_all_params(self): print(f"Test Success: {inspect.currentframe().f_code.co_name}") print("*************************************************************") - - def test_Derive_level_paths_success_drop_low_stream_orders_not_submitted(self): - - ''' - This test includes most params but does not submit a drop_low_stream_orders param - and it should default to "false", meaning no filtering out of stream orders 1 and 2. - Note: Most path tests done in test_Derive_level_paths_success_all_params - and are not repeated here. - ''' - - params = self.params["valid_data"].copy() - - # Function Notes: - # huc_ids no longer used, so it is not submitted - # other params such as toNode_attribute and fromNode_attribute are defaulted and not passed into __main__, so I will - # skip them here. - # returns GeoDataframe (the nwm_subset_streams_levelPaths_dissolved.gpkg) - actual_df = src.Derive_level_paths(in_stream_network = params["in_stream_network"], - out_stream_network = params["out_stream_network"], - branch_id_attribute = params["branch_id_attribute"], - out_stream_network_dissolved = params["out_stream_network_dissolved"], - headwaters_outfile = params["headwaters_outfile"], - catchments = params["catchments"], - catchments_outfile = params["catchments_outfile"], - branch_inlets_outfile = params["branch_inlets_outfile"], - reach_id_attribute = params["reach_id_attribute"], - verbose = params["verbose"]) - - # ----------- - # test data type being return is as expected. Downstream code might to know that type - self.assertIsInstance(actual_df, stream_branches.StreamNetwork) - - # ----------- - #**** NOTE: Based on 05030104 - # Test row count for dissolved level path GeoDataframe which is returned. - actual_row_count = len(actual_df) - expected_row_count = 58 # should still be 58 with no filtering - self.assertEqual(actual_row_count, expected_row_count) - - print(f"Test Success: {inspect.currentframe().f_code.co_name}") - print("*************************************************************") - - - def test_Derive_level_paths_success_drop_low_stream_orders_is_true(self): - - ''' - This test includes most params but does not submit a drop_low_stream_orders param - and it should default to "false", meaning no filtering out of stream orders 1 and 2. - Note: Most path tests done in test_Derive_level_paths_success_all_params - and are not repeated here. - ''' - - params = self.params["valid_data"].copy() - - params["drop_low_stream_orders"] = True - - # Function Notes: - # huc_ids no longer used, so it is not submitted - # other params such as toNode_attribute and fromNode_attribute are defaulted and not passed into __main__, so I will - # skip them here. 
- # returns GeoDataframe (the nwm_subset_streams_levelPaths_dissolved.gpkg) - actual_df = src.Derive_level_paths(in_stream_network = params["in_stream_network"], - out_stream_network = params["out_stream_network"], - branch_id_attribute = params["branch_id_attribute"], - out_stream_network_dissolved = params["out_stream_network_dissolved"], - headwaters_outfile = params["headwaters_outfile"], - catchments = params["catchments"], - catchments_outfile = params["catchments_outfile"], - branch_inlets_outfile = params["branch_inlets_outfile"], - reach_id_attribute = params["reach_id_attribute"], - verbose = params["verbose"], - drop_low_stream_orders=params["drop_low_stream_orders"]) - - # ----------- - # test data type being return is as expected. Downstream code might to know that type - self.assertIsInstance(actual_df, stream_branches.StreamNetwork) - - # ----------- - #**** NOTE: Based on 05030104 - # Test row count for dissolved level path GeoDataframe which is returned. - actual_row_count = len(actual_df) - expected_row_count = 4 - self.assertEqual(actual_row_count, expected_row_count) - - print(f"Test Success: {inspect.currentframe().f_code.co_name}") - print("*************************************************************") - - def test_Derive_level_paths_success_drop_low_stream_orders_no_branches_left(self): ''' @@ -205,8 +116,7 @@ def test_Derive_level_paths_success_drop_low_stream_orders_no_branches_left(self catchments_outfile = params["catchments_outfile"], branch_inlets_outfile = params["branch_inlets_outfile"], reach_id_attribute = params["reach_id_attribute"], - verbose = params["verbose"], - drop_low_stream_orders=params["drop_low_stream_orders"]) + verbose = params["verbose"]) self.assertEqual(se.exception.code, fec.GMS_UNIT_NO_BRANCHES.value)
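
The test above asserts that `Derive_level_paths` exits with the custom `GMS_UNIT_NO_BRANCHES` code (60) when stream order filtering leaves no branches. As a minimal sketch only (not the actual `run_by_unit.sh` implementation), a calling script could trap that code roughly as follows; the variable name and handling logic are assumptions.

```bash
# Capture the exit code from derive_level_paths.py rather than letting
# "bash -e" abort the unit script; code 60 is an expected condition, not an error.
set +e
$srcDir/gms/derive_level_paths.py -i $outputHucDataDir/nwm_subset_streams.gpkg \
    -b $branch_id_attribute -r "ID" \
    -o $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg \
    -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg
level_paths_exit_code=$?
set -e

if [ $level_paths_exit_code -eq 60 ]; then      # FIM_exit_codes.GMS_UNIT_NO_BRANCHES
    echo "No GMS branches remain for this HUC; only branch zero will be processed."
elif [ $level_paths_exit_code -ne 0 ]; then
    exit $level_paths_exit_code                 # any other non-zero code is a real failure
fi
```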