diff --git a/.gitignore b/.gitignore
index e18bb6432..7ce22521c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,13 +1,17 @@
 *.pyc
 *.swp
+*.swo
+*.swn
 .nfs*
 __pycache__/
 config/**
+!config/deny*.lst
+!config/aws*.lst
 !config/*template*
 !config/*default*
-!config/*calibrated*
 !config/symbology/
+config/*.swp
 .vscode/
 **/.DS_Store
 **/*_pytest.py
-
+.private/
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 000000000..6d018da0c
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,11 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+authors:
+- family-names: "Aristizabal"
+  given-names: "Fernando"
+  orcid: "https://orcid.org/0000-0003-2525-4712"
+title: "Generalized Mainstems (GMS): An improvement of the Height Above Nearest Drainage Method (HAND) for flood inundation mapping (FIM) that uses processing units of unit stream order"
+version: alpha
+doi: None
+date-released: pending
+url: "https://github.com/NOAA-OWP/fim_4"
diff --git a/Dockerfile b/Dockerfile
index ef56576b8..f8f535cef 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -49,17 +49,17 @@ RUN cd taudem_accelerated_flowDirections/taudem/build/bin && mv -t $taudemDir2 d
 # Base Image that has GDAL, PROJ, etc
 FROM osgeo/gdal:ubuntu-full-3.1.2
 ARG dataDir=/data
+# remove the ARG line below, uncomment the "# ENV projectDir=/foss_fim" line, and delete this comment
 ARG projectDir=/foss_fim
+# ENV projectDir=/foss_fim
 ARG depDir=/dependencies
 ENV inputDataDir=$dataDir/inputs
-ENV outputDataDir=$dataDir/outputs
+ENV outputDataDir=/outputs
 ENV srcDir=$projectDir/src
 ENV taudemDir=$depDir/taudem/bin
 ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin
 
 ## ADDING FIM GROUP ##
-# ARG GroupID=1370800120
-# ARG GroupName=apd_dev
 ARG GroupID=1370800235
 ARG GroupName=fim
 RUN addgroup --gid $GroupID $GroupName
@@ -69,13 +69,16 @@ ENV GN=$GroupName
 
 RUN mkdir -p $depDir
 COPY --from=builder $depDir $depDir
-
-RUN apt update --fix-missing && apt install -y p7zip-full python3-pip time mpich=3.3.2-2build1 parallel=20161222-1.1 libgeos-dev=3.8.0-1build1 expect=5.45.4-2build1
-
-RUN DEBIAN_FRONTEND=noninteractive apt install -y grass=7.8.2-1build3 grass-doc=7.8.2-1build3
+RUN apt update --fix-missing
+RUN apt install -y p7zip-full python3-pip time mpich=3.3.2-2build1 parallel=20161222-1.1 libgeos-dev=3.8.0-1build1 expect=5.45.4-2build1 tmux rsync
 
 RUN apt auto-remove
 
+## adding AWS CLI (for bash) ##
+RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
+    unzip awscliv2.zip && \
+    ./aws/install
+
 ## adding environment variables for numba and python ##
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8
@@ -83,11 +86,20 @@ ENV PYTHONUNBUFFERED=TRUE
 
 ## ADD TO PATHS ##
 ENV PATH="$projectDir:${PATH}"
+ENV PYTHONPATH=${PYTHONPATH}:$srcDir:$projectDir/tests:$projectDir/tools
 
 ## install python 3 modules ##
 COPY Pipfile .
 COPY Pipfile.lock .
-RUN pip3 install pipenv && PIP_NO_CACHE_DIR=off PIP_NO_BINARY=shapely,pygeos pipenv install --system --deploy --ignore-pipfile
+RUN pip3 install pipenv==2022.4.8 && PIP_NO_CACHE_DIR=off PIP_NO_BINARY=shapely,pygeos pipenv install --system --deploy --ignore-pipfile
+#RUN pip3 install pipenv==2022.4.8 && pipenv install --system --deploy --ignore-pipfile (too slow to
+# leave out shapely,pygeos at this time. Likely better after upgrading)
+
+# TEMP FIX as neither shapely nor Shapely is staying in the pip list. If we manually add
+# it with pip (not pipenv), it works. Notice case for Shapely versus shapely.
+# This temp fix works for now until we can reconcile the shapely package,
+# pygeos, geopandas, and possibly others (coming soon)
+RUN pip install shapely==1.7.0
 
 ## RUN UMASK TO CHANGE DEFAULT PERMISSIONS ##
 ADD ./src/entrypoint.sh /
diff --git a/Pipfile b/Pipfile
index b54b7daf7..a84391528 100755
--- a/Pipfile
+++ b/Pipfile
@@ -10,19 +10,30 @@ ipython = "*"
 fiona = "==1.8.17"
 geopandas = "==0.8.1"
 numba = "==0.50.1"
+numpy = "==1.22.4"
 pandas = "==1.0.5"
 pygeos = "==0.7.1"
 rasterio = "==1.1.5"
 rasterstats = "==0.15.0"
 richdem = "==0.3.4"
 tqdm = "==4.48.0"
-Shapely = "==1.7.0"
-grass-session = "==0.5"
 seaborn = "==0.11.0"
 python-dotenv = "*"
 natsort = "*"
 memory-profiler = "*"
+xarray = "==0.19.0"
+netcdf4 = "==1.5.7"
+tables = "==3.6.1"
+h5py = "==3.4.0"
 pyproj = "==3.1.0"
+psycopg2-binary = "==2.8.6"
+boto3 = "*"
+jupyter = "*"
+jupyterlab = "*"
+ipympl = "*"
+pytest = "*"
+whitebox = "*"
+shapely = "==1.7.0"
 
 [requires]
 python_version = "3.8"
diff --git a/Pipfile.lock b/Pipfile.lock
index 4307ca71f..03826cafa 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "f1296d064e5178d77fdf23130a571d8169821f2eeed04b37c1b2cf60fe6928ba"
+            "sha256": "4ef68f0d8f3ea64518ebd604722e5283603cf6ffe3401320f3db3be365224ac0"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -18,25 +18,338 @@
     "default": {
         "affine": {
             "hashes": [
-                "sha256:2e045def1aa29e613c42e801a7e10e0b9bacfed1a7c6af0cadf8843530a15102",
-                "sha256:34b05b070d954c382e56f02c207a372d8a32621a87653cc30cdd31cd7f65799f"
+                "sha256:8a3df80e2b2378aef598a83c1392efd47967afec4242021a0b06b4c7cbc61a92",
+                "sha256:a24d818d6a836c131976d22f8c27b8d3ca32d0af64c1d8d29deb7bafa4da1eea"
             ],
-            "version": "==2.3.0"
+            "markers": "python_version >= '3.7'",
+            "version": "==2.4.0"
         },
-        "attrs": {
+        "aiofiles": {
             "hashes": [
-                "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1",
-                "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"
+                "sha256:1142fa8e80dbae46bb6339573ad4c8c0841358f79c6eb50a493dceca14621bad",
+                "sha256:9107f1ca0b2a5553987a94a3c9959fe5b491fdf731389aa5b7b1bd0733e32de6"
             ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
+            "markers": "python_version >= '3.7' and python_version < '4.0'",
+            "version": "==22.1.0"
+        },
+        "aiosqlite": {
+            "hashes": [
+                "sha256:c3511b841e3a2c5614900ba1d179f366826857586f78abd75e7cbeb88e75a557",
+                "sha256:faa843ef5fb08bafe9a9b3859012d3d9d6f77ce3637899de20606b7fc39aa213"
+            ],
+            "markers": "python_version >= '3.7'",
+            "version": "==0.18.0"
+        },
+        "anyio": {
+            "hashes": [
+                "sha256:25ea0d673ae30af41a0c442f81cf3b38c7e79fdc7b60335a4c14e05eb0947421",
+                "sha256:fbbe32bd270d2a2ef3ed1c5d45041250284e31fc0a4df4a5a6071842051a51e3"
+            ],
+            "markers": "python_full_version >= '3.6.2'",
+            "version": "==3.6.2"
+        },
+        "argon2-cffi": {
+            "hashes": [
+                "sha256:8c976986f2c5c0e5000919e6de187906cfd81fb1c72bf9d88c01177e77da7f80",
+                "sha256:d384164d944190a7dd7ef22c6aa3ff197da12962bd04b17f64d4e93d934dba5b"
+            ],
+            "markers": "python_version >= '3.6'",
+            "version": "==21.3.0"
+        },
+        "argon2-cffi-bindings": {
+            "hashes": [
+                "sha256:20ef543a89dee4db46a1a6e206cd015360e5a75822f76df533845c3cbaf72670",
+                "sha256:2c3e3cc67fdb7d82c4718f19b4e7a87123caf8a93fde7e23cf66ac0337d3cb3f",
+                "sha256:3b9ef65804859d335dc6b31582cad2c5166f0c3e7975f324d9ffaa34ee7e6583",
+                "sha256:3e385d1c39c520c08b53d63300c3ecc28622f076f4c2b0e6d7e796e9f6502194",
+                "sha256:58ed19212051f49a523abb1dbe954337dc82d947fb6e5a0da60f7c8471a8476c",
"sha256:5e00316dabdaea0b2dd82d141cc66889ced0cdcbfa599e8b471cf22c620c329a", + "sha256:603ca0aba86b1349b147cab91ae970c63118a0f30444d4bc80355937c950c082", + "sha256:6a22ad9800121b71099d0fb0a65323810a15f2e292f2ba450810a7316e128ee5", + "sha256:8cd69c07dd875537a824deec19f978e0f2078fdda07fd5c42ac29668dda5f40f", + "sha256:93f9bf70084f97245ba10ee36575f0c3f1e7d7724d67d8e5b08e61787c320ed7", + "sha256:9524464572e12979364b7d600abf96181d3541da11e23ddf565a32e70bd4dc0d", + "sha256:b2ef1c30440dbbcba7a5dc3e319408b59676e2e039e2ae11a8775ecf482b192f", + "sha256:b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae", + "sha256:bb89ceffa6c791807d1305ceb77dbfacc5aa499891d2c55661c6459651fc39e3", + "sha256:bd46088725ef7f58b5a1ef7ca06647ebaf0eb4baff7d1d0d177c6cc8744abd86", + "sha256:ccb949252cb2ab3a08c02024acb77cfb179492d5701c7cbdbfd776124d4d2367", + "sha256:d4966ef5848d820776f5f562a7d45fdd70c2f330c961d0d745b784034bd9f48d", + "sha256:e415e3f62c8d124ee16018e491a009937f8cf7ebf5eb430ffc5de21b900dad93", + "sha256:ed2937d286e2ad0cc79a7087d3c272832865f779430e0cc2b4f3718d3159b0cb", + "sha256:f1152ac548bd5b8bcecfb0b0371f082037e47128653df2e8ba6e914d384f3c3e", + "sha256:f9f8b450ed0547e3d473fdc8612083fd08dd2120d6ac8f73828df9b7d45bb351" + ], + "markers": "python_version >= '3.6'", "version": "==21.2.0" }, + "arrow": { + "hashes": [ + "sha256:3934b30ca1b9f292376d9db15b19446088d12ec58629bc3f0da28fd55fb633a1", + "sha256:5a49ab92e3b7b71d96cd6bfcc4df14efefc9dfa96ea19045815914a6ab6b1fe2" + ], + "markers": "python_version >= '3.6'", + "version": "==1.2.3" + }, + "asttokens": { + "hashes": [ + "sha256:4622110b2a6f30b77e1473affaa97e711bc2f07d3f10848420ff1898edbe94f3", + "sha256:6b0ac9e93fb0335014d382b8fa9b3afa7df546984258005da0b9e7095b3deb1c" + ], + "version": "==2.2.1" + }, + "attrs": { + "hashes": [ + "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836", + "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99" + ], + "markers": "python_version >= '3.6'", + "version": "==22.2.0" + }, + "babel": { + "hashes": [ + "sha256:1ad3eca1c885218f6dce2ab67291178944f810a10a9b5f3cb8382a5a232b64fe", + "sha256:5ef4b3226b0180dedded4229651c8b0e1a3a6a2837d45a073272f313e4cf97f6" + ], + "markers": "python_version >= '3.6'", + "version": "==2.11.0" + }, + "backcall": { + "hashes": [ + "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", + "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255" + ], + "version": "==0.2.0" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:0e79446b10b3ecb499c1556f7e228a53e64a2bfcebd455f370d8927cb5b59e39", + "sha256:bc4bdda6717de5a2987436fb8d72f45dc90dd856bdfd512a1314ce90349a0106" + ], + "markers": "python_version >= '3.6'", + "version": "==4.11.2" + }, + "bleach": { + "hashes": [ + "sha256:1a1a85c1595e07d8db14c5f09f09e6433502c51c595970edc090551f0db99414", + "sha256:33c16e3353dbd13028ab4799a0f89a83f113405c766e9c122df8a06f5b85b3f4" + ], + "markers": "python_version >= '3.7'", + "version": "==6.0.0" + }, + "boto3": { + "hashes": [ + "sha256:30c7d967ed1c6b5a05643e42cae9d4d36c3f1cb6782637ddc7007a104cfd9027", + "sha256:b4c2969b7677762914394b8273cc1905dfe5b71f250741c1a575487ae357e729" + ], + "index": "pypi", + "version": "==1.26.76" + }, + "botocore": { + "hashes": [ + "sha256:70735b00cd529f152992231ca6757e458e5ec25db43767b3526e9a35b2f143b7", + "sha256:c2f67b6b3f8acf2968eafca06526f07b9fb0d27bac4c68a635d51abb675134a7" + ], + "markers": "python_version >= '3.7'", + "version": "==1.29.76" + }, "certifi": { "hashes": [ - 
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", - "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3", + "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18" + ], + "markers": "python_version >= '3.6'", + "version": "==2022.12.7" + }, + "cffi": { + "hashes": [ + "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5", + "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef", + "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104", + "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426", + "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405", + "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375", + "sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a", + "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e", + "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc", + "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf", + "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185", + "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497", + "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3", + "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35", + "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c", + "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83", + "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21", + "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca", + "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984", + "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac", + "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd", + "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee", + "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a", + "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2", + "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192", + "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7", + "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585", + "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f", + "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e", + "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27", + "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b", + "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e", + "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e", + "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d", + "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c", + "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415", + "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82", + "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02", + "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314", + "sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325", + 
"sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c", + "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3", + "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914", + "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045", + "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d", + "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9", + "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5", + "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2", + "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c", + "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3", + "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2", + "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8", + "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d", + "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d", + "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9", + "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162", + "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76", + "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4", + "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e", + "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9", + "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6", + "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b", + "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01", + "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0" ], - "version": "==2021.10.8" + "version": "==1.15.1" + }, + "cftime": { + "hashes": [ + "sha256:055d5d60a756c6c1857cf84d77655bb707057bb6c4a4fbb104a550e76c40aad9", + "sha256:07fdef2f75a0f0952b0376fa4cd08ef8a1dad3b963976ac07517811d434936b7", + "sha256:0955e1f3e1c09a9e0296b50f135ff9719cb2466f81c8ad4a10ef06fa394de984", + "sha256:29c18601abea0fd160fbe423e05c7a56fe1d38dd250a6b010de499a132d3fe18", + "sha256:2abdac6ca5b8b6102f319122546739dfc42406b816c16f2a98a8f0cd406d3bf0", + "sha256:2ba7909a0cd4adcb16797d8d6ab2767e7ddb980b2bf9dbabfc71b3bdd94f072b", + "sha256:3042048324b4d6a1066c978ec78101effdd84320e8862bfdbf8122d7ad7588ec", + "sha256:455cec3627e6ca8694b0d9201da6581eb4381b58389f1fbcb51a14fa0e2b3d94", + "sha256:56d0242fc4990584b265622622b25bb262a178097711d2d95e53ef52a9d23e7e", + "sha256:8614c00fb8a5046de304fdd86dbd224f99408185d7b245ac6628d0276596e6d2", + "sha256:86fe550b94525c327578a90b2e13418ca5ba6c636d5efe3edec310e631757eea", + "sha256:892d5dc38f8b998c83a2a01f131e63896d020586de473e1878f9e85acc70ad44", + "sha256:8d49d69c64cee2c175478eed84c3a57fce083da4ceebce16440f72be561a8489", + "sha256:93f00f454329c1f2588ebca2650e8edf7607d6189dbdcc81b5f3be2080155cc4", + "sha256:acb294fdb80e33545ae54b4421df35c4e578708a5ffce1c00408b2294e70ecef", + "sha256:aedfb7a783d19d7a30cb41951310f3bfe98f9f21fffc723c8af08a11962b0b17", + "sha256:afb5b38b51b8bc02f1656a9f15c52b0b20a3999adbe1ab9ac57f926e0065b48a", + "sha256:b4d2a1920f0aad663f25700b30621ff64af373499e52b544da1148dd8c09409a", + "sha256:e83db2fdda900eb154a9f79dfb665ac6190781c61d2e18151996de5ee7ffd8a2", + "sha256:eb7f8cd0996640b83020133b5ef6b97fc9216c3129eaeeaca361abdff5d82166", + "sha256:ee70fa069802652cf534de1dd3fc590b7d22d4127447bf96ac9849abcdadadf1" + ], + 
"markers": "python_version >= '3.7'", + "version": "==1.6.2" + }, + "charset-normalizer": { + "hashes": [ + "sha256:00d3ffdaafe92a5dc603cb9bd5111aaa36dfa187c8285c543be562e61b755f6b", + "sha256:024e606be3ed92216e2b6952ed859d86b4cfa52cd5bc5f050e7dc28f9b43ec42", + "sha256:0298eafff88c99982a4cf66ba2efa1128e4ddaca0b05eec4c456bbc7db691d8d", + "sha256:02a51034802cbf38db3f89c66fb5d2ec57e6fe7ef2f4a44d070a593c3688667b", + "sha256:083c8d17153ecb403e5e1eb76a7ef4babfc2c48d58899c98fcaa04833e7a2f9a", + "sha256:0a11e971ed097d24c534c037d298ad32c6ce81a45736d31e0ff0ad37ab437d59", + "sha256:0bf2dae5291758b6f84cf923bfaa285632816007db0330002fa1de38bfcb7154", + "sha256:0c0a590235ccd933d9892c627dec5bc7511ce6ad6c1011fdf5b11363022746c1", + "sha256:0f438ae3532723fb6ead77e7c604be7c8374094ef4ee2c5e03a3a17f1fca256c", + "sha256:109487860ef6a328f3eec66f2bf78b0b72400280d8f8ea05f69c51644ba6521a", + "sha256:11b53acf2411c3b09e6af37e4b9005cba376c872503c8f28218c7243582df45d", + "sha256:12db3b2c533c23ab812c2b25934f60383361f8a376ae272665f8e48b88e8e1c6", + "sha256:14e76c0f23218b8f46c4d87018ca2e441535aed3632ca134b10239dfb6dadd6b", + "sha256:16a8663d6e281208d78806dbe14ee9903715361cf81f6d4309944e4d1e59ac5b", + "sha256:292d5e8ba896bbfd6334b096e34bffb56161c81408d6d036a7dfa6929cff8783", + "sha256:2c03cc56021a4bd59be889c2b9257dae13bf55041a3372d3295416f86b295fb5", + "sha256:2e396d70bc4ef5325b72b593a72c8979999aa52fb8bcf03f701c1b03e1166918", + "sha256:2edb64ee7bf1ed524a1da60cdcd2e1f6e2b4f66ef7c077680739f1641f62f555", + "sha256:31a9ddf4718d10ae04d9b18801bd776693487cbb57d74cc3458a7673f6f34639", + "sha256:356541bf4381fa35856dafa6a965916e54bed415ad8a24ee6de6e37deccf2786", + "sha256:358a7c4cb8ba9b46c453b1dd8d9e431452d5249072e4f56cfda3149f6ab1405e", + "sha256:37f8febc8ec50c14f3ec9637505f28e58d4f66752207ea177c1d67df25da5aed", + "sha256:39049da0ffb96c8cbb65cbf5c5f3ca3168990adf3551bd1dee10c48fce8ae820", + "sha256:39cf9ed17fe3b1bc81f33c9ceb6ce67683ee7526e65fde1447c772afc54a1bb8", + "sha256:3ae1de54a77dc0d6d5fcf623290af4266412a7c4be0b1ff7444394f03f5c54e3", + "sha256:3b590df687e3c5ee0deef9fc8c547d81986d9a1b56073d82de008744452d6541", + "sha256:3e45867f1f2ab0711d60c6c71746ac53537f1684baa699f4f668d4c6f6ce8e14", + "sha256:3fc1c4a2ffd64890aebdb3f97e1278b0cc72579a08ca4de8cd2c04799a3a22be", + "sha256:4457ea6774b5611f4bed5eaa5df55f70abde42364d498c5134b7ef4c6958e20e", + "sha256:44ba614de5361b3e5278e1241fda3dc1838deed864b50a10d7ce92983797fa76", + "sha256:4a8fcf28c05c1f6d7e177a9a46a1c52798bfe2ad80681d275b10dcf317deaf0b", + "sha256:4b0d02d7102dd0f997580b51edc4cebcf2ab6397a7edf89f1c73b586c614272c", + "sha256:502218f52498a36d6bf5ea77081844017bf7982cdbe521ad85e64cabee1b608b", + "sha256:503e65837c71b875ecdd733877d852adbc465bd82c768a067badd953bf1bc5a3", + "sha256:5995f0164fa7df59db4746112fec3f49c461dd6b31b841873443bdb077c13cfc", + "sha256:59e5686dd847347e55dffcc191a96622f016bc0ad89105e24c14e0d6305acbc6", + "sha256:601f36512f9e28f029d9481bdaf8e89e5148ac5d89cffd3b05cd533eeb423b59", + "sha256:608862a7bf6957f2333fc54ab4399e405baad0163dc9f8d99cb236816db169d4", + "sha256:62595ab75873d50d57323a91dd03e6966eb79c41fa834b7a1661ed043b2d404d", + "sha256:70990b9c51340e4044cfc394a81f614f3f90d41397104d226f21e66de668730d", + "sha256:71140351489970dfe5e60fc621ada3e0f41104a5eddaca47a7acb3c1b851d6d3", + "sha256:72966d1b297c741541ca8cf1223ff262a6febe52481af742036a0b296e35fa5a", + "sha256:74292fc76c905c0ef095fe11e188a32ebd03bc38f3f3e9bcb85e4e6db177b7ea", + "sha256:761e8904c07ad053d285670f36dd94e1b6ab7f16ce62b9805c475b7aa1cffde6", + 
"sha256:772b87914ff1152b92a197ef4ea40efe27a378606c39446ded52c8f80f79702e", + "sha256:79909e27e8e4fcc9db4addea88aa63f6423ebb171db091fb4373e3312cb6d603", + "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24", + "sha256:7eb33a30d75562222b64f569c642ff3dc6689e09adda43a082208397f016c39a", + "sha256:81d6741ab457d14fdedc215516665050f3822d3e56508921cc7239f8c8e66a58", + "sha256:8499ca8f4502af841f68135133d8258f7b32a53a1d594aa98cc52013fff55678", + "sha256:84c3990934bae40ea69a82034912ffe5a62c60bbf6ec5bc9691419641d7d5c9a", + "sha256:87701167f2a5c930b403e9756fab1d31d4d4da52856143b609e30a1ce7160f3c", + "sha256:88600c72ef7587fe1708fd242b385b6ed4b8904976d5da0893e31df8b3480cb6", + "sha256:8ac7b6a045b814cf0c47f3623d21ebd88b3e8cf216a14790b455ea7ff0135d18", + "sha256:8b8af03d2e37866d023ad0ddea594edefc31e827fee64f8de5611a1dbc373174", + "sha256:8c7fe7afa480e3e82eed58e0ca89f751cd14d767638e2550c77a92a9e749c317", + "sha256:8eade758719add78ec36dc13201483f8e9b5d940329285edcd5f70c0a9edbd7f", + "sha256:911d8a40b2bef5b8bbae2e36a0b103f142ac53557ab421dc16ac4aafee6f53dc", + "sha256:93ad6d87ac18e2a90b0fe89df7c65263b9a99a0eb98f0a3d2e079f12a0735837", + "sha256:95dea361dd73757c6f1c0a1480ac499952c16ac83f7f5f4f84f0658a01b8ef41", + "sha256:9ab77acb98eba3fd2a85cd160851816bfce6871d944d885febf012713f06659c", + "sha256:9cb3032517f1627cc012dbc80a8ec976ae76d93ea2b5feaa9d2a5b8882597579", + "sha256:9cf4e8ad252f7c38dd1f676b46514f92dc0ebeb0db5552f5f403509705e24753", + "sha256:9d9153257a3f70d5f69edf2325357251ed20f772b12e593f3b3377b5f78e7ef8", + "sha256:a152f5f33d64a6be73f1d30c9cc82dfc73cec6477ec268e7c6e4c7d23c2d2291", + "sha256:a16418ecf1329f71df119e8a65f3aa68004a3f9383821edcb20f0702934d8087", + "sha256:a60332922359f920193b1d4826953c507a877b523b2395ad7bc716ddd386d866", + "sha256:a8d0fc946c784ff7f7c3742310cc8a57c5c6dc31631269876a88b809dbeff3d3", + "sha256:ab5de034a886f616a5668aa5d098af2b5385ed70142090e2a31bcbd0af0fdb3d", + "sha256:c22d3fe05ce11d3671297dc8973267daa0f938b93ec716e12e0f6dee81591dc1", + "sha256:c2ac1b08635a8cd4e0cbeaf6f5e922085908d48eb05d44c5ae9eabab148512ca", + "sha256:c512accbd6ff0270939b9ac214b84fb5ada5f0409c44298361b2f5e13f9aed9e", + "sha256:c75ffc45f25324e68ab238cb4b5c0a38cd1c3d7f1fb1f72b5541de469e2247db", + "sha256:c95a03c79bbe30eec3ec2b7f076074f4281526724c8685a42872974ef4d36b72", + "sha256:cadaeaba78750d58d3cc6ac4d1fd867da6fc73c88156b7a3212a3cd4819d679d", + "sha256:cd6056167405314a4dc3c173943f11249fa0f1b204f8b51ed4bde1a9cd1834dc", + "sha256:db72b07027db150f468fbada4d85b3b2729a3db39178abf5c543b784c1254539", + "sha256:df2c707231459e8a4028eabcd3cfc827befd635b3ef72eada84ab13b52e1574d", + "sha256:e62164b50f84e20601c1ff8eb55620d2ad25fb81b59e3cd776a1902527a788af", + "sha256:e696f0dd336161fca9adbb846875d40752e6eba585843c768935ba5c9960722b", + "sha256:eaa379fcd227ca235d04152ca6704c7cb55564116f8bc52545ff357628e10602", + "sha256:ebea339af930f8ca5d7a699b921106c6e29c617fe9606fa7baa043c1cdae326f", + "sha256:f4c39b0e3eac288fedc2b43055cfc2ca7a60362d0e5e87a637beac5d801ef478", + "sha256:f5057856d21e7586765171eac8b9fc3f7d44ef39425f85dbcccb13b3ebea806c", + "sha256:f6f45710b4459401609ebebdbcfb34515da4fc2aa886f95107f556ac69a9147e", + "sha256:f97e83fa6c25693c7a35de154681fcc257c1c41b38beb0304b9c4d2d9e164479", + "sha256:f9d0c5c045a3ca9bedfc35dca8526798eb91a07aa7a2c0fee134c6c6f321cbd7", + "sha256:ff6f3db31555657f3163b15a6b7c6938d08df7adbfc9dd13d9d19edad678f1e8" + ], + "markers": "python_version >= '3.6'", + "version": "==3.0.1" }, "click": { "hashes": [ @@ -58,9 +371,78 @@ 
"sha256:a4bc13d623356b373c2c27c53dbd9c68cae5d526270bfa71f6c6fa69669c6b27", "sha256:c1ca117dbce1fe20a5809dc96f01e1c2840f6dcc939b3ddbb1111bf330ba82df" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' and python_version < '4'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' and python_version < '4.0'", "version": "==0.7.2" }, + "comm": { + "hashes": [ + "sha256:3e2f5826578e683999b93716285b3b1f344f157bf75fa9ce0a797564e742f062", + "sha256:9f3abf3515112fa7c55a42a6a5ab358735c9dccc8b5910a9d8e3ef5998130666" + ], + "markers": "python_version >= '3.6'", + "version": "==0.1.2" + }, + "contourpy": { + "hashes": [ + "sha256:031154ed61f7328ad7f97662e48660a150ef84ee1bc8876b6472af88bf5a9b98", + "sha256:0f9d350b639db6c2c233d92c7f213d94d2e444d8e8fc5ca44c9706cf72193772", + "sha256:130230b7e49825c98edf0b428b7aa1125503d91732735ef897786fe5452b1ec2", + "sha256:152fd8f730c31fd67fe0ffebe1df38ab6a669403da93df218801a893645c6ccc", + "sha256:1c71fdd8f1c0f84ffd58fca37d00ca4ebaa9e502fb49825484da075ac0b0b803", + "sha256:24847601071f740837aefb730e01bd169fbcaa610209779a78db7ebb6e6a7051", + "sha256:2e9ebb4425fc1b658e13bace354c48a933b842d53c458f02c86f371cecbedecc", + "sha256:30676ca45084ee61e9c3da589042c24a57592e375d4b138bd84d8709893a1ba4", + "sha256:31a55dccc8426e71817e3fe09b37d6d48ae40aae4ecbc8c7ad59d6893569c436", + "sha256:366a0cf0fc079af5204801786ad7a1c007714ee3909e364dbac1729f5b0849e5", + "sha256:38e2e577f0f092b8e6774459317c05a69935a1755ecfb621c0a98f0e3c09c9a5", + "sha256:3c184ad2433635f216645fdf0493011a4667e8d46b34082f5a3de702b6ec42e3", + "sha256:3caea6365b13119626ee996711ab63e0c9d7496f65641f4459c60a009a1f3e80", + "sha256:3e927b3868bd1e12acee7cc8f3747d815b4ab3e445a28d2e5373a7f4a6e76ba1", + "sha256:4ee3ee247f795a69e53cd91d927146fb16c4e803c7ac86c84104940c7d2cabf0", + "sha256:54d43960d809c4c12508a60b66cb936e7ed57d51fb5e30b513934a4a23874fae", + "sha256:57119b0116e3f408acbdccf9eb6ef19d7fe7baf0d1e9aaa5381489bc1aa56556", + "sha256:58569c491e7f7e874f11519ef46737cea1d6eda1b514e4eb5ac7dab6aa864d02", + "sha256:5a011cf354107b47c58ea932d13b04d93c6d1d69b8b6dce885e642531f847566", + "sha256:5caeacc68642e5f19d707471890f037a13007feba8427eb7f2a60811a1fc1350", + "sha256:5dd34c1ae752515318224cba7fc62b53130c45ac6a1040c8b7c1a223c46e8967", + "sha256:60835badb5ed5f4e194a6f21c09283dd6e007664a86101431bf870d9e86266c4", + "sha256:62398c80ef57589bdbe1eb8537127321c1abcfdf8c5f14f479dbbe27d0322e66", + "sha256:6381fa66866b0ea35e15d197fc06ac3840a9b2643a6475c8fff267db8b9f1e69", + "sha256:64757f6460fc55d7e16ed4f1de193f362104285c667c112b50a804d482777edd", + "sha256:69f8ff4db108815addd900a74df665e135dbbd6547a8a69333a68e1f6e368ac2", + "sha256:6c180d89a28787e4b73b07e9b0e2dac7741261dbdca95f2b489c4f8f887dd810", + "sha256:71b0bf0c30d432278793d2141362ac853859e87de0a7dee24a1cea35231f0d50", + "sha256:769eef00437edf115e24d87f8926955f00f7704bede656ce605097584f9966dc", + "sha256:7f6979d20ee5693a1057ab53e043adffa1e7418d734c1532e2d9e915b08d8ec2", + "sha256:87f4d8941a9564cda3f7fa6a6cd9b32ec575830780677932abdec7bcb61717b0", + "sha256:89ba9bb365446a22411f0673abf6ee1fea3b2cf47b37533b970904880ceb72f3", + "sha256:8acf74b5d383414401926c1598ed77825cd530ac7b463ebc2e4f46638f56cce6", + "sha256:9056c5310eb1daa33fc234ef39ebfb8c8e2533f088bbf0bc7350f70a29bde1ac", + "sha256:95c3acddf921944f241b6773b767f1cbce71d03307270e2d769fd584d5d1092d", + "sha256:9e20e5a1908e18aaa60d9077a6d8753090e3f85ca25da6e25d30dc0a9e84c2c6", + "sha256:a1e97b86f73715e8670ef45292d7cc033548266f07d54e2183ecb3c87598888f", + 
"sha256:a877ada905f7d69b2a31796c4b66e31a8068b37aa9b78832d41c82fc3e056ddd", + "sha256:a9d7587d2fdc820cc9177139b56795c39fb8560f540bba9ceea215f1f66e1566", + "sha256:abf298af1e7ad44eeb93501e40eb5a67abbf93b5d90e468d01fc0c4451971afa", + "sha256:ae90d5a8590e5310c32a7630b4b8618cef7563cebf649011da80874d0aa8f414", + "sha256:b6d0f9e1d39dbfb3977f9dd79f156c86eb03e57a7face96f199e02b18e58d32a", + "sha256:b8d587cc39057d0afd4166083d289bdeff221ac6d3ee5046aef2d480dc4b503c", + "sha256:c5210e5d5117e9aec8c47d9156d1d3835570dd909a899171b9535cb4a3f32693", + "sha256:cc331c13902d0f50845099434cd936d49d7a2ca76cb654b39691974cb1e4812d", + "sha256:ce41676b3d0dd16dbcfabcc1dc46090aaf4688fd6e819ef343dbda5a57ef0161", + "sha256:d8165a088d31798b59e91117d1f5fc3df8168d8b48c4acc10fc0df0d0bdbcc5e", + "sha256:e7281244c99fd7c6f27c1c6bfafba878517b0b62925a09b586d88ce750a016d2", + "sha256:e96a08b62bb8de960d3a6afbc5ed8421bf1a2d9c85cc4ea73f4bc81b4910500f", + "sha256:ed33433fc3820263a6368e532f19ddb4c5990855e4886088ad84fd7c4e561c71", + "sha256:efb8f6d08ca7998cf59eaf50c9d60717f29a1a0a09caa46460d33b2924839dbd", + "sha256:efe99298ba37e37787f6a2ea868265465410822f7bea163edcc1bd3903354ea9", + "sha256:f99e9486bf1bb979d95d5cffed40689cb595abb2b841f2991fc894b3452290e8", + "sha256:fc1464c97579da9f3ab16763c32e5c5d5bb5fa1ec7ce509a4ca6108b61b84fab", + "sha256:fd7dc0e6812b799a34f6d12fcb1000539098c249c8da54f3566c6a6461d0dbad" + ], + "markers": "python_version >= '3.8'", + "version": "==1.0.7" + }, "cycler": { "hashes": [ "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3", @@ -69,6 +451,67 @@ "markers": "python_version >= '3.6'", "version": "==0.11.0" }, + "debugpy": { + "hashes": [ + "sha256:0ea1011e94416e90fb3598cc3ef5e08b0a4dd6ce6b9b33ccd436c1dffc8cd664", + "sha256:23363e6d2a04d726bbc1400bd4e9898d54419b36b2cdf7020e3e215e1dcd0f8e", + "sha256:23c29e40e39ad7d869d408ded414f6d46d82f8a93b5857ac3ac1e915893139ca", + "sha256:549ae0cb2d34fc09d1675f9b01942499751d174381b6082279cf19cdb3c47cbe", + "sha256:70ab53918fd907a3ade01909b3ed783287ede362c80c75f41e79596d5ccacd32", + "sha256:72687b62a54d9d9e3fb85e7a37ea67f0e803aaa31be700e61d2f3742a5683917", + "sha256:78739f77c58048ec006e2b3eb2e0cd5a06d5f48c915e2fc7911a337354508110", + "sha256:7aa7e103610e5867d19a7d069e02e72eb2b3045b124d051cfd1538f1d8832d1b", + "sha256:87755e173fcf2ec45f584bb9d61aa7686bb665d861b81faa366d59808bbd3494", + "sha256:9b5d1b13d7c7bf5d7cf700e33c0b8ddb7baf030fcf502f76fc061ddd9405d16c", + "sha256:a771739902b1ae22a120dbbb6bd91b2cae6696c0e318b5007c5348519a4211c6", + "sha256:b9c2130e1c632540fbf9c2c88341493797ddf58016e7cba02e311de9b0a96b67", + "sha256:be596b44448aac14eb3614248c91586e2bc1728e020e82ef3197189aae556115", + "sha256:c05349890804d846eca32ce0623ab66c06f8800db881af7a876dc073ac1c2225", + "sha256:de4a045fbf388e120bb6ec66501458d3134f4729faed26ff95de52a754abddb1", + "sha256:dff595686178b0e75580c24d316aa45a8f4d56e2418063865c114eef651a982e", + "sha256:f6383c29e796203a0bba74a250615ad262c4279d398e89d895a69d3069498305" + ], + "markers": "python_version >= '3.7'", + "version": "==1.6.6" + }, + "decorator": { + "hashes": [ + "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", + "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186" + ], + "markers": "python_version >= '3.5'", + "version": "==5.1.1" + }, + "defusedxml": { + "hashes": [ + "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", + "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61" + ], + "markers": "python_version >= '2.7' and python_version 
not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.7.1" + }, + "exceptiongroup": { + "hashes": [ + "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e", + "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23" + ], + "markers": "python_version < '3.11'", + "version": "==1.1.0" + }, + "executing": { + "hashes": [ + "sha256:0314a69e37426e3608aada02473b4161d4caf5a4b244d1d0c48072b8fee7bacc", + "sha256:19da64c18d2d851112f09c287f8d3dbbdf725ab0e569077efb6cdcbd3497c107" + ], + "version": "==1.2.0" + }, + "fastjsonschema": { + "hashes": [ + "sha256:01e366f25d9047816fe3d288cbfc3e10541daf0af2044763f3d0ade42476da18", + "sha256:21f918e8d9a1a4ba9c22e09574ba72267a6762d47822db9add95f6454e51cc1c" + ], + "version": "==2.16.2" + }, "fiona": { "hashes": [ "sha256:2563d9a9f21390a7376556c82d9cef90a20a362949a9c7cb66f4ce63d90b66cd", @@ -87,11 +530,18 @@ }, "fonttools": { "hashes": [ - "sha256:68071406009e7ef6a5fdcd85d95975cd6963867bb226f2b786bfffe15d1959ef", - "sha256:8c8f84131bf04f3b1dcf99b9763cec35c347164ab6ad006e18d2f99fcab05529" + "sha256:2bb244009f9bf3fa100fc3ead6aeb99febe5985fa20afbfbaa2f8946c2fbdaf1", + "sha256:820466f43c8be8c3009aef8b87e785014133508f0de64ec469e4efb643ae54fb" ], "markers": "python_version >= '3.7'", - "version": "==4.28.1" + "version": "==4.38.0" + }, + "fqdn": { + "hashes": [ + "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f", + "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014" + ], + "version": "==1.5.1" }, "geopandas": { "hashes": [ @@ -101,63 +551,335 @@ "index": "pypi", "version": "==0.8.1" }, - "grass-session": { + "h5py": { + "hashes": [ + "sha256:0b0f002f5f341afe7d3d7e15198e80d9021da24a4d182d88068d79bfc91fba86", + "sha256:1edf33e722d47c6eb3878d51173b23dd848939f006f41b498bafceff87fb4cbd", + "sha256:46917f20021dde02865572a5fd2bb620945f7b7cd268bdc8e3f5720c32b38140", + "sha256:708ddff49af12c01d77e0f9782bb1a0364d96459ec0d1f85d90baea6d203764b", + "sha256:8745e5159830d7975a9cf38690455f22601509cda04de29b7e88b3fbdc747611", + "sha256:8e809149f95d9a3a33b1279bfbf894c78635a5497e8d5ac37420fa5ec0cf4f29", + "sha256:aa511bd05a9174c3008becdc93bd5785e254d34a6ab5f0425e6b2fbbc88afa6d", + "sha256:bb4ce46095e3b16c872aaf62adad33f40039fecae04674eb62c035386affcb91", + "sha256:be2a545f09074546f73305e0db6d36aaf1fb6ea2fcf1add2ce306b9c7f78e55a", + "sha256:ee1c683d91ab010d5e85cb61e8f9e7ee0d8eab545bf3dd50a9618f1d0e8f615e" + ], + "index": "pypi", + "version": "==3.4.0" + }, + "idna": { + "hashes": [ + "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", + "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" + ], + "markers": "python_version >= '3.5'", + "version": "==3.4" + }, + "importlib-metadata": { + "hashes": [ + "sha256:7efb448ec9a5e313a57655d35aa54cd3e01b7e1fbcf72dce1bf06119420f5bad", + "sha256:e354bedeb60efa6affdcc8ae121b73544a7aa74156d047311948f6d711cd378d" + ], + "markers": "python_version < '3.10'", + "version": "==6.0.0" + }, + "importlib-resources": { + "hashes": [ + "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6", + "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a" + ], + "markers": "python_version < '3.9'", + "version": "==5.12.0" + }, + "iniconfig": { + "hashes": [ + "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", + "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, + "ipykernel": { + "hashes": [ 
+ "sha256:430d00549b6aaf49bd0f5393150691edb1815afa62d457ee6b1a66b25cb17874", + "sha256:6e9213484e4ce1fb14267ee435e18f23cc3a0634e635b9fb4ed4677b84e0fdf8" + ], + "markers": "python_version >= '3.8'", + "version": "==6.21.2" + }, + "ipympl": { "hashes": [ - "sha256:7155314535790145da8e2e31b0d20cd2be91477d54083a738b5c319164e7f03b", - "sha256:ce03a53e28cc14bc7fff91482e83ed4f174a1325732c4333ed183dd15de39f8d" + "sha256:49bab75c05673a6881d1aaec5d8ac81d4624f73d292d154c5fb7096f10236a2b", + "sha256:d113cd55891bafe9b27ef99b6dd111a87beb6bb2ae550c404292272103be8013" ], "index": "pypi", - "version": "==0.5" + "version": "==0.9.3" + }, + "ipython": { + "hashes": [ + "sha256:b13a1d6c1f5818bd388db53b7107d17454129a70de2b87481d555daede5eb49e", + "sha256:b38c31e8fc7eff642fc7c597061fff462537cf2314e3225a19c906b7b0d8a345" + ], + "markers": "python_version >= '3.8'", + "version": "==8.10.0" + }, + "ipython-genutils": { + "hashes": [ + "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8", + "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8" + ], + "version": "==0.2.0" + }, + "ipywidgets": { + "hashes": [ + "sha256:c0005a77a47d77889cafed892b58e33b4a2a96712154404c6548ec22272811ea", + "sha256:ebb195e743b16c3947fe8827190fb87b4d00979c0fbf685afe4d2c4927059fa1" + ], + "markers": "python_version >= '3.7'", + "version": "==8.0.4" + }, + "isoduration": { + "hashes": [ + "sha256:ac2f9015137935279eac671f94f89eb00584f940f5dc49462a0c4ee692ba1bd9", + "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042" + ], + "version": "==20.11.0" + }, + "jedi": { + "hashes": [ + "sha256:203c1fd9d969ab8f2119ec0a3342e0b49910045abe6af0a3ae83a5764d54639e", + "sha256:bae794c30d07f6d910d32a7048af09b5a39ed740918da923c6b780790ebac612" + ], + "markers": "python_version >= '3.6'", + "version": "==0.18.2" + }, + "jinja2": { + "hashes": [ + "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852", + "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61" + ], + "markers": "python_version >= '3.7'", + "version": "==3.1.2" + }, + "jmespath": { + "hashes": [ + "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", + "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe" + ], + "markers": "python_version >= '3.7'", + "version": "==1.0.1" + }, + "json5": { + "hashes": [ + "sha256:1aa54b80b5e507dfe31d12b7743a642e2ffa6f70bf73b8e3d7d1d5fba83d99bd", + "sha256:4f1e196acc55b83985a51318489f345963c7ba84aa37607e49073066c562e99b" + ], + "version": "==0.9.11" + }, + "jsonpointer": { + "hashes": [ + "sha256:51801e558539b4e9cd268638c078c6c5746c9ac96bc38152d443400e4f3793e9", + "sha256:97cba51526c829282218feb99dab1b1e6bdf8efd1c43dc9d57be093c0d69c99a" + ], + "version": "==2.3" + }, + "jsonschema": { + "extras": [ + "format-nongpl" + ], + "hashes": [ + "sha256:0f864437ab8b6076ba6707453ef8f98a6a0d512a80e93f8abdb676f737ecb60d", + "sha256:a870ad254da1a8ca84b6a2905cac29d265f805acc57af304784962a2aa6508f6" + ], + "markers": "python_version >= '3.7'", + "version": "==4.17.3" + }, + "jupyter": { + "hashes": [ + "sha256:3e1f86076bbb7c8c207829390305a2b1fe836d471ed54be66a3b8c41e7f46cc7", + "sha256:5b290f93b98ffbc21c0c7e749f054b3267782166d72fa5e3ed1ed4eaf34a2b78", + "sha256:d9dc4b3318f310e34c82951ea5d6683f67bed7def4b259fafbfe4f1beb1d8e5f" + ], + "index": "pypi", + "version": "==1.0.0" + }, + "jupyter-client": { + "hashes": [ + "sha256:be48ac6bd659cbbddb7a674cf06b3b8afbf53f228253cf58bde604c03bd487b0", + 
"sha256:ed65498bea6d876ef9d8da3e0db3dd33c5d129f5b2645f56ae03993782966bd0" + ], + "markers": "python_version >= '3.8'", + "version": "==8.0.3" + }, + "jupyter-console": { + "hashes": [ + "sha256:5931212d5cbc1f956f6fd615755b5e15f389a8eaa697288dbbe4377017615ecc", + "sha256:b32b5cb673a90909911988a2f1906ced005b07e8dadab6c8664d3f5de44a8191" + ], + "markers": "python_version >= '3.7'", + "version": "==6.6.1" + }, + "jupyter-core": { + "hashes": [ + "sha256:1407cdb4c79ee467696c04b76633fc1884015fa109323365a6372c8e890cc83f", + "sha256:4bdc2928c37f6917130c667d8b8708f20aee539d8283c6be72aabd2a4b4c83b0" + ], + "markers": "python_version >= '3.8'", + "version": "==5.2.0" + }, + "jupyter-events": { + "hashes": [ + "sha256:57a2749f87ba387cd1bfd9b22a0875b889237dbf2edc2121ebb22bde47036c17", + "sha256:9a6e9995f75d1b7146b436ea24d696ce3a35bfa8bfe45e0c33c334c79464d0b3" + ], + "markers": "python_version >= '3.7'", + "version": "==0.6.3" + }, + "jupyter-server": { + "hashes": [ + "sha256:29d6657bfb160b0e39b9030d67f33f918a188f2eba28065314a933b327fef872", + "sha256:b15078954120886d580e19d1746e2b62a3dc7bd082cb4716115c25fcd7061b00" + ], + "markers": "python_version >= '3.8'", + "version": "==2.3.0" + }, + "jupyter-server-fileid": { + "hashes": [ + "sha256:0b580a9d75cc7a7132132d6bb3faa4645e34527bff8760861e9e6de51bec7397", + "sha256:340e86b45875d51a60e0e93d8d3bcb609c2bc8d315ec07c003f36d561f637c0e" + ], + "markers": "python_version >= '3.7'", + "version": "==0.7.0" + }, + "jupyter-server-terminals": { + "hashes": [ + "sha256:57ab779797c25a7ba68e97bcfb5d7740f2b5e8a83b5e8102b10438041a7eac5d", + "sha256:75779164661cec02a8758a5311e18bb8eb70c4e86c6b699403100f1585a12a36" + ], + "markers": "python_version >= '3.8'", + "version": "==0.4.4" + }, + "jupyter-server-ydoc": { + "hashes": [ + "sha256:18275ff1ce7e93bbda2301ca066273b3951fc50b0d9c8fc33788374134ad7920", + "sha256:ab10864708c81fa41ab9f2ed3626b54ff6926eaf14545d1d439714978dad6e9f" + ], + "markers": "python_version >= '3.7'", + "version": "==0.6.1" + }, + "jupyter-ydoc": { + "hashes": [ + "sha256:3163bd4745eedd46d4bba6df52ab26be3c5c44c3a8aaf247635062486ea8f84f", + "sha256:596a9ae5986b59f8776c42430b5ad516405963574078ab801781933c9690be93" + ], + "markers": "python_version >= '3.7'", + "version": "==0.2.2" + }, + "jupyterlab": { + "hashes": [ + "sha256:ad6707dd0149b629d0ed5b56916cfcdb816b376c6af3190337faba09e27ea29e", + "sha256:aee98c174180e98a30470297d10b959e8e64f2288970c0de65f0a6d2b4807034" + ], + "index": "pypi", + "version": "==3.6.1" + }, + "jupyterlab-pygments": { + "hashes": [ + "sha256:2405800db07c9f770863bcf8049a529c3dd4d3e28536638bd7c1c01d2748309f", + "sha256:7405d7fde60819d905a9fa8ce89e4cd830e318cdad22a0030f7a901da705585d" + ], + "markers": "python_version >= '3.7'", + "version": "==0.2.2" + }, + "jupyterlab-server": { + "hashes": [ + "sha256:51f6922e34f9f3db875051f4f7b57539a04ddd030f42d9ce6062dedf67bf7f2f", + "sha256:9aec21a2183bbedd9f91a86628355449575f1862d88b28ad5f905019d31e6c21" + ], + "markers": "python_version >= '3.7'", + "version": "==2.19.0" + }, + "jupyterlab-widgets": { + "hashes": [ + "sha256:a04a42e50231b355b7087e16a818f541e53589f7647144ea0344c4bf16f300e5", + "sha256:eeaecdeaf6c03afc960ddae201ced88d5979b4ca9c3891bcb8f6631af705f5ef" + ], + "markers": "python_version >= '3.7'", + "version": "==3.0.5" }, "kiwisolver": { "hashes": [ - "sha256:0007840186bacfaa0aba4466d5890334ea5938e0bb7e28078a0eb0e63b5b59d5", - "sha256:19554bd8d54cf41139f376753af1a644b63c9ca93f8f72009d50a2080f870f77", - "sha256:1d45d1c74f88b9f41062716c727f78f2a59a5476ecbe74956fafb423c5c87a76", 
- "sha256:1d819553730d3c2724582124aee8a03c846ec4362ded1034c16fb3ef309264e6", - "sha256:2210f28778c7d2ee13f3c2a20a3a22db889e75f4ec13a21072eabb5693801e84", - "sha256:22521219ca739654a296eea6d4367703558fba16f98688bd8ce65abff36eaa84", - "sha256:25405f88a37c5f5bcba01c6e350086d65e7465fd1caaf986333d2a045045a223", - "sha256:2b65bd35f3e06a47b5c30ea99e0c2b88f72c6476eedaf8cfbc8e66adb5479dcf", - "sha256:2ddb500a2808c100e72c075cbb00bf32e62763c82b6a882d403f01a119e3f402", - "sha256:2f8f6c8f4f1cff93ca5058d6ec5f0efda922ecb3f4c5fb76181f327decff98b8", - "sha256:30fa008c172355c7768159983a7270cb23838c4d7db73d6c0f6b60dde0d432c6", - "sha256:3dbb3cea20b4af4f49f84cffaf45dd5f88e8594d18568e0225e6ad9dec0e7967", - "sha256:4116ba9a58109ed5e4cb315bdcbff9838f3159d099ba5259c7c7fb77f8537492", - "sha256:44e6adf67577dbdfa2d9f06db9fbc5639afefdb5bf2b4dfec25c3a7fbc619536", - "sha256:5326ddfacbe51abf9469fe668944bc2e399181a2158cb5d45e1d40856b2a0589", - "sha256:70adc3658138bc77a36ce769f5f183169bc0a2906a4f61f09673f7181255ac9b", - "sha256:72be6ebb4e92520b9726d7146bc9c9b277513a57a38efcf66db0620aec0097e0", - "sha256:7843b1624d6ccca403a610d1277f7c28ad184c5aa88a1750c1a999754e65b439", - "sha256:7ba5a1041480c6e0a8b11a9544d53562abc2d19220bfa14133e0cdd9967e97af", - "sha256:80efd202108c3a4150e042b269f7c78643420cc232a0a771743bb96b742f838f", - "sha256:82f49c5a79d3839bc8f38cb5f4bfc87e15f04cbafa5fbd12fb32c941cb529cfb", - "sha256:83d2c9db5dfc537d0171e32de160461230eb14663299b7e6d18ca6dca21e4977", - "sha256:8d93a1095f83e908fc253f2fb569c2711414c0bfd451cab580466465b235b470", - "sha256:8dc3d842fa41a33fe83d9f5c66c0cc1f28756530cd89944b63b072281e852031", - "sha256:9661a04ca3c950a8ac8c47f53cbc0b530bce1b52f516a1e87b7736fec24bfff0", - "sha256:a498bcd005e8a3fedd0022bb30ee0ad92728154a8798b703f394484452550507", - "sha256:a7a4cf5bbdc861987a7745aed7a536c6405256853c94abc9f3287c3fa401b174", - "sha256:b5074fb09429f2b7bc82b6fb4be8645dcbac14e592128beeff5461dcde0af09f", - "sha256:b6a5431940f28b6de123de42f0eb47b84a073ee3c3345dc109ad550a3307dd28", - "sha256:ba677bcaff9429fd1bf01648ad0901cea56c0d068df383d5f5856d88221fe75b", - "sha256:bcadb05c3d4794eb9eee1dddf1c24215c92fb7b55a80beae7a60530a91060560", - "sha256:bf7eb45d14fc036514c09554bf983f2a72323254912ed0c3c8e697b62c4c158f", - "sha256:c358721aebd40c243894298f685a19eb0491a5c3e0b923b9f887ef1193ddf829", - "sha256:c4550a359c5157aaf8507e6820d98682872b9100ce7607f8aa070b4b8af6c298", - "sha256:c6572c2dab23c86a14e82c245473d45b4c515314f1f859e92608dcafbd2f19b8", - "sha256:cba430db673c29376135e695c6e2501c44c256a81495da849e85d1793ee975ad", - "sha256:dedc71c8eb9c5096037766390172c34fb86ef048b8e8958b4e484b9e505d66bc", - "sha256:e6f5eb2f53fac7d408a45fbcdeda7224b1cfff64919d0f95473420a931347ae9", - "sha256:ec2eba188c1906b05b9b49ae55aae4efd8150c61ba450e6721f64620c50b59eb", - "sha256:ee040a7de8d295dbd261ef2d6d3192f13e2b08ec4a954de34a6fb8ff6422e24c", - "sha256:eedd3b59190885d1ebdf6c5e0ca56828beb1949b4dfe6e5d0256a461429ac386", - "sha256:f441422bb313ab25de7b3dbfd388e790eceb76ce01a18199ec4944b369017009", - "sha256:f8eb7b6716f5b50e9c06207a14172cf2de201e41912ebe732846c02c830455b9", - "sha256:fc4453705b81d03568d5b808ad8f09c77c47534f6ac2e72e733f9ca4714aa75c" - ], - "markers": "python_version >= '3.7'", - "version": "==1.3.2" + "sha256:02f79693ec433cb4b5f51694e8477ae83b3205768a6fb48ffba60549080e295b", + "sha256:03baab2d6b4a54ddbb43bba1a3a2d1627e82d205c5cf8f4c924dc49284b87166", + "sha256:1041feb4cda8708ce73bb4dcb9ce1ccf49d553bf87c3954bdfa46f0c3f77252c", + "sha256:10ee06759482c78bdb864f4109886dff7b8a56529bc1609d4f1112b93fe6423c", + 
"sha256:1d1573129aa0fd901076e2bfb4275a35f5b7aa60fbfb984499d661ec950320b0", + "sha256:283dffbf061a4ec60391d51e6155e372a1f7a4f5b15d59c8505339454f8989e4", + "sha256:28bc5b299f48150b5f822ce68624e445040595a4ac3d59251703779836eceff9", + "sha256:2a66fdfb34e05b705620dd567f5a03f239a088d5a3f321e7b6ac3239d22aa286", + "sha256:2e307eb9bd99801f82789b44bb45e9f541961831c7311521b13a6c85afc09767", + "sha256:2e407cb4bd5a13984a6c2c0fe1845e4e41e96f183e5e5cd4d77a857d9693494c", + "sha256:2f5e60fabb7343a836360c4f0919b8cd0d6dbf08ad2ca6b9cf90bf0c76a3c4f6", + "sha256:36dafec3d6d6088d34e2de6b85f9d8e2324eb734162fba59d2ba9ed7a2043d5b", + "sha256:3fe20f63c9ecee44560d0e7f116b3a747a5d7203376abeea292ab3152334d004", + "sha256:41dae968a94b1ef1897cb322b39360a0812661dba7c682aa45098eb8e193dbdf", + "sha256:4bd472dbe5e136f96a4b18f295d159d7f26fd399136f5b17b08c4e5f498cd494", + "sha256:4ea39b0ccc4f5d803e3337dd46bcce60b702be4d86fd0b3d7531ef10fd99a1ac", + "sha256:5853eb494c71e267912275e5586fe281444eb5e722de4e131cddf9d442615626", + "sha256:5bce61af018b0cb2055e0e72e7d65290d822d3feee430b7b8203d8a855e78766", + "sha256:6295ecd49304dcf3bfbfa45d9a081c96509e95f4b9d0eb7ee4ec0530c4a96514", + "sha256:62ac9cc684da4cf1778d07a89bf5f81b35834cb96ca523d3a7fb32509380cbf6", + "sha256:70e7c2e7b750585569564e2e5ca9845acfaa5da56ac46df68414f29fea97be9f", + "sha256:7577c1987baa3adc4b3c62c33bd1118c3ef5c8ddef36f0f2c950ae0b199e100d", + "sha256:75facbe9606748f43428fc91a43edb46c7ff68889b91fa31f53b58894503a191", + "sha256:787518a6789009c159453da4d6b683f468ef7a65bbde796bcea803ccf191058d", + "sha256:78d6601aed50c74e0ef02f4204da1816147a6d3fbdc8b3872d263338a9052c51", + "sha256:7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f", + "sha256:81e38381b782cc7e1e46c4e14cd997ee6040768101aefc8fa3c24a4cc58e98f8", + "sha256:841293b17ad704d70c578f1f0013c890e219952169ce8a24ebc063eecf775454", + "sha256:872b8ca05c40d309ed13eb2e582cab0c5a05e81e987ab9c521bf05ad1d5cf5cb", + "sha256:877272cf6b4b7e94c9614f9b10140e198d2186363728ed0f701c6eee1baec1da", + "sha256:8c808594c88a025d4e322d5bb549282c93c8e1ba71b790f539567932722d7bd8", + "sha256:8ed58b8acf29798b036d347791141767ccf65eee7f26bde03a71c944449e53de", + "sha256:91672bacaa030f92fc2f43b620d7b337fd9a5af28b0d6ed3f77afc43c4a64b5a", + "sha256:968f44fdbf6dd757d12920d63b566eeb4d5b395fd2d00d29d7ef00a00582aac9", + "sha256:9f85003f5dfa867e86d53fac6f7e6f30c045673fa27b603c397753bebadc3008", + "sha256:a553dadda40fef6bfa1456dc4be49b113aa92c2a9a9e8711e955618cd69622e3", + "sha256:a68b62a02953b9841730db7797422f983935aeefceb1679f0fc85cbfbd311c32", + "sha256:abbe9fa13da955feb8202e215c4018f4bb57469b1b78c7a4c5c7b93001699938", + "sha256:ad881edc7ccb9d65b0224f4e4d05a1e85cf62d73aab798943df6d48ab0cd79a1", + "sha256:b1792d939ec70abe76f5054d3f36ed5656021dcad1322d1cc996d4e54165cef9", + "sha256:b428ef021242344340460fa4c9185d0b1f66fbdbfecc6c63eff4b7c29fad429d", + "sha256:b533558eae785e33e8c148a8d9921692a9fe5aa516efbdff8606e7d87b9d5824", + "sha256:ba59c92039ec0a66103b1d5fe588fa546373587a7d68f5c96f743c3396afc04b", + "sha256:bc8d3bd6c72b2dd9decf16ce70e20abcb3274ba01b4e1c96031e0c4067d1e7cd", + "sha256:bc9db8a3efb3e403e4ecc6cd9489ea2bac94244f80c78e27c31dcc00d2790ac2", + "sha256:bf7d9fce9bcc4752ca4a1b80aabd38f6d19009ea5cbda0e0856983cf6d0023f5", + "sha256:c2dbb44c3f7e6c4d3487b31037b1bdbf424d97687c1747ce4ff2895795c9bf69", + "sha256:c79ebe8f3676a4c6630fd3f777f3cfecf9289666c84e775a67d1d358578dc2e3", + "sha256:c97528e64cb9ebeff9701e7938653a9951922f2a38bd847787d4a8e498cc83ae", + "sha256:d0611a0a2a518464c05ddd5a3a1a0e856ccc10e67079bb17f265ad19ab3c7597", + 
"sha256:d06adcfa62a4431d404c31216f0f8ac97397d799cd53800e9d3efc2fbb3cf14e", + "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955", + "sha256:d5b61785a9ce44e5a4b880272baa7cf6c8f48a5180c3e81c59553ba0cb0821ca", + "sha256:da152d8cdcab0e56e4f45eb08b9aea6455845ec83172092f09b0e077ece2cf7a", + "sha256:da7e547706e69e45d95e116e6939488d62174e033b763ab1496b4c29b76fabea", + "sha256:db5283d90da4174865d520e7366801a93777201e91e79bacbac6e6927cbceede", + "sha256:db608a6757adabb32f1cfe6066e39b3706d8c3aa69bbc353a5b61edad36a5cb4", + "sha256:e0ea21f66820452a3f5d1655f8704a60d66ba1191359b96541eaf457710a5fc6", + "sha256:e7da3fec7408813a7cebc9e4ec55afed2d0fd65c4754bc376bf03498d4e92686", + "sha256:e92a513161077b53447160b9bd8f522edfbed4bd9759e4c18ab05d7ef7e49408", + "sha256:ecb1fa0db7bf4cff9dac752abb19505a233c7f16684c5826d1f11ebd9472b871", + "sha256:efda5fc8cc1c61e4f639b8067d118e742b812c930f708e6667a5ce0d13499e29", + "sha256:f0a1dbdb5ecbef0d34eb77e56fcb3e95bbd7e50835d9782a45df81cc46949750", + "sha256:f0a71d85ecdd570ded8ac3d1c0f480842f49a40beb423bb8014539a9f32a5897", + "sha256:f4f270de01dd3e129a72efad823da90cc4d6aafb64c410c9033aba70db9f1ff0", + "sha256:f6cb459eea32a4e2cf18ba5fcece2dbdf496384413bc1bae15583f19e567f3b2", + "sha256:f8ad8285b01b0d4695102546b342b493b3ccc6781fc28c8c6a1bb63e95d22f09", + "sha256:f9f39e2f049db33a908319cf46624a569b36983c7c78318e9726a4cb8923b26c" + ], + "markers": "python_version >= '3.7'", + "version": "==1.4.4" }, "llvmlite": { "hashes": [ @@ -181,53 +903,131 @@ "markers": "python_version >= '3.6'", "version": "==0.33.0" }, + "markupsafe": { + "hashes": [ + "sha256:0576fe974b40a400449768941d5d0858cc624e3249dfd1e0c33674e5c7ca7aed", + "sha256:085fd3201e7b12809f9e6e9bc1e5c96a368c8523fad5afb02afe3c051ae4afcc", + "sha256:090376d812fb6ac5f171e5938e82e7f2d7adc2b629101cec0db8b267815c85e2", + "sha256:0b462104ba25f1ac006fdab8b6a01ebbfbce9ed37fd37fd4acd70c67c973e460", + "sha256:137678c63c977754abe9086a3ec011e8fd985ab90631145dfb9294ad09c102a7", + "sha256:1bea30e9bf331f3fef67e0a3877b2288593c98a21ccb2cf29b74c581a4eb3af0", + "sha256:22152d00bf4a9c7c83960521fc558f55a1adbc0631fbb00a9471e097b19d72e1", + "sha256:22731d79ed2eb25059ae3df1dfc9cb1546691cc41f4e3130fe6bfbc3ecbbecfa", + "sha256:2298c859cfc5463f1b64bd55cb3e602528db6fa0f3cfd568d3605c50678f8f03", + "sha256:28057e985dace2f478e042eaa15606c7efccb700797660629da387eb289b9323", + "sha256:2e7821bffe00aa6bd07a23913b7f4e01328c3d5cc0b40b36c0bd81d362faeb65", + "sha256:2ec4f2d48ae59bbb9d1f9d7efb9236ab81429a764dedca114f5fdabbc3788013", + "sha256:340bea174e9761308703ae988e982005aedf427de816d1afe98147668cc03036", + "sha256:40627dcf047dadb22cd25ea7ecfe9cbf3bbbad0482ee5920b582f3809c97654f", + "sha256:40dfd3fefbef579ee058f139733ac336312663c6706d1163b82b3003fb1925c4", + "sha256:4cf06cdc1dda95223e9d2d3c58d3b178aa5dacb35ee7e3bbac10e4e1faacb419", + "sha256:50c42830a633fa0cf9e7d27664637532791bfc31c731a87b202d2d8ac40c3ea2", + "sha256:55f44b440d491028addb3b88f72207d71eeebfb7b5dbf0643f7c023ae1fba619", + "sha256:608e7073dfa9e38a85d38474c082d4281f4ce276ac0010224eaba11e929dd53a", + "sha256:63ba06c9941e46fa389d389644e2d8225e0e3e5ebcc4ff1ea8506dce646f8c8a", + "sha256:65608c35bfb8a76763f37036547f7adfd09270fbdbf96608be2bead319728fcd", + "sha256:665a36ae6f8f20a4676b53224e33d456a6f5a72657d9c83c2aa00765072f31f7", + "sha256:6d6607f98fcf17e534162f0709aaad3ab7a96032723d8ac8750ffe17ae5a0666", + "sha256:7313ce6a199651c4ed9d7e4cfb4aa56fe923b1adf9af3b420ee14e6d9a73df65", + "sha256:7668b52e102d0ed87cb082380a7e2e1e78737ddecdde129acadb0eccc5423859", + 
"sha256:7df70907e00c970c60b9ef2938d894a9381f38e6b9db73c5be35e59d92e06625", + "sha256:7e007132af78ea9df29495dbf7b5824cb71648d7133cf7848a2a5dd00d36f9ff", + "sha256:835fb5e38fd89328e9c81067fd642b3593c33e1e17e2fdbf77f5676abb14a156", + "sha256:8bca7e26c1dd751236cfb0c6c72d4ad61d986e9a41bbf76cb445f69488b2a2bd", + "sha256:8db032bf0ce9022a8e41a22598eefc802314e81b879ae093f36ce9ddf39ab1ba", + "sha256:99625a92da8229df6d44335e6fcc558a5037dd0a760e11d84be2260e6f37002f", + "sha256:9cad97ab29dfc3f0249b483412c85c8ef4766d96cdf9dcf5a1e3caa3f3661cf1", + "sha256:a4abaec6ca3ad8660690236d11bfe28dfd707778e2442b45addd2f086d6ef094", + "sha256:a6e40afa7f45939ca356f348c8e23048e02cb109ced1eb8420961b2f40fb373a", + "sha256:a6f2fcca746e8d5910e18782f976489939d54a91f9411c32051b4aab2bd7c513", + "sha256:a806db027852538d2ad7555b203300173dd1b77ba116de92da9afbc3a3be3eed", + "sha256:abcabc8c2b26036d62d4c746381a6f7cf60aafcc653198ad678306986b09450d", + "sha256:b8526c6d437855442cdd3d87eede9c425c4445ea011ca38d937db299382e6fa3", + "sha256:bb06feb762bade6bf3c8b844462274db0c76acc95c52abe8dbed28ae3d44a147", + "sha256:c0a33bc9f02c2b17c3ea382f91b4db0e6cde90b63b296422a939886a7a80de1c", + "sha256:c4a549890a45f57f1ebf99c067a4ad0cb423a05544accaf2b065246827ed9603", + "sha256:ca244fa73f50a800cf8c3ebf7fd93149ec37f5cb9596aa8873ae2c1d23498601", + "sha256:cf877ab4ed6e302ec1d04952ca358b381a882fbd9d1b07cccbfd61783561f98a", + "sha256:d9d971ec1e79906046aa3ca266de79eac42f1dbf3612a05dc9368125952bd1a1", + "sha256:da25303d91526aac3672ee6d49a2f3db2d9502a4a60b55519feb1a4c7714e07d", + "sha256:e55e40ff0cc8cc5c07996915ad367fa47da6b3fc091fdadca7f5403239c5fec3", + "sha256:f03a532d7dee1bed20bc4884194a16160a2de9ffc6354b3878ec9682bb623c54", + "sha256:f1cd098434e83e656abf198f103a8207a8187c0fc110306691a2e94a78d0abb2", + "sha256:f2bfb563d0211ce16b63c7cb9395d2c682a23187f54c3d79bfec33e6705473c6", + "sha256:f8ffb705ffcf5ddd0e80b65ddf7bed7ee4f5a441ea7d3419e861a12eaf41af58" + ], + "markers": "python_version >= '3.7'", + "version": "==2.1.2" + }, "matplotlib": { "hashes": [ - "sha256:0abf8b51cc6d3ba34d1b15b26e329f23879848a0cf1216954c1f432ffc7e1af7", - "sha256:0e020a42f3338823a393dd2f80e39a2c07b9f941dfe2c778eb104eeb33d60bb5", - "sha256:13930a0c9bec0fd25f43c448b047a21af1353328b946f044a8fc3be077c6b1a8", - "sha256:153a0cf6a6ff4f406a0600d2034710c49988bacc6313d193b32716f98a697580", - "sha256:18f6e52386300db5cc4d1e9019ad9da2e80658bab018834d963ebb0aa5355095", - "sha256:2089b9014792dcc87bb1d620cde847913338abf7d957ef05587382b0cb76d44e", - "sha256:2eea16883aa7724c95eea0eb473ab585c6cf66f0e28f7f13e63deb38f4fd6d0f", - "sha256:38892a254420d95594285077276162a5e9e9c30b6da08bdc2a4d53331ad9a6fa", - "sha256:4b018ea6f26424a0852eb60eb406420d9f0d34f65736ea7bbfbb104946a66d86", - "sha256:65f877882b7ddede7090c7d87be27a0f4720fe7fc6fddd4409c06e1aa0f1ae8d", - "sha256:666d717a4798eb9c5d3ae83fe80c7bc6ed696b93e879cb01cb24a74155c73612", - "sha256:66b172610db0ececebebb09d146f54205f87c7b841454e408fba854764f91bdd", - "sha256:6db02c5605f063b67780f4d5753476b6a4944343284aa4e93c5e8ff6e9ec7f76", - "sha256:6e0e6b2111165522ad336705499b1f968c34a9e84d05d498ee5af0b5697d1efe", - "sha256:71a1851111f23f82fc43d2b6b2bfdd3f760579a664ebc939576fe21cc6133d01", - "sha256:7a7cb59ebd63a8ac4542ec1c61dd08724f82ec3aa7bb6b4b9e212d43c611ce3d", - "sha256:7baf23adb698d8c6ca7339c9dde00931bc47b2dd82fa912827fef9f93db77f5e", - "sha256:970aa97297537540369d05fe0fd1bb952593f9ab696c9b427c06990a83e2418b", - "sha256:9bac8eb1eccef540d7f4e844b6313d9f7722efd48c07e1b4bfec1056132127fd", - 
"sha256:a07ff2565da72a7b384a9e000b15b6b8270d81370af8a3531a16f6fbcee023cc", - "sha256:a0dcaf5648cecddc328e81a0421821a1f65a1d517b20746c94a1f0f5c36fb51a", - "sha256:a0ea10faa3bab0714d3a19c7e0921279a68d57552414d6eceaea99f97d7735db", - "sha256:a5b62d1805cc83d755972033c05cea78a1e177a159fc84da5c9c4ab6303ccbd9", - "sha256:a6cef5b31e27c31253c0f852b629a38d550ae66ec6850129c49d872f9ee428cb", - "sha256:a7bf8b05c214d32fb7ca7c001fde70b9b426378e897b0adbf77b85ea3569d56a", - "sha256:ac17a7e7b06ee426a4989f0b7f24ab1a592e39cdf56353a90f4e998bc0bf44d6", - "sha256:b3b687e905da32e5f2e5f16efa713f5d1fcd9fb8b8c697895de35c91fedeb086", - "sha256:b5e439d9e55d645f2a4dca63e2f66d68fe974c405053b132d61c7e98c25dfeb2", - "sha256:ba107add08e12600b072cf3c47aaa1ab85dd4d3c48107a5d3377d1bf80f8b235", - "sha256:d092b7ba63182d2dd427904e3eb58dd5c46ec67c5968de14a4b5007010a3a4cc", - "sha256:dc8c5c23e7056e126275dbf29efba817b3d94196690930d0968873ac3a94ab82", - "sha256:df0042cab69f4d246f4cb8fc297770ac4ae6ec2983f61836b04a117722037dcd", - "sha256:ee3d9ff16d749a9aa521bd7d86f0dbf256b2d2ac8ce31b19e4d2c86d2f2ff0b6", - "sha256:f23fbf70d2e80f4e03a83fc1206a8306d9bc50482fee4239f10676ce7e470c83", - "sha256:ff5d9fe518ad2de14ce82ab906b6ab5c2b0c7f4f984400ff8a7a905daa580a0a" - ], - "markers": "python_version >= '3.7'", - "version": "==3.5.0" + "sha256:01681566e95b9423021b49dea6a2395c16fa054604eacb87f0f4c439750f9114", + "sha256:03eb2c8ff8d85da679b71e14c7c95d16d014c48e0c0bfa14db85f6cdc5c92aad", + "sha256:092e6abc80cdf8a95f7d1813e16c0e99ceda8d5b195a3ab859c680f3487b80a2", + "sha256:0a776462a4a63c0bfc9df106c15a0897aa2dbab6795c693aa366e8e283958854", + "sha256:0dfd4a0cbd151f6439e6d7f8dca5292839ca311e7e650596d073774847ca2e4f", + "sha256:111ef351f28fd823ed7177632070a6badd6f475607122bc9002a526f2502a0b5", + "sha256:21269450243d6928da81a9bed201f0909432a74e7d0d65db5545b9fa8a0d0223", + "sha256:21a8aeac39b4a795e697265d800ce52ab59bdeb6bb23082e2d971f3041074f02", + "sha256:21bd4033c40b95abd5b8453f036ed5aa70856e56ecbd887705c37dce007a4c21", + "sha256:3493b48e56468c39bd9c1532566dff3b8062952721b7521e1f394eb6791495f4", + "sha256:3a10428d4f8d1a478ceabd652e61a175b2fdeed4175ab48da4a7b8deb561e3fa", + "sha256:3d1e52365d8d5af699f04581ca191112e1d1220a9ce4386b57d807124d8b55e6", + "sha256:3da8b9618188346239e51f1ea6c0f8f05c6e218cfcc30b399dd7dd7f52e8bceb", + "sha256:4497d88c559b76da320b7759d64db442178beeea06a52dc0c629086982082dcd", + "sha256:46ca923e980f76d34c1c633343a72bb042d6ba690ecc649aababf5317997171d", + "sha256:4f640534ec2760e270801056bc0d8a10777c48b30966eef78a7c35d8590915ba", + "sha256:51fb664c37714cbaac69c16d6b3719f517a13c96c3f76f4caadd5a0aa7ed0329", + "sha256:56b7b79488209041a9bf7ddc34f1b069274489ce69e34dc63ae241d0d6b4b736", + "sha256:691ef1f15360e439886186d0db77b5345b24da12cbc4fc57b26c4826db4d6cab", + "sha256:71b751d06b2ed1fd017de512d7439c0259822864ea16731522b251a27c0b2ede", + "sha256:7d0dcd1a0bf8d56551e8617d6dc3881d8a1c7fb37d14e5ec12cbb293f3e6170a", + "sha256:827e78239292e561cfb70abf356a9d7eaf5bf6a85c97877f254009f20b892f89", + "sha256:8665855f3919c80551f377bc16df618ceabf3ef65270bc14b60302dce88ca9ab", + "sha256:8f6efd313430d7ef70a38a3276281cb2e8646b3a22b3b21eb227da20e15e6813", + "sha256:9d85355c48ef8b9994293eb7c00f44aa8a43cad7a297fbf0770a25cdb2244b91", + "sha256:a06a6c9822e80f323549c6bc9da96d4f233178212ad9a5f4ab87fd153077a507", + "sha256:b51ab8a5d5d3bbd4527af633a638325f492e09e45e78afdf816ef55217a09664", + "sha256:c0592ba57217c22987b7322df10f75ef95bc44dce781692b4b7524085de66019", + "sha256:c5465735eaaafd1cfaec3fed60aee776aeb3fd3992aa2e49f4635339c931d443", + 
"sha256:c849aa94ff2a70fb71f318f48a61076d1205c6013b9d3885ade7f992093ac434", + "sha256:c869b646489c6a94375714032e5cec08e3aa8d3f7d4e8ef2b0fb50a52b317ce6", + "sha256:cb52aa97b92acdee090edfb65d1cb84ea60ab38e871ba8321a10bbcebc2a3540", + "sha256:cf119eee4e57389fba5ac8b816934e95c256535e55f0b21628b4205737d1de85", + "sha256:cf6346644e8fe234dc847e6232145dac199a650d3d8025b3ef65107221584ba4", + "sha256:de20eb1247725a2f889173d391a6d9e7e0f2540feda24030748283108b0478ec", + "sha256:eb2e76cd429058d8954121c334dddfcd11a6186c6975bca61f3f248c99031b05", + "sha256:f336e7014889c38c59029ebacc35c59236a852e4b23836708cfd3f43d1eaeed5", + "sha256:f4ddac5f59e78d04b20469bc43853a8e619bb6505c7eac8ffb343ff2c516d72f", + "sha256:f910d924da8b9fb066b5beae0b85e34ed1b6293014892baadcf2a51da1c65807", + "sha256:f91d35b3ef51d29d9c661069b9e4ba431ce283ffc533b981506889e144b5b40e", + "sha256:fb0304c1cd802e9a25743414c887e8a7cd51d96c9ec96d388625d2cd1c137ae3" + ], + "markers": "python_version >= '3.8'", + "version": "==3.7.0" + }, + "matplotlib-inline": { + "hashes": [ + "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311", + "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304" + ], + "markers": "python_version >= '3.5'", + "version": "==0.1.6" }, "memory-profiler": { "hashes": [ - "sha256:01385ac0fec944fcf7969814ec4406c6d8a9c66c079d09276723c5a7680f44e5" + "sha256:400348e61031e3942ad4d4109d18753b2fb08c2f6fb8290671c5513a34182d84", + "sha256:4e5b73d7864a1d1292fb76a03e82a3e78ef934d06828a698d9dada76da2067b0" ], "index": "pypi", - "version": "==0.58.0" + "version": "==0.61.0" + }, + "mistune": { + "hashes": [ + "sha256:0246113cb2492db875c6be56974a7c893333bf26cd92891c85f63151cee09d34", + "sha256:bad7f5d431886fcbaf5f758118ecff70d31f75231b34024a1341120340a65ce8" + ], + "version": "==2.0.5" }, "munch": { "hashes": [ @@ -238,11 +1038,108 @@ }, "natsort": { "hashes": [ - "sha256:5f5f4ea471d655b1b1611eef1cf0c6d3397095d2d3a1aab7098d6a50e4c3901a", - "sha256:a0a4fd71aee20a6d648da61e01180a63f7268e69983d0440bd3ad80ef1ba6981" + "sha256:04fe18fdd2b9e5957f19f687eb117f102ef8dde6b574764e536e91194bed4f5f", + "sha256:57f85b72c688b09e053cdac302dd5b5b53df5f73ae20b4874fcbffd8bf783d11" ], "index": "pypi", - "version": "==8.0.0" + "version": "==8.2.0" + }, + "nbclassic": { + "hashes": [ + "sha256:40f11bbcc59e8956c3d5ef132dec8e5a853e893ecf831e791d54da0d8a50d79d", + "sha256:6403a996562dadefa7fee9c49e17b663b5fd508241de5df655b90011cf3342d9" + ], + "markers": "python_version >= '3.7'", + "version": "==0.5.2" + }, + "nbclient": { + "hashes": [ + "sha256:884a3f4a8c4fc24bb9302f263e0af47d97f0d01fe11ba714171b320c8ac09547", + "sha256:d97ac6257de2794f5397609df754fcbca1a603e94e924eb9b99787c031ae2e7c" + ], + "markers": "python_version >= '3.7'", + "version": "==0.7.2" + }, + "nbconvert": { + "hashes": [ + "sha256:495638c5e06005f4a5ce828d8a81d28e34f95c20f4384d5d7a22254b443836e7", + "sha256:a42c3ac137c64f70cbe4d763111bf358641ea53b37a01a5c202ed86374af5234" + ], + "markers": "python_version >= '3.7'", + "version": "==7.2.9" + }, + "nbformat": { + "hashes": [ + "sha256:22a98a6516ca216002b0a34591af5bcb8072ca6c63910baffc901cfa07fefbf0", + "sha256:4b021fca24d3a747bf4e626694033d792d594705829e5e35b14ee3369f9f6477" + ], + "markers": "python_version >= '3.7'", + "version": "==5.7.3" + }, + "nest-asyncio": { + "hashes": [ + "sha256:b9a953fb40dceaa587d109609098db21900182b16440652454a146cffb06e8b8", + "sha256:d267cc1ff794403f7df692964d1d2a3fa9418ffea2a3f6859a439ff482fef290" + ], + "markers": "python_version >= '3.5'", + "version": "==1.5.6" + }, + 
"netcdf4": { + "hashes": [ + "sha256:02c2317f377f23005e447e4b187ebce3cdeae76b9d0a443d9d6f5156a3980f04", + "sha256:087d09594c0739b03a18ed058d6eca8dc80cc672961b231473bdf2c728112ec8", + "sha256:0bf6b4375d99ef6aa9b5d82bdd2a92113c130821461e10bf9831609783f43a2a", + "sha256:1dd21bff8e29476decfe42602ab40694f10be84b1a585bc5d28c8b08c19f7283", + "sha256:1ee78d5c129fcfeafd6d4a6339984d6108dbabfb8a2cec651f9dc2132c792c78", + "sha256:206af8b5d110b89132ddea9b7b162efe82c26db37bc7a7633110750685243195", + "sha256:2106c5eaacb8e4dd1ad9837cb020fdf1b9c9e5630cd6648a6ac760690f098eed", + "sha256:34355a0129f05e44b26be22e352af183dee7dcd4c6504b398c10ef89ca9f9303", + "sha256:35f527f9d22b23c93cbb6b20e07fe1e3e27ed5ac80a699c2b1f2d150d5db70ee", + "sha256:393f9c7e92f03e08f8093100396dddb8619a43cb37ad2ac6beb70aaa71226dd5", + "sha256:41c0ff220a16c894bf147ce2b1c0266610a5b94bf7aed66c5ee2e5aeaa6bfb20", + "sha256:41ff33837732c94a9446ebc1562bf5d954c43e1009f1429423da9151bee5bac1", + "sha256:43c95d05458b730a496bbfa52080f4133ec60f07b3994a5b7f45034c65c6d31b", + "sha256:4933f948dba82f4f95dcb4d0c4227ab4e085c35e33048009c77ec446a86b2d63", + "sha256:50821324f804cd99043b7273d15e65e204cadbf7ca8af27059fade765fb2c430", + "sha256:56808afaf4f8afd6d7f6dcdba5ee39d37160028066abba140e0210bdb1d50712", + "sha256:5eb58e4b4ad9b79b72fceb8a806b5236b6ce4d5053f7020cff367fcc0bebc765", + "sha256:5f520457cf48541edb83655567b5b17cdb2df19a305703f015a85e7f2aceb560", + "sha256:7d31d1b736d402b2779f703479d0fbdee59fc6e1e880d1af6eb36fded934a982", + "sha256:8164915dc40baa26c65d0e5661d96cd8cbc87157563cf364167d9ec82a6ef7f6", + "sha256:8bb9e8b073e25c44cdcc42947e945a5b950ea93a519ba28fec642a92a1c983a9", + "sha256:8f4352717660ea48df8c21920f29c708ba69b7002f609dc89036507913b82608", + "sha256:a0a47694a123647ff7ef4c16a863510ad640fd44e75f8e2ee5042578ad2a0d67", + "sha256:ad705a74449bcefeebb6ae41a5e5987ab29942f508e092151dac7508803e12e8", + "sha256:aff200ee07495c73f4257f55d19d13f3678d7f3171d48a85481706e06a6db0e9", + "sha256:c06f59f54f73aab86b123838fabe68c728503be981cad9f28283dcfb627f7023", + "sha256:c270e4e5161955a35a152348a659270540c85fb2f238c13601c8649cd992032f", + "sha256:c3056cc36669abd35b152b978a0bc54b622995d9d7165f8d380c134fa60e27d6", + "sha256:c6b0c6ae9f9a050e9878f700e87405044fb2ee9a730559d6feb87f2361b361c6", + "sha256:ca8f6f519b290b68892186cb966ee8d2287faec8463128fe6aaec188a0c5ae96", + "sha256:cb374f259d0a6d0e436294607d4bc5999f20f61e1a7d365b50e07898ea43a7de", + "sha256:d145f9c12da29da3922d8b8aafea2a2a89501bcb28a219a46b7b828b57191594", + "sha256:e8d64208e69cd5e67112eb4378066946124dcd1fbb7c2aa3d12d488f4d024759", + "sha256:f7b8b19971ff5e1dc5267f1f917667214c6614816f879ff6620bc152132c2629", + "sha256:fc489ecca6fb2e5ab738eedf962f9213dcde005b6f9e6f9b2d3d9d6513811759" + ], + "index": "pypi", + "version": "==1.5.7" + }, + "notebook": { + "hashes": [ + "sha256:c1897e5317e225fc78b45549a6ab4b668e4c996fd03a04e938fe5e7af2bfffd0", + "sha256:e04f9018ceb86e4fa841e92ea8fb214f8d23c1cedfde530cc96f92446924f0e4" + ], + "markers": "python_version >= '3.7'", + "version": "==6.5.2" + }, + "notebook-shim": { + "hashes": [ + "sha256:090e0baf9a5582ff59b607af523ca2db68ff216da0c69956b62cab2ef4fc9c3f", + "sha256:9c6c30f74c4fbea6fce55c1be58e7fd0409b1c681b075dcedceb005db5026949" + ], + "markers": "python_version >= '3.7'", + "version": "==0.2.2" }, "numba": { "hashes": [ @@ -269,49 +1166,77 @@ "index": "pypi", "version": "==0.50.1" }, + "numexpr": { + "hashes": [ + "sha256:059546e8f6283ccdb47c683101a890844f667fa6d56258d48ae2ecf1b3875957", + 
"sha256:17ac9cfe6d0078c5fc06ba1c1bbd20b8783f28c6f475bbabd3cad53683075cab", + "sha256:3f039321d1c17962c33079987b675fb251b273dbec0f51aac0934e932446ccc3", + "sha256:5538b30199bfc68886d2be18fcef3abd11d9271767a7a69ff3688defe782800a", + "sha256:655d84eb09adfee3c09ecf4a89a512225da153fdb7de13c447404b7d0523a9a7", + "sha256:6931b1e9d4f629f43c14b21d44f3f77997298bea43790cfcdb4dd98804f90783", + "sha256:6c368aa35ae9b18840e78b05f929d3a7b3abccdba9630a878c7db74ca2368339", + "sha256:6ee9db7598dd4001138b482342b96d78110dd77cefc051ec75af3295604dde6a", + "sha256:77898fdf3da6bb96aa8a4759a8231d763a75d848b2f2e5c5279dad0b243c8dfe", + "sha256:7bca95f4473b444428061d4cda8e59ac564dc7dc6a1dea3015af9805c6bc2946", + "sha256:7d71add384adc9119568d7e9ffa8a35b195decae81e0abf54a2b7779852f0637", + "sha256:845a6aa0ed3e2a53239b89c1ebfa8cf052d3cc6e053c72805e8153300078c0b1", + "sha256:90f12cc851240f7911a47c91aaf223dba753e98e46dff3017282e633602e76a7", + "sha256:9400781553541f414f82eac056f2b4c965373650df9694286b9bd7e8d413f8d8", + "sha256:9e34931089a6bafc77aaae21f37ad6594b98aa1085bb8b45d5b3cd038c3c17d9", + "sha256:9f096d707290a6a00b6ffdaf581ee37331109fb7b6c8744e9ded7c779a48e517", + "sha256:a38664e699526cb1687aefd9069e2b5b9387da7feac4545de446141f1ef86f46", + "sha256:a6d2d7740ae83ba5f3531e83afc4b626daa71df1ef903970947903345c37bd03", + "sha256:a75967d46b6bd56455dd32da6285e5ffabe155d0ee61eef685bbfb8dafb2e484", + "sha256:b076db98ca65eeaf9bd224576e3ac84c05e451c0bd85b13664b7e5f7b62e2c70", + "sha256:b318541bf3d8326682ebada087ba0050549a16d8b3fa260dd2585d73a83d20a7", + "sha256:b96334fc1748e9ec4f93d5fadb1044089d73fb08208fdb8382ed77c893f0be01", + "sha256:c867cc36cf815a3ec9122029874e00d8fbcef65035c4a5901e9b120dd5d626a2", + "sha256:d5432537418d18691b9115d615d6daa17ee8275baef3edf1afbbf8bc69806147", + "sha256:db93cf1842f068247de631bfc8af20118bf1f9447cd929b531595a5e0efc9346", + "sha256:df35324666b693f13a016bc7957de7cc4d8801b746b81060b671bf78a52b9037", + "sha256:df3a1f6b24214a1ab826e9c1c99edf1686c8e307547a9aef33910d586f626d01", + "sha256:eaec59e9bf70ff05615c34a8b8d6c7bd042bd9f55465d7b495ea5436f45319d0", + "sha256:f3a920bfac2645017110b87ddbe364c9c7a742870a4d2f6120b8786c25dc6db3", + "sha256:ff5835e8af9a212e8480003d731aad1727aaea909926fd009e8ae6a1cba7f141" + ], + "markers": "python_version >= '3.7'", + "version": "==2.8.4" + }, "numpy": { "hashes": [ - "sha256:0b78ecfa070460104934e2caf51694ccd00f37d5e5dbe76f021b1b0b0d221823", - "sha256:1247ef28387b7bb7f21caf2dbe4767f4f4175df44d30604d42ad9bd701ebb31f", - "sha256:1403b4e2181fc72664737d848b60e65150f272fe5a1c1cbc16145ed43884065a", - "sha256:170b2a0805c6891ca78c1d96ee72e4c3ed1ae0a992c75444b6ab20ff038ba2cd", - "sha256:2e4ed57f45f0aa38beca2a03b6532e70e548faf2debbeb3291cfc9b315d9be8f", - "sha256:32fe5b12061f6446adcbb32cf4060a14741f9c21e15aaee59a207b6ce6423469", - "sha256:34f3456f530ae8b44231c63082c8899fe9c983fd9b108c997c4b1c8c2d435333", - "sha256:4c9c23158b87ed0e70d9a50c67e5c0b3f75bcf2581a8e34668d4e9d7474d76c6", - "sha256:5d95668e727c75b3f5088ec7700e260f90ec83f488e4c0aaccb941148b2cd377", - "sha256:615d4e328af7204c13ae3d4df7615a13ff60a49cb0d9106fde07f541207883ca", - "sha256:69077388c5a4b997442b843dbdc3a85b420fb693ec8e33020bb24d647c164fa5", - "sha256:74b85a17528ca60cf98381a5e779fc0264b4a88b46025e6bcbe9621f46bb3e63", - "sha256:81225e58ef5fce7f1d80399575576fc5febec79a8a2742e8ef86d7b03beef49f", - "sha256:8890b3360f345e8360133bc078d2dacc2843b6ee6059b568781b15b97acbe39f", - "sha256:92aafa03da8658609f59f18722b88f0a73a249101169e28415b4fa148caf7e41", - 
"sha256:9864424631775b0c052f3bd98bc2712d131b3e2cd95d1c0c68b91709170890b0", - "sha256:9e6f5f50d1eff2f2f752b3089a118aee1ea0da63d56c44f3865681009b0af162", - "sha256:a3deb31bc84f2b42584b8c4001c85d1934dbfb4030827110bc36bfd11509b7bf", - "sha256:ad010846cdffe7ec27e3f933397f8a8d6c801a48634f419e3d075db27acf5880", - "sha256:b1e2312f5b8843a3e4e8224b2b48fe16119617b8fc0a54df8f50098721b5bed2", - "sha256:bc988afcea53e6156546e5b2885b7efab089570783d9d82caf1cfd323b0bb3dd", - "sha256:c449eb870616a7b62e097982c622d2577b3dbc800aaf8689254ec6e0197cbf1e", - "sha256:c74c699b122918a6c4611285cc2cad4a3aafdb135c22a16ec483340ef97d573c", - "sha256:c885bfc07f77e8fee3dc879152ba993732601f1f11de248d4f357f0ffea6a6d4", - "sha256:e3c3e990274444031482a31280bf48674441e0a5b55ddb168f3a6db3e0c38ec8", - "sha256:e4799be6a2d7d3c33699a6f77201836ac975b2e1b98c2a07f66a38f499cb50ce", - "sha256:e6c76a87633aa3fa16614b61ccedfae45b91df2767cf097aa9c933932a7ed1e0", - "sha256:e89717274b41ebd568cd7943fc9418eeb49b1785b66031bc8a7f6300463c5898", - "sha256:f5162ec777ba7138906c9c274353ece5603646c6965570d82905546579573f73", - "sha256:fde96af889262e85aa033f8ee1d3241e32bf36228318a61f1ace579df4e8170d" - ], - "markers": "python_version < '3.11' and python_version >= '3.7'", - "version": "==1.21.4" + "sha256:0791fbd1e43bf74b3502133207e378901272f3c156c4df4954cad833b1380207", + "sha256:1ce7ab2053e36c0a71e7a13a7475bd3b1f54750b4b433adc96313e127b870887", + "sha256:2d487e06ecbf1dc2f18e7efce82ded4f705f4bd0cd02677ffccfb39e5c284c7e", + "sha256:37431a77ceb9307c28382c9773da9f306435135fae6b80b62a11c53cfedd8802", + "sha256:3e1ffa4748168e1cc8d3cde93f006fe92b5421396221a02f2274aab6ac83b077", + "sha256:425b390e4619f58d8526b3dcf656dde069133ae5c240229821f01b5f44ea07af", + "sha256:43a8ca7391b626b4c4fe20aefe79fec683279e31e7c79716863b4b25021e0e74", + "sha256:4c6036521f11a731ce0648f10c18ae66d7143865f19f7299943c985cdc95afb5", + "sha256:59d55e634968b8f77d3fd674a3cf0b96e85147cd6556ec64ade018f27e9479e1", + "sha256:64f56fc53a2d18b1924abd15745e30d82a5782b2cab3429aceecc6875bd5add0", + "sha256:7228ad13744f63575b3a972d7ee4fd61815b2879998e70930d4ccf9ec721dce0", + "sha256:9ce7df0abeabe7fbd8ccbf343dc0db72f68549856b863ae3dd580255d009648e", + "sha256:a911e317e8c826ea632205e63ed8507e0dc877dcdc49744584dfc363df9ca08c", + "sha256:b89bf9b94b3d624e7bb480344e91f68c1c6c75f026ed6755955117de00917a7c", + "sha256:ba9ead61dfb5d971d77b6c131a9dbee62294a932bf6a356e48c75ae684e635b3", + "sha256:c1d937820db6e43bec43e8d016b9b3165dcb42892ea9f106c70fb13d430ffe72", + "sha256:cc7f00008eb7d3f2489fca6f334ec19ca63e31371be28fd5dad955b16ec285bd", + "sha256:d4c5d5eb2ec8da0b4f50c9a843393971f31f1d60be87e0fb0917a49133d257d6", + "sha256:e96d7f3096a36c8754207ab89d4b3282ba7b49ea140e4973591852c77d09eb76", + "sha256:f0725df166cf4785c0bc4cbfb320203182b1ecd30fee6e541c8752a92df6aa32", + "sha256:f3eb268dbd5cfaffd9448113539e44e2dd1c5ca9ce25576f7c04a5453edc26fa", + "sha256:fb7a980c81dd932381f8228a426df8aeb70d59bbcda2af075b627bbc50207cba" + ], + "index": "pypi", + "version": "==1.22.4" }, "packaging": { "hashes": [ - "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", - "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" + "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", + "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" ], - "markers": "python_version >= '3.6'", - "version": "==21.3" + "markers": "python_version >= '3.7'", + "version": "==23.0" }, "pandas": { "hashes": [ @@ -335,86 +1260,241 @@ "index": "pypi", "version": "==1.0.5" }, + 
"pandocfilters": { + "hashes": [ + "sha256:0b679503337d233b4339a817bfc8c50064e2eff681314376a47cb582305a7a38", + "sha256:33aae3f25fd1a026079f5d27bdd52496f0e0803b3469282162bafdcbdf6ef14f" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.5.0" + }, + "parso": { + "hashes": [ + "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0", + "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75" + ], + "markers": "python_version >= '3.6'", + "version": "==0.8.3" + }, + "pexpect": { + "hashes": [ + "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937", + "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c" + ], + "markers": "sys_platform != 'win32'", + "version": "==4.8.0" + }, + "pickleshare": { + "hashes": [ + "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", + "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56" + ], + "version": "==0.7.5" + }, "pillow": { "hashes": [ - "sha256:066f3999cb3b070a95c3652712cffa1a748cd02d60ad7b4e485c3748a04d9d76", - "sha256:0a0956fdc5defc34462bb1c765ee88d933239f9a94bc37d132004775241a7585", - "sha256:0b052a619a8bfcf26bd8b3f48f45283f9e977890263e4571f2393ed8898d331b", - "sha256:1394a6ad5abc838c5cd8a92c5a07535648cdf6d09e8e2d6df916dfa9ea86ead8", - "sha256:1bc723b434fbc4ab50bb68e11e93ce5fb69866ad621e3c2c9bdb0cd70e345f55", - "sha256:244cf3b97802c34c41905d22810846802a3329ddcb93ccc432870243211c79fc", - "sha256:25a49dc2e2f74e65efaa32b153527fc5ac98508d502fa46e74fa4fd678ed6645", - "sha256:2e4440b8f00f504ee4b53fe30f4e381aae30b0568193be305256b1462216feff", - "sha256:3862b7256046fcd950618ed22d1d60b842e3a40a48236a5498746f21189afbbc", - "sha256:3eb1ce5f65908556c2d8685a8f0a6e989d887ec4057326f6c22b24e8a172c66b", - "sha256:3f97cfb1e5a392d75dd8b9fd274d205404729923840ca94ca45a0af57e13dbe6", - "sha256:493cb4e415f44cd601fcec11c99836f707bb714ab03f5ed46ac25713baf0ff20", - "sha256:4acc0985ddf39d1bc969a9220b51d94ed51695d455c228d8ac29fcdb25810e6e", - "sha256:5503c86916d27c2e101b7f71c2ae2cddba01a2cf55b8395b0255fd33fa4d1f1a", - "sha256:5b7bb9de00197fb4261825c15551adf7605cf14a80badf1761d61e59da347779", - "sha256:5e9ac5f66616b87d4da618a20ab0a38324dbe88d8a39b55be8964eb520021e02", - "sha256:620582db2a85b2df5f8a82ddeb52116560d7e5e6b055095f04ad828d1b0baa39", - "sha256:62cc1afda735a8d109007164714e73771b499768b9bb5afcbbee9d0ff374b43f", - "sha256:70ad9e5c6cb9b8487280a02c0ad8a51581dcbbe8484ce058477692a27c151c0a", - "sha256:72b9e656e340447f827885b8d7a15fc8c4e68d410dc2297ef6787eec0f0ea409", - "sha256:72cbcfd54df6caf85cc35264c77ede902452d6df41166010262374155947460c", - "sha256:792e5c12376594bfcb986ebf3855aa4b7c225754e9a9521298e460e92fb4a488", - "sha256:7b7017b61bbcdd7f6363aeceb881e23c46583739cb69a3ab39cb384f6ec82e5b", - "sha256:81f8d5c81e483a9442d72d182e1fb6dcb9723f289a57e8030811bac9ea3fef8d", - "sha256:82aafa8d5eb68c8463b6e9baeb4f19043bb31fefc03eb7b216b51e6a9981ae09", - "sha256:84c471a734240653a0ec91dec0996696eea227eafe72a33bd06c92697728046b", - "sha256:8c803ac3c28bbc53763e6825746f05cc407b20e4a69d0122e526a582e3b5e153", - "sha256:93ce9e955cc95959df98505e4608ad98281fff037350d8c2671c9aa86bcf10a9", - "sha256:9a3e5ddc44c14042f0844b8cf7d2cd455f6cc80fd7f5eefbe657292cf601d9ad", - "sha256:a4901622493f88b1a29bd30ec1a2f683782e57c3c16a2dbc7f2595ba01f639df", - "sha256:a5a4532a12314149d8b4e4ad8ff09dde7427731fcfa5917ff16d0291f13609df", - "sha256:b8831cb7332eda5dc89b21a7bce7ef6ad305548820595033a4b03cf3091235ed", - 
"sha256:b8e2f83c56e141920c39464b852de3719dfbfb6e3c99a2d8da0edf4fb33176ed", - "sha256:c70e94281588ef053ae8998039610dbd71bc509e4acbc77ab59d7d2937b10698", - "sha256:c8a17b5d948f4ceeceb66384727dde11b240736fddeda54ca740b9b8b1556b29", - "sha256:d82cdb63100ef5eedb8391732375e6d05993b765f72cb34311fab92103314649", - "sha256:d89363f02658e253dbd171f7c3716a5d340a24ee82d38aab9183f7fdf0cdca49", - "sha256:d99ec152570e4196772e7a8e4ba5320d2d27bf22fdf11743dd882936ed64305b", - "sha256:ddc4d832a0f0b4c52fff973a0d44b6c99839a9d016fe4e6a1cb8f3eea96479c2", - "sha256:e3dacecfbeec9a33e932f00c6cd7996e62f53ad46fbe677577394aaa90ee419a", - "sha256:eb9fc393f3c61f9054e1ed26e6fe912c7321af2f41ff49d3f83d05bacf22cc78" + "sha256:013016af6b3a12a2f40b704677f8b51f72cb007dac785a9933d5c86a72a7fe33", + "sha256:0845adc64fe9886db00f5ab68c4a8cd933ab749a87747555cec1c95acea64b0b", + "sha256:0884ba7b515163a1a05440a138adeb722b8a6ae2c2b33aea93ea3118dd3a899e", + "sha256:09b89ddc95c248ee788328528e6a2996e09eaccddeeb82a5356e92645733be35", + "sha256:0dd4c681b82214b36273c18ca7ee87065a50e013112eea7d78c7a1b89a739153", + "sha256:0e51f608da093e5d9038c592b5b575cadc12fd748af1479b5e858045fff955a9", + "sha256:0f3269304c1a7ce82f1759c12ce731ef9b6e95b6df829dccd9fe42912cc48569", + "sha256:16a8df99701f9095bea8a6c4b3197da105df6f74e6176c5b410bc2df2fd29a57", + "sha256:19005a8e58b7c1796bc0167862b1f54a64d3b44ee5d48152b06bb861458bc0f8", + "sha256:1b4b4e9dda4f4e4c4e6896f93e84a8f0bcca3b059de9ddf67dac3c334b1195e1", + "sha256:28676836c7796805914b76b1837a40f76827ee0d5398f72f7dcc634bae7c6264", + "sha256:2968c58feca624bb6c8502f9564dd187d0e1389964898f5e9e1fbc8533169157", + "sha256:3f4cc516e0b264c8d4ccd6b6cbc69a07c6d582d8337df79be1e15a5056b258c9", + "sha256:3fa1284762aacca6dc97474ee9c16f83990b8eeb6697f2ba17140d54b453e133", + "sha256:43521ce2c4b865d385e78579a082b6ad1166ebed2b1a2293c3be1d68dd7ca3b9", + "sha256:451f10ef963918e65b8869e17d67db5e2f4ab40e716ee6ce7129b0cde2876eab", + "sha256:46c259e87199041583658457372a183636ae8cd56dbf3f0755e0f376a7f9d0e6", + "sha256:46f39cab8bbf4a384ba7cb0bc8bae7b7062b6a11cfac1ca4bc144dea90d4a9f5", + "sha256:519e14e2c49fcf7616d6d2cfc5c70adae95682ae20f0395e9280db85e8d6c4df", + "sha256:53dcb50fbdc3fb2c55431a9b30caeb2f7027fcd2aeb501459464f0214200a503", + "sha256:54614444887e0d3043557d9dbc697dbb16cfb5a35d672b7a0fcc1ed0cf1c600b", + "sha256:575d8912dca808edd9acd6f7795199332696d3469665ef26163cd090fa1f8bfa", + "sha256:5dd5a9c3091a0f414a963d427f920368e2b6a4c2f7527fdd82cde8ef0bc7a327", + "sha256:5f532a2ad4d174eb73494e7397988e22bf427f91acc8e6ebf5bb10597b49c493", + "sha256:60e7da3a3ad1812c128750fc1bc14a7ceeb8d29f77e0a2356a8fb2aa8925287d", + "sha256:653d7fb2df65efefbcbf81ef5fe5e5be931f1ee4332c2893ca638c9b11a409c4", + "sha256:6663977496d616b618b6cfa43ec86e479ee62b942e1da76a2c3daa1c75933ef4", + "sha256:6abfb51a82e919e3933eb137e17c4ae9c0475a25508ea88993bb59faf82f3b35", + "sha256:6c6b1389ed66cdd174d040105123a5a1bc91d0aa7059c7261d20e583b6d8cbd2", + "sha256:6d9dfb9959a3b0039ee06c1a1a90dc23bac3b430842dcb97908ddde05870601c", + "sha256:765cb54c0b8724a7c12c55146ae4647e0274a839fb6de7bcba841e04298e1011", + "sha256:7a21222644ab69ddd9967cfe6f2bb420b460dae4289c9d40ff9a4896e7c35c9a", + "sha256:7ac7594397698f77bce84382929747130765f66406dc2cd8b4ab4da68ade4c6e", + "sha256:7cfc287da09f9d2a7ec146ee4d72d6ea1342e770d975e49a8621bf54eaa8f30f", + "sha256:83125753a60cfc8c412de5896d10a0a405e0bd88d0470ad82e0869ddf0cb3848", + "sha256:847b114580c5cc9ebaf216dd8c8dbc6b00a3b7ab0131e173d7120e6deade1f57", + "sha256:87708d78a14d56a990fbf4f9cb350b7d89ee8988705e58e39bdf4d82c149210f", + 
"sha256:8a2b5874d17e72dfb80d917213abd55d7e1ed2479f38f001f264f7ce7bae757c", + "sha256:8f127e7b028900421cad64f51f75c051b628db17fb00e099eb148761eed598c9", + "sha256:94cdff45173b1919350601f82d61365e792895e3c3a3443cf99819e6fbf717a5", + "sha256:99d92d148dd03fd19d16175b6d355cc1b01faf80dae93c6c3eb4163709edc0a9", + "sha256:9a3049a10261d7f2b6514d35bbb7a4dfc3ece4c4de14ef5876c4b7a23a0e566d", + "sha256:9d9a62576b68cd90f7075876f4e8444487db5eeea0e4df3ba298ee38a8d067b0", + "sha256:9e5f94742033898bfe84c93c831a6f552bb629448d4072dd312306bab3bd96f1", + "sha256:a1c2d7780448eb93fbcc3789bf3916aa5720d942e37945f4056680317f1cd23e", + "sha256:a2e0f87144fcbbe54297cae708c5e7f9da21a4646523456b00cc956bd4c65815", + "sha256:a4dfdae195335abb4e89cc9762b2edc524f3c6e80d647a9a81bf81e17e3fb6f0", + "sha256:a96e6e23f2b79433390273eaf8cc94fec9c6370842e577ab10dabdcc7ea0a66b", + "sha256:aabdab8ec1e7ca7f1434d042bf8b1e92056245fb179790dc97ed040361f16bfd", + "sha256:b222090c455d6d1a64e6b7bb5f4035c4dff479e22455c9eaa1bdd4c75b52c80c", + "sha256:b52ff4f4e002f828ea6483faf4c4e8deea8d743cf801b74910243c58acc6eda3", + "sha256:b70756ec9417c34e097f987b4d8c510975216ad26ba6e57ccb53bc758f490dab", + "sha256:b8c2f6eb0df979ee99433d8b3f6d193d9590f735cf12274c108bd954e30ca858", + "sha256:b9b752ab91e78234941e44abdecc07f1f0d8f51fb62941d32995b8161f68cfe5", + "sha256:ba6612b6548220ff5e9df85261bddc811a057b0b465a1226b39bfb8550616aee", + "sha256:bd752c5ff1b4a870b7661234694f24b1d2b9076b8bf337321a814c612665f343", + "sha256:c3c4ed2ff6760e98d262e0cc9c9a7f7b8a9f61aa4d47c58835cdaf7b0b8811bb", + "sha256:c5c1362c14aee73f50143d74389b2c158707b4abce2cb055b7ad37ce60738d47", + "sha256:cb362e3b0976dc994857391b776ddaa8c13c28a16f80ac6522c23d5257156bed", + "sha256:d197df5489004db87d90b918033edbeee0bd6df3848a204bca3ff0a903bef837", + "sha256:d3b56206244dc8711f7e8b7d6cad4663917cd5b2d950799425076681e8766286", + "sha256:d5b2f8a31bd43e0f18172d8ac82347c8f37ef3e0b414431157718aa234991b28", + "sha256:d7081c084ceb58278dd3cf81f836bc818978c0ccc770cbbb202125ddabec6628", + "sha256:db74f5562c09953b2c5f8ec4b7dfd3f5421f31811e97d1dbc0a7c93d6e3a24df", + "sha256:df41112ccce5d47770a0c13651479fbcd8793f34232a2dd9faeccb75eb5d0d0d", + "sha256:e1339790c083c5a4de48f688b4841f18df839eb3c9584a770cbd818b33e26d5d", + "sha256:e621b0246192d3b9cb1dc62c78cfa4c6f6d2ddc0ec207d43c0dedecb914f152a", + "sha256:e8c5cf126889a4de385c02a2c3d3aba4b00f70234bfddae82a5eaa3ee6d5e3e6", + "sha256:e9d7747847c53a16a729b6ee5e737cf170f7a16611c143d95aa60a109a59c336", + "sha256:eaef5d2de3c7e9b21f1e762f289d17b726c2239a42b11e25446abf82b26ac132", + "sha256:ed3e4b4e1e6de75fdc16d3259098de7c6571b1a6cc863b1a49e7d3d53e036070", + "sha256:ef21af928e807f10bf4141cad4746eee692a0dd3ff56cfb25fce076ec3cc8abe", + "sha256:f09598b416ba39a8f489c124447b007fe865f786a89dbfa48bb5cf395693132a", + "sha256:f0caf4a5dcf610d96c3bd32932bfac8aee61c96e60481c2a0ea58da435e25acd", + "sha256:f6e78171be3fb7941f9910ea15b4b14ec27725865a73c15277bc39f5ca4f8391", + "sha256:f715c32e774a60a337b2bb8ad9839b4abf75b267a0f18806f6f4f5f1688c4b5a", + "sha256:fb5c1ad6bad98c57482236a21bf985ab0ef42bd51f7ad4e4538e89a997624e12" + ], + "markers": "python_version >= '3.7'", + "version": "==9.4.0" + }, + "pkgutil-resolve-name": { + "hashes": [ + "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174", + "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e" + ], + "markers": "python_version < '3.9'", + "version": "==1.3.10" + }, + "platformdirs": { + "hashes": [ + "sha256:8a1228abb1ef82d788f74139988b137e78692984ec7b08eaa6c65f1723af28f9", + 
"sha256:b1d5eb14f221506f50d6604a561f4c5786d9e80355219694a1b244bcd96f4567" + ], + "markers": "python_version >= '3.7'", + "version": "==3.0.0" + }, + "pluggy": { + "hashes": [ + "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", + "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3" ], "markers": "python_version >= '3.6'", - "version": "==8.4.0" + "version": "==1.0.0" + }, + "prometheus-client": { + "hashes": [ + "sha256:0836af6eb2c8f4fed712b2f279f6c0a8bbab29f9f4aa15276b91c7cb0d1616ab", + "sha256:a03e35b359f14dd1630898543e2120addfdeacd1a6069c1367ae90fd93ad3f48" + ], + "markers": "python_version >= '3.6'", + "version": "==0.16.0" + }, + "prompt-toolkit": { + "hashes": [ + "sha256:6a2948ec427dfcc7c983027b1044b355db6aaa8be374f54ad2015471f7d81c5b" + ], + "markers": "python_version >= '3.7'", + "version": "==3.0.37" }, "psutil": { "hashes": [ - "sha256:0066a82f7b1b37d334e68697faba68e5ad5e858279fd6351c8ca6024e8d6ba64", - "sha256:02b8292609b1f7fcb34173b25e48d0da8667bc85f81d7476584d889c6e0f2131", - "sha256:0ae6f386d8d297177fd288be6e8d1afc05966878704dad9847719650e44fc49c", - "sha256:0c9ccb99ab76025f2f0bbecf341d4656e9c1351db8cc8a03ccd62e318ab4b5c6", - "sha256:0dd4465a039d343925cdc29023bb6960ccf4e74a65ad53e768403746a9207023", - "sha256:12d844996d6c2b1d3881cfa6fa201fd635971869a9da945cf6756105af73d2df", - "sha256:1bff0d07e76114ec24ee32e7f7f8d0c4b0514b3fae93e3d2aaafd65d22502394", - "sha256:245b5509968ac0bd179287d91210cd3f37add77dad385ef238b275bad35fa1c4", - "sha256:28ff7c95293ae74bf1ca1a79e8805fcde005c18a122ca983abf676ea3466362b", - "sha256:36b3b6c9e2a34b7d7fbae330a85bf72c30b1c827a4366a07443fc4b6270449e2", - "sha256:52de075468cd394ac98c66f9ca33b2f54ae1d9bff1ef6b67a212ee8f639ec06d", - "sha256:5da29e394bdedd9144c7331192e20c1f79283fb03b06e6abd3a8ae45ffecee65", - "sha256:61f05864b42fedc0771d6d8e49c35f07efd209ade09a5afe6a5059e7bb7bf83d", - "sha256:6223d07a1ae93f86451d0198a0c361032c4c93ebd4bf6d25e2fb3edfad9571ef", - "sha256:6323d5d845c2785efb20aded4726636546b26d3b577aded22492908f7c1bdda7", - "sha256:6ffe81843131ee0ffa02c317186ed1e759a145267d54fdef1bc4ea5f5931ab60", - "sha256:74f2d0be88db96ada78756cb3a3e1b107ce8ab79f65aa885f76d7664e56928f6", - "sha256:74fb2557d1430fff18ff0d72613c5ca30c45cdbfcddd6a5773e9fc1fe9364be8", - "sha256:90d4091c2d30ddd0a03e0b97e6a33a48628469b99585e2ad6bf21f17423b112b", - "sha256:90f31c34d25b1b3ed6c40cdd34ff122b1887a825297c017e4cbd6796dd8b672d", - "sha256:99de3e8739258b3c3e8669cb9757c9a861b2a25ad0955f8e53ac662d66de61ac", - "sha256:c6a5fd10ce6b6344e616cf01cc5b849fa8103fbb5ba507b6b2dee4c11e84c935", - "sha256:ce8b867423291cb65cfc6d9c4955ee9bfc1e21fe03bb50e177f2b957f1c2469d", - "sha256:d225cd8319aa1d3c85bf195c4e07d17d3cd68636b8fc97e6cf198f782f99af28", - "sha256:ea313bb02e5e25224e518e4352af4bf5e062755160f77e4b1767dd5ccb65f876", - "sha256:ea372bcc129394485824ae3e3ddabe67dc0b118d262c568b4d2602a7070afdb0", - "sha256:f4634b033faf0d968bb9220dd1c793b897ab7f1189956e1aa9eae752527127d3", - "sha256:fcc01e900c1d7bee2a37e5d6e4f9194760a93597c97fee89c4ae51701de03563" - ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==5.8.0" + "sha256:149555f59a69b33f056ba1c4eb22bb7bf24332ce631c44a319cec09f876aaeff", + "sha256:16653106f3b59386ffe10e0bad3bb6299e169d5327d3f187614b1cb8f24cf2e1", + "sha256:3d7f9739eb435d4b1338944abe23f49584bde5395f27487d2ee25ad9a8774a62", + "sha256:3ff89f9b835100a825b14c2808a106b6fdcc4b15483141482a12c725e7f78549", + "sha256:54c0d3d8e0078b7666984e11b12b88af2db11d11249a8ac8920dd5ef68a66e08", + 
"sha256:54d5b184728298f2ca8567bf83c422b706200bcbbfafdc06718264f9393cfeb7", + "sha256:6001c809253a29599bc0dfd5179d9f8a5779f9dffea1da0f13c53ee568115e1e", + "sha256:68908971daf802203f3d37e78d3f8831b6d1014864d7a85937941bb35f09aefe", + "sha256:6b92c532979bafc2df23ddc785ed116fced1f492ad90a6830cf24f4d1ea27d24", + "sha256:852dd5d9f8a47169fe62fd4a971aa07859476c2ba22c2254d4a1baa4e10b95ad", + "sha256:9120cd39dca5c5e1c54b59a41d205023d436799b1c8c4d3ff71af18535728e94", + "sha256:c1ca331af862803a42677c120aff8a814a804e09832f166f226bfd22b56feee8", + "sha256:efeae04f9516907be44904cc7ce08defb6b665128992a56957abc9b61dca94b7", + "sha256:fd8522436a6ada7b4aad6638662966de0d61d241cb821239b2ae7013d41a43d4" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==5.9.4" + }, + "psycopg2-binary": { + "hashes": [ + "sha256:0deac2af1a587ae12836aa07970f5cb91964f05a7c6cdb69d8425ff4c15d4e2c", + "sha256:0e4dc3d5996760104746e6cfcdb519d9d2cd27c738296525d5867ea695774e67", + "sha256:11b9c0ebce097180129e422379b824ae21c8f2a6596b159c7659e2e5a00e1aa0", + "sha256:15978a1fbd225583dd8cdaf37e67ccc278b5abecb4caf6b2d6b8e2b948e953f6", + "sha256:1fabed9ea2acc4efe4671b92c669a213db744d2af8a9fc5d69a8e9bc14b7a9db", + "sha256:2dac98e85565d5688e8ab7bdea5446674a83a3945a8f416ad0110018d1501b94", + "sha256:42ec1035841b389e8cc3692277a0bd81cdfe0b65d575a2c8862cec7a80e62e52", + "sha256:6422f2ff0919fd720195f64ffd8f924c1395d30f9a495f31e2392c2efafb5056", + "sha256:6a32f3a4cb2f6e1a0b15215f448e8ce2da192fd4ff35084d80d5e39da683e79b", + "sha256:7312e931b90fe14f925729cde58022f5d034241918a5c4f9797cac62f6b3a9dd", + "sha256:7d92a09b788cbb1aec325af5fcba9fed7203897bbd9269d5691bb1e3bce29550", + "sha256:833709a5c66ca52f1d21d41865a637223b368c0ee76ea54ca5bad6f2526c7679", + "sha256:89705f45ce07b2dfa806ee84439ec67c5d9a0ef20154e0e475e2b2ed392a5b83", + "sha256:8cd0fb36c7412996859cb4606a35969dd01f4ea34d9812a141cd920c3b18be77", + "sha256:950bc22bb56ee6ff142a2cb9ee980b571dd0912b0334aa3fe0fe3788d860bea2", + "sha256:a0c50db33c32594305b0ef9abc0cb7db13de7621d2cadf8392a1d9b3c437ef77", + "sha256:a0eb43a07386c3f1f1ebb4dc7aafb13f67188eab896e7397aa1ee95a9c884eb2", + "sha256:aaa4213c862f0ef00022751161df35804127b78adf4a2755b9f991a507e425fd", + "sha256:ac0c682111fbf404525dfc0f18a8b5f11be52657d4f96e9fcb75daf4f3984859", + "sha256:ad20d2eb875aaa1ea6d0f2916949f5c08a19c74d05b16ce6ebf6d24f2c9f75d1", + "sha256:b4afc542c0ac0db720cf516dd20c0846f71c248d2b3d21013aa0d4ef9c71ca25", + "sha256:b8a3715b3c4e604bcc94c90a825cd7f5635417453b253499664f784fc4da0152", + "sha256:ba28584e6bca48c59eecbf7efb1576ca214b47f05194646b081717fa628dfddf", + "sha256:ba381aec3a5dc29634f20692349d73f2d21f17653bda1decf0b52b11d694541f", + "sha256:bd1be66dde2b82f80afb9459fc618216753f67109b859a361cf7def5c7968729", + "sha256:c2507d796fca339c8fb03216364cca68d87e037c1f774977c8fc377627d01c71", + "sha256:cec7e622ebc545dbb4564e483dd20e4e404da17ae07e06f3e780b2dacd5cee66", + "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", + "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", + "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", + "sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a", + "sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c", + "sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb", + "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", + "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5" + ], + 
"index": "pypi", + "version": "==2.8.6" + }, + "ptyprocess": { + "hashes": [ + "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", + "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" + ], + "markers": "os_name != 'nt'", + "version": "==0.7.0" + }, + "pure-eval": { + "hashes": [ + "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", + "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" + ], + "version": "==0.2.2" + }, + "pycparser": { + "hashes": [ + "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9", + "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206" + ], + "version": "==2.21" }, "pygeos": { "hashes": [ @@ -438,13 +1518,21 @@ "index": "pypi", "version": "==0.7.1" }, - "pyparsing": { + "pygments": { "hashes": [ - "sha256:04ff808a5b90911829c55c4e26f75fa5ca8a2f5f36aa3a51f68e27033341d3e4", - "sha256:d9bdec0013ef1eb5a84ab39a3b3868911598afa494f5faa038647101504e2b81" + "sha256:b3ed06a9e8ac9a9aae5a6f5dbe78a8a58655d17b43b93c078f094ddc476ae297", + "sha256:fa7bd7bd2771287c0de303af8bfdfc731f51bd2c6a47ab69d117138893b82717" ], "markers": "python_version >= '3.6'", - "version": "==3.0.6" + "version": "==2.14.0" + }, + "pyparsing": { + "hashes": [ + "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb", + "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc" + ], + "markers": "python_full_version >= '3.6.8'", + "version": "==3.0.9" }, "pyproj": { "hashes": [ @@ -471,6 +1559,47 @@ "index": "pypi", "version": "==3.1.0" }, + "pyrsistent": { + "hashes": [ + "sha256:016ad1afadf318eb7911baa24b049909f7f3bb2c5b1ed7b6a8f21db21ea3faa8", + "sha256:1a2994773706bbb4995c31a97bc94f1418314923bd1048c6d964837040376440", + "sha256:20460ac0ea439a3e79caa1dbd560344b64ed75e85d8703943e0b66c2a6150e4a", + "sha256:3311cb4237a341aa52ab8448c27e3a9931e2ee09561ad150ba94e4cfd3fc888c", + "sha256:3a8cb235fa6d3fd7aae6a4f1429bbb1fec1577d978098da1252f0489937786f3", + "sha256:3ab2204234c0ecd8b9368dbd6a53e83c3d4f3cab10ecaf6d0e772f456c442393", + "sha256:42ac0b2f44607eb92ae88609eda931a4f0dfa03038c44c772e07f43e738bcac9", + "sha256:49c32f216c17148695ca0e02a5c521e28a4ee6c5089f97e34fe24163113722da", + "sha256:4b774f9288dda8d425adb6544e5903f1fb6c273ab3128a355c6b972b7df39dcf", + "sha256:4c18264cb84b5e68e7085a43723f9e4c1fd1d935ab240ce02c0324a8e01ccb64", + "sha256:5a474fb80f5e0d6c9394d8db0fc19e90fa540b82ee52dba7d246a7791712f74a", + "sha256:64220c429e42a7150f4bfd280f6f4bb2850f95956bde93c6fda1b70507af6ef3", + "sha256:878433581fc23e906d947a6814336eee031a00e6defba224234169ae3d3d6a98", + "sha256:99abb85579e2165bd8522f0c0138864da97847875ecbd45f3e7e2af569bfc6f2", + "sha256:a2471f3f8693101975b1ff85ffd19bb7ca7dd7c38f8a81701f67d6b4f97b87d8", + "sha256:aeda827381f5e5d65cced3024126529ddc4289d944f75e090572c77ceb19adbf", + "sha256:b735e538f74ec31378f5a1e3886a26d2ca6351106b4dfde376a26fc32a044edc", + "sha256:c147257a92374fde8498491f53ffa8f4822cd70c0d85037e09028e478cababb7", + "sha256:c4db1bd596fefd66b296a3d5d943c94f4fac5bcd13e99bffe2ba6a759d959a28", + "sha256:c74bed51f9b41c48366a286395c67f4e894374306b197e62810e0fdaf2364da2", + "sha256:c9bb60a40a0ab9aba40a59f68214eed5a29c6274c83b2cc206a359c4a89fa41b", + "sha256:cc5d149f31706762c1f8bda2e8c4f8fead6e80312e3692619a75301d3dbb819a", + "sha256:ccf0d6bd208f8111179f0c26fdf84ed7c3891982f2edaeae7422575f47e66b64", + "sha256:e42296a09e83028b3476f7073fcb69ffebac0e66dbbfd1bd847d61f74db30f19", + 
"sha256:e8f2b814a3dc6225964fa03d8582c6e0b6650d68a232df41e3cc1b66a5d2f8d1", + "sha256:f0774bf48631f3a20471dd7c5989657b639fd2d285b861237ea9e82c36a415a9", + "sha256:f0e7c4b2f77593871e918be000b96c8107da48444d57005b6a6bc61fb4331b2c" + ], + "markers": "python_version >= '3.7'", + "version": "==0.19.3" + }, + "pytest": { + "hashes": [ + "sha256:c7c6ca206e93355074ae32f7403e8ea12163b1163c976fee7d4d84027c162be5", + "sha256:d45e0952f3727241918b8fd0f376f5ff6b301cc0777c6f9a556935c92d8a7d42" + ], + "index": "pypi", + "version": "==7.2.1" + }, "python-dateutil": { "hashes": [ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", @@ -481,18 +1610,171 @@ }, "python-dotenv": { "hashes": [ - "sha256:32b2bdc1873fd3a3c346da1c6db83d0053c3c62f28f1f38516070c4c8971b1d3", - "sha256:a5de49a31e953b45ff2d2fd434bbc2670e8db5273606c1e737cc6b93eff3655f" + "sha256:1c93de8f636cde3ce377292818d0e440b6e45a82f215c3744979151fa8151c49", + "sha256:41e12e0318bebc859fcc4d97d4db8d20ad21721a6aa5047dd59f090391cb549a" ], "index": "pypi", - "version": "==0.19.2" + "version": "==0.21.1" + }, + "python-json-logger": { + "hashes": [ + "sha256:23e7ec02d34237c5aa1e29a070193a4ea87583bb4e7f8fd06d3de8264c4b2e1c", + "sha256:f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd" + ], + "markers": "python_version >= '3.6'", + "version": "==2.0.7" }, "pytz": { "hashes": [ - "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c", - "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326" + "sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0", + "sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a" ], - "version": "==2021.3" + "version": "==2022.7.1" + }, + "pyyaml": { + "hashes": [ + "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf", + "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", + "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", + "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", + "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b", + "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4", + "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07", + "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba", + "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9", + "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", + "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", + "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", + "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782", + "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", + "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", + "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", + "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", + "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", + "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1", + "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", + "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", + "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", + "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", + 
"sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", + "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", + "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d", + "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", + "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", + "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7", + "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", + "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", + "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", + "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358", + "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", + "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", + "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", + "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", + "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f", + "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", + "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" + ], + "markers": "python_version >= '3.6'", + "version": "==6.0" + }, + "pyzmq": { + "hashes": [ + "sha256:00c94fd4c9dd3c95aace0c629a7fa713627a5c80c1819326b642adf6c4b8e2a2", + "sha256:01d53958c787cfea34091fcb8ef36003dbb7913b8e9f8f62a0715234ebc98b70", + "sha256:0282bba9aee6e0346aa27d6c69b5f7df72b5a964c91958fc9e0c62dcae5fdcdc", + "sha256:02f5cb60a7da1edd5591a15efa654ffe2303297a41e1b40c3c8942f8f11fc17c", + "sha256:0645b5a2d2a06fd8eb738018490c514907f7488bf9359c6ee9d92f62e844b76f", + "sha256:0a154ef810d44f9d28868be04641f837374a64e7449df98d9208e76c260c7ef1", + "sha256:0a90b2480a26aef7c13cff18703ba8d68e181facb40f78873df79e6d42c1facc", + "sha256:0e8d00228db627ddd1b418c7afd81820b38575f237128c9650365f2dd6ac3443", + "sha256:17e1cb97d573ea84d7cd97188b42ca6f611ab3ee600f6a75041294ede58e3d20", + "sha256:183e18742be3621acf8908903f689ec520aee3f08449bfd29f583010ca33022b", + "sha256:1f6116991568aac48b94d6d8aaed6157d407942ea385335a6ed313692777fb9d", + "sha256:20638121b0bdc80777ce0ec8c1f14f1ffec0697a1f88f0b564fa4a23078791c4", + "sha256:2754fa68da08a854f4816e05160137fa938a2347276471103d31e04bcee5365c", + "sha256:28bcb2e66224a7ac2843eb632e4109d6b161479e7a2baf24e37210461485b4f1", + "sha256:293a7c2128690f496057f1f1eb6074f8746058d13588389981089ec45d8fdc77", + "sha256:2a73af6504e0d2805e926abf136ebf536735a13c22f709be7113c2ec65b4bec3", + "sha256:2d05d904f03ddf1e0d83d97341354dfe52244a619b5a1440a5f47a5b3451e84e", + "sha256:2e7b87638ee30ab13230e37ce5331b3e730b1e0dda30120b9eeec3540ed292c8", + "sha256:3100dddcada66ec5940ed6391ebf9d003cc3ede3d320748b2737553019f58230", + "sha256:31e523d067ce44a04e876bed3ff9ea1ff8d1b6636d16e5fcace9d22f8c564369", + "sha256:3594c0ff604e685d7e907860b61d0e10e46c74a9ffca168f6e9e50ea934ee440", + "sha256:3670e8c5644768f214a3b598fe46378a4a6f096d5fb82a67dfd3440028460565", + "sha256:4046d03100aca266e70d54a35694cb35d6654cfbef633e848b3c4a8d64b9d187", + "sha256:4725412e27612f0d7d7c2f794d89807ad0227c2fc01dd6146b39ada49c748ef9", + "sha256:484c2c4ee02c1edc07039f42130bd16e804b1fe81c4f428e0042e03967f40c20", + "sha256:487305c2a011fdcf3db1f24e8814bb76d23bc4d2f46e145bc80316a59a9aa07d", + "sha256:4a1bc30f0c18444d51e9b0d0dd39e3a4e7c53ee74190bebef238cd58de577ea9", + 
"sha256:4c25c95416133942280faaf068d0fddfd642b927fb28aaf4ab201a738e597c1e", + "sha256:4cbb885f347eba7ab7681c450dee5b14aed9f153eec224ec0c3f299273d9241f", + "sha256:4d3d604fe0a67afd1aff906e54da557a5203368a99dcc50a70eef374f1d2abef", + "sha256:4e295f7928a31ae0f657e848c5045ba6d693fe8921205f408ca3804b1b236968", + "sha256:5049e75cc99db65754a3da5f079230fb8889230cf09462ec972d884d1704a3ed", + "sha256:5050f5c50b58a6e38ccaf9263a356f74ef1040f5ca4030225d1cb1a858c5b7b6", + "sha256:526f884a27e8bba62fe1f4e07c62be2cfe492b6d432a8fdc4210397f8cf15331", + "sha256:531866c491aee5a1e967c286cfa470dffac1e2a203b1afda52d62b58782651e9", + "sha256:5605621f2181f20b71f13f698944deb26a0a71af4aaf435b34dd90146092d530", + "sha256:58fc3ad5e1cfd2e6d24741fbb1e216b388115d31b0ca6670f894187f280b6ba6", + "sha256:60ecbfe7669d3808ffa8a7dd1487d6eb8a4015b07235e3b723d4b2a2d4de7203", + "sha256:610d2d112acd4e5501fac31010064a6c6efd716ceb968e443cae0059eb7b86de", + "sha256:6136bfb0e5a9cf8c60c6ac763eb21f82940a77e6758ea53516c8c7074f4ff948", + "sha256:62b9e80890c0d2408eb42d5d7e1fc62a5ce71be3288684788f74cf3e59ffd6e2", + "sha256:656281d496aaf9ca4fd4cea84e6d893e3361057c4707bd38618f7e811759103c", + "sha256:66509c48f7446b640eeae24b60c9c1461799a27b1b0754e438582e36b5af3315", + "sha256:6bf3842af37af43fa953e96074ebbb5315f6a297198f805d019d788a1021dbc8", + "sha256:731b208bc9412deeb553c9519dca47136b5a01ca66667cafd8733211941b17e4", + "sha256:75243e422e85a62f0ab7953dc315452a56b2c6a7e7d1a3c3109ac3cc57ed6b47", + "sha256:7877264aa851c19404b1bb9dbe6eed21ea0c13698be1eda3784aab3036d1c861", + "sha256:81f99fb1224d36eb91557afec8cdc2264e856f3464500b55749020ce4c848ef2", + "sha256:8539216173135e9e89f6b1cc392e74e6b935b91e8c76106cf50e7a02ab02efe5", + "sha256:85456f0d8f3268eecd63dede3b99d5bd8d3b306310c37d4c15141111d22baeaf", + "sha256:866eabf7c1315ef2e93e34230db7cbf672e0d7c626b37c11f7e870c8612c3dcc", + "sha256:926236ca003aec70574754f39703528947211a406f5c6c8b3e50eca04a9e87fc", + "sha256:930e6ad4f2eaac31a3d0c2130619d25db754b267487ebc186c6ad18af2a74018", + "sha256:94f0a7289d0f5c80807c37ebb404205e7deb737e8763eb176f4770839ee2a287", + "sha256:9a2d5e419bd39a1edb6cdd326d831f0120ddb9b1ff397e7d73541bf393294973", + "sha256:9ca6db34b26c4d3e9b0728841ec9aa39484eee272caa97972ec8c8e231b20c7e", + "sha256:9f72ea279b2941a5203e935a4588b9ba8a48aeb9a926d9dfa1986278bd362cb8", + "sha256:a0e7ef9ac807db50b4eb6f534c5dcc22f998f5dae920cc28873d2c1d080a4fc9", + "sha256:a1cd4a95f176cdc0ee0a82d49d5830f13ae6015d89decbf834c273bc33eeb3d3", + "sha256:a9c464cc508177c09a5a6122b67f978f20e2954a21362bf095a0da4647e3e908", + "sha256:ac97e7d647d5519bcef48dd8d3d331f72975afa5c4496c95f6e854686f45e2d9", + "sha256:af1fbfb7ad6ac0009ccee33c90a1d303431c7fb594335eb97760988727a37577", + "sha256:b055a1cddf8035966ad13aa51edae5dc8f1bba0b5d5e06f7a843d8b83dc9b66b", + "sha256:b6f75b4b8574f3a8a0d6b4b52606fc75b82cb4391471be48ab0b8677c82f9ed4", + "sha256:b90bb8dfbbd138558f1f284fecfe328f7653616ff9a972433a00711d9475d1a9", + "sha256:be05504af0619d1cffa500af1e0ede69fb683f301003851f5993b5247cc2c576", + "sha256:c21a5f4e54a807df5afdef52b6d24ec1580153a6bcf0607f70a6e1d9fa74c5c3", + "sha256:c48f257da280b3be6c94e05bd575eddb1373419dbb1a72c3ce64e88f29d1cd6d", + "sha256:cac602e02341eaaf4edfd3e29bd3fdef672e61d4e6dfe5c1d065172aee00acee", + "sha256:ccb3e1a863222afdbda42b7ca8ac8569959593d7abd44f5a709177d6fa27d266", + "sha256:e1081d7030a1229c8ff90120346fb7599b54f552e98fcea5170544e7c6725aab", + "sha256:e14df47c1265356715d3d66e90282a645ebc077b70b3806cf47efcb7d1d630cb", + "sha256:e4bba04ea779a3d7ef25a821bb63fd0939142c88e7813e5bd9c6265a20c523a2", + 
"sha256:e99629a976809fe102ef73e856cf4b2660acd82a412a51e80ba2215e523dfd0a", + "sha256:f330a1a2c7f89fd4b0aa4dcb7bf50243bf1c8da9a2f1efc31daf57a2046b31f2", + "sha256:f3f96d452e9580cb961ece2e5a788e64abaecb1232a80e61deffb28e105ff84a", + "sha256:fc7c1421c5b1c916acf3128bf3cc7ea7f5018b58c69a6866d70c14190e600ce9" + ], + "markers": "python_version >= '3.6'", + "version": "==25.0.0" + }, + "qtconsole": { + "hashes": [ + "sha256:57748ea2fd26320a0b77adba20131cfbb13818c7c96d83fafcb110ff55f58b35", + "sha256:be13560c19bdb3b54ed9741a915aa701a68d424519e8341ac479a91209e694b2" + ], + "markers": "python_version >= '3.7'", + "version": "==5.4.0" + }, + "qtpy": { + "hashes": [ + "sha256:0603c9c83ccc035a4717a12908bf6bc6cb22509827ea2ec0e94c2da7c9ed57c5", + "sha256:8d6d544fc20facd27360ea189592e6135c614785f0dec0b4f083289de6beb408" + ], + "markers": "python_version >= '3.7'", + "version": "==2.3.0" }, "rasterio": { "hashes": [ @@ -518,6 +1800,30 @@ "index": "pypi", "version": "==0.15.0" }, + "requests": { + "hashes": [ + "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa", + "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf" + ], + "markers": "python_version >= '3.7' and python_version < '4.0'", + "version": "==2.28.2" + }, + "rfc3339-validator": { + "hashes": [ + "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b", + "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.1.4" + }, + "rfc3986-validator": { + "hashes": [ + "sha256:2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9", + "sha256:3d44bde7921b3b9ec3ae4e3adca370438eccebc676456449b145d533b240d055" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.1.1" + }, "richdem": { "hashes": [ "sha256:0c5988b58bea942a31ea9f5c5502a6a34aacad5cd1ab6b5e32ac627630a36b9b", @@ -537,35 +1843,40 @@ "index": "pypi", "version": "==0.3.4" }, + "s3transfer": { + "hashes": [ + "sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd", + "sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947" + ], + "markers": "python_version >= '3.7'", + "version": "==0.6.0" + }, "scipy": { "hashes": [ - "sha256:1437073f1d4664990879aa8f9547524764372e0fef84a077be4b19e82bba7a8d", - "sha256:17fd991a275e4283453f89d404209aa92059ac68d76d804b4bc1716a3742e1b5", - "sha256:1ea6233f5a365cb7945b4304bd06323ece3ece85d6a3fa8598d2f53e513467c9", - "sha256:2d25272c03ee3c0fe5e0dff1bb7889280bb6c9e1766fa9c7bde81ad8a5f78694", - "sha256:30bdda199667e74b50208a793eb1ba47a04e5e3fa16f5ff06c6f7969ae78e4da", - "sha256:359b60a0cccd17723b9d5e329a5212a710e771a3ddde800e472fb93732756c46", - "sha256:39f838ea5ce8da868785193d88d05cf5a6d5c390804ec99de29a28e1dcdd53e6", - "sha256:4d175ba93e00d8eef8f7cd70d4d88a9106a86800c82ea03cf2268c36d6545483", - "sha256:5273d832fb9cd5724ee0d335c16a903b923441107dd973d27fc4293075a9f4e3", - "sha256:54951f51d731c832b1b8885e0a92e89f33d087de7e40d02078bf0d49c7cbdbb5", - "sha256:74f518ce542533054695f743e4271cb8986b63f95bb51d70fcee4f3929cbff7d", - "sha256:7b1d0f5f524518f1a86f288443528e4ff4a739c0966db663af4129b7ac7849f8", - "sha256:82c5befebf54d799d77e5f0205c03030f57f69ba2541baa44d2e6ad138c28cd3", - "sha256:8482c8e45857ab0a5446eb7460d2307a27cbbe659d6d2257820c6d6eb950fd0f", - "sha256:87cf3964db0f1cce17aeed5bfc1b89a6b4b07dbfc48e50d21fa3549e00456803", - 
"sha256:8b5726a0fedeaa6beb1095e4466998bdd1d1e960b28db9b5a16c89cbd7b2ebf1", - "sha256:97eb573e361a73a553b915dc195c6f72a08249964b1a33f157f9659f3b6210d1", - "sha256:a80eb01c43fd98257ec7a49ff5cec0edba32031b5f86503f55399a48cb2c5379", - "sha256:cac71d5476a6f56b50459da21f6221707e0051ebd428b2137db32ef4a43bb15e", - "sha256:d86abd1ddf421dea5e9cebfeb4de0d205b3dc04e78249afedba9c6c3b2227ff2", - "sha256:dc2d1bf41294e63c7302bf499973ac0c7f73c93c01763db43055f6525234bf11", - "sha256:e08b81fcd9bf98740b58dc6fdd7879e33a64dcb682201c1135f7d4a75216bb05", - "sha256:e3efe7ef75dfe627b354ab0af0dbc918eadee97cc80ff1aabea6d3e01114ebdd", - "sha256:fa2dbabaaecdb502641b0b3c00dec05fb475ae48655c66da16c9ed24eda1e711" - ], - "markers": "python_version < '3.11' and python_version >= '3.7'", - "version": "==1.7.2" + "sha256:049a8bbf0ad95277ffba9b3b7d23e5369cc39e66406d60422c8cfef40ccc8415", + "sha256:07c3457ce0b3ad5124f98a86533106b643dd811dd61b548e78cf4c8786652f6f", + "sha256:0f1564ea217e82c1bbe75ddf7285ba0709ecd503f048cb1236ae9995f64217bd", + "sha256:1553b5dcddd64ba9a0d95355e63fe6c3fc303a8fd77c7bc91e77d61363f7433f", + "sha256:15a35c4242ec5f292c3dd364a7c71a61be87a3d4ddcc693372813c0b73c9af1d", + "sha256:1b4735d6c28aad3cdcf52117e0e91d6b39acd4272f3f5cd9907c24ee931ad601", + "sha256:2cf9dfb80a7b4589ba4c40ce7588986d6d5cebc5457cad2c2880f6bc2d42f3a5", + "sha256:39becb03541f9e58243f4197584286e339029e8908c46f7221abeea4b749fa88", + "sha256:43b8e0bcb877faf0abfb613d51026cd5cc78918e9530e375727bf0625c82788f", + "sha256:4b3f429188c66603a1a5c549fb414e4d3bdc2a24792e061ffbd607d3d75fd84e", + "sha256:4c0ff64b06b10e35215abce517252b375e580a6125fd5fdf6421b98efbefb2d2", + "sha256:51af417a000d2dbe1ec6c372dfe688e041a7084da4fdd350aeb139bd3fb55353", + "sha256:5678f88c68ea866ed9ebe3a989091088553ba12c6090244fdae3e467b1139c35", + "sha256:79c8e5a6c6ffaf3a2262ef1be1e108a035cf4f05c14df56057b64acc5bebffb6", + "sha256:7ff7f37b1bf4417baca958d254e8e2875d0cc23aaadbe65b3d5b3077b0eb23ea", + "sha256:aaea0a6be54462ec027de54fca511540980d1e9eea68b2d5c1dbfe084797be35", + "sha256:bce5869c8d68cf383ce240e44c1d9ae7c06078a9396df68ce88a1230f93a30c1", + "sha256:cd9f1027ff30d90618914a64ca9b1a77a431159df0e2a195d8a9e8a04c78abf9", + "sha256:d925fa1c81b772882aa55bcc10bf88324dadb66ff85d548c71515f6689c6dac5", + "sha256:e7354fd7527a4b0377ce55f286805b34e8c54b91be865bac273f527e1b839019", + "sha256:fae8a7b898c42dffe3f7361c40d5952b6bf32d10c4569098d276b4c547905ee1" + ], + "markers": "python_version < '3.12' and python_version >= '3.8'", + "version": "==1.10.1" }, "seaborn": { "hashes": [ @@ -575,13 +1886,20 @@ "index": "pypi", "version": "==0.11.0" }, - "setuptools-scm": { + "send2trash": { "hashes": [ - "sha256:4c64444b1d49c4063ae60bfe1680f611c8b13833d556fd1d6050c0023162a119", - "sha256:a49aa8081eeb3514eb9728fa5040f2eaa962d6c6f4ec9c32f6c1fba88f88a0f2" + "sha256:d2c24762fd3759860a0aff155e45871447ea58d2be6bdd39b5c8f966a0c99c2d", + "sha256:f20eaadfdb517eaca5ce077640cb261c7d2698385a6a0f072a4a5447fd49fa08" ], - "markers": "python_version >= '3.6'", - "version": "==6.3.2" + "version": "==1.8.0" + }, + "setuptools": { + "hashes": [ + "sha256:e5fd0a713141a4a105412233c63dc4e17ba0090c8e8334594ac790ec97792330", + "sha256:f106dee1b506dee5102cc3f3e9e68137bbad6d47b616be7991714b0c62204251" + ], + "markers": "python_version >= '3.7'", + "version": "==67.4.0" }, "shapely": { "hashes": [ @@ -610,70 +1928,82 @@ }, "simplejson": { "hashes": [ - "sha256:04e31fa6ac8e326480703fb6ded1488bfa6f1d3f760d32e29dbf66d0838982ce", - "sha256:068670af975247acbb9fc3d5393293368cda17026db467bf7a51548ee8f17ee1", - 
"sha256:07ecaafc1b1501f275bf5acdee34a4ad33c7c24ede287183ea77a02dc071e0c0", - "sha256:0b4126cac7d69ac06ff22efd3e0b3328a4a70624fcd6bca4fc1b4e6d9e2e12bf", - "sha256:0de783e9c2b87bdd75b57efa2b6260c24b94605b5c9843517577d40ee0c3cc8a", - "sha256:12133863178a8080a3dccbf5cb2edfab0001bc41e5d6d2446af2a1131105adfe", - "sha256:1c9b1ed7ed282b36571638297525f8ef80f34b3e2d600a56f962c6044f24200d", - "sha256:23fe704da910ff45e72543cbba152821685a889cf00fc58d5c8ee96a9bad5f94", - "sha256:28221620f4dcabdeac310846629b976e599a13f59abb21616356a85231ebd6ad", - "sha256:35a49ebef25f1ebdef54262e54ae80904d8692367a9f208cdfbc38dbf649e00a", - "sha256:37bc0cf0e5599f36072077e56e248f3336917ded1d33d2688624d8ed3cefd7d2", - "sha256:3fe87570168b2ae018391e2b43fbf66e8593a86feccb4b0500d134c998983ccc", - "sha256:3ff5b3464e1ce86a8de8c88e61d4836927d5595c2162cab22e96ff551b916e81", - "sha256:401d40969cee3df7bda211e57b903a534561b77a7ade0dd622a8d1a31eaa8ba7", - "sha256:4b6bd8144f15a491c662f06814bd8eaa54b17f26095bb775411f39bacaf66837", - "sha256:4c09868ddb86bf79b1feb4e3e7e4a35cd6e61ddb3452b54e20cf296313622566", - "sha256:4d1c135af0c72cb28dd259cf7ba218338f4dc027061262e46fe058b4e6a4c6a3", - "sha256:4ff4ac6ff3aa8f814ac0f50bf218a2e1a434a17aafad4f0400a57a8cc62ef17f", - "sha256:521877c7bd060470806eb6335926e27453d740ac1958eaf0d8c00911bc5e1802", - "sha256:522fad7be85de57430d6d287c4b635813932946ebf41b913fe7e880d154ade2e", - "sha256:5540fba2d437edaf4aa4fbb80f43f42a8334206ad1ad3b27aef577fd989f20d9", - "sha256:5d6b4af7ad7e4ac515bc6e602e7b79e2204e25dbd10ab3aa2beef3c5a9cad2c7", - "sha256:5decdc78849617917c206b01e9fc1d694fd58caa961be816cb37d3150d613d9a", - "sha256:632ecbbd2228575e6860c9e49ea3cc5423764d5aa70b92acc4e74096fb434044", - "sha256:65b998193bd7b0c7ecdfffbc825d808eac66279313cb67d8892bb259c9d91494", - "sha256:67093a526e42981fdd954868062e56c9b67fdd7e712616cc3265ad0c210ecb51", - "sha256:681eb4d37c9a9a6eb9b3245a5e89d7f7b2b9895590bb08a20aa598c1eb0a1d9d", - "sha256:69bd56b1d257a91e763256d63606937ae4eb890b18a789b66951c00062afec33", - "sha256:724c1fe135aa437d5126138d977004d165a3b5e2ee98fc4eb3e7c0ef645e7e27", - "sha256:7255a37ff50593c9b2f1afa8fafd6ef5763213c1ed5a9e2c6f5b9cc925ab979f", - "sha256:743cd768affaa508a21499f4858c5b824ffa2e1394ed94eb85caf47ac0732198", - "sha256:80d3bc9944be1d73e5b1726c3bbfd2628d3d7fe2880711b1eb90b617b9b8ac70", - "sha256:82ff356ff91be0ab2293fc6d8d262451eb6ac4fd999244c4b5f863e049ba219c", - "sha256:8e8607d8f6b4f9d46fee11447e334d6ab50e993dd4dbfb22f674616ce20907ab", - "sha256:97202f939c3ff341fc3fa84d15db86156b1edc669424ba20b0a1fcd4a796a045", - "sha256:9b01e7b00654115965a206e3015f0166674ec1e575198a62a977355597c0bef5", - "sha256:9fa621b3c0c05d965882c920347b6593751b7ab20d8fa81e426f1735ca1a9fc7", - "sha256:a1aa6e4cae8e3b8d5321be4f51c5ce77188faf7baa9fe1e78611f93a8eed2882", - "sha256:a2d30d6c1652140181dc6861f564449ad71a45e4f165a6868c27d36745b65d40", - "sha256:a649d0f66029c7eb67042b15374bd93a26aae202591d9afd71e111dd0006b198", - "sha256:a7854326920d41c3b5d468154318fe6ba4390cb2410480976787c640707e0180", - "sha256:a89acae02b2975b1f8e4974cb8cdf9bf9f6c91162fb8dec50c259ce700f2770a", - "sha256:a8bbdb166e2fb816e43ab034c865147edafe28e1b19c72433147789ac83e2dda", - "sha256:ac786f6cb7aa10d44e9641c7a7d16d7f6e095b138795cd43503769d4154e0dc2", - "sha256:b09bc62e5193e31d7f9876220fb429ec13a6a181a24d897b9edfbbdbcd678851", - "sha256:b10556817f09d46d420edd982dd0653940b90151d0576f09143a8e773459f6fe", - "sha256:b81076552d34c27e5149a40187a8f7e2abb2d3185576a317aaf14aeeedad862a", - "sha256:bdfc54b4468ed4cd7415928cbe782f4d782722a81aeb0f81e2ddca9932632211", - 
"sha256:cf6e7d5fe2aeb54898df18db1baf479863eae581cce05410f61f6b4188c8ada1", - "sha256:cf98038d2abf63a1ada5730e91e84c642ba6c225b0198c3684151b1f80c5f8a6", - "sha256:d24a9e61df7a7787b338a58abfba975414937b609eb6b18973e25f573bc0eeeb", - "sha256:d74ee72b5071818a1a5dab47338e87f08a738cb938a3b0653b9e4d959ddd1fd9", - "sha256:dd16302d39c4d6f4afde80edd0c97d4db643327d355a312762ccd9bd2ca515ed", - "sha256:dd2fb11922f58df8528adfca123f6a84748ad17d066007e7ac977720063556bd", - "sha256:deac4bdafa19bbb89edfb73b19f7f69a52d0b5bd3bb0c4ad404c1bbfd7b4b7fd", - "sha256:e03c3b8cc7883a54c3f34a6a135c4a17bc9088a33f36796acdb47162791b02f6", - "sha256:e1ec8a9ee0987d4524ffd6299e778c16cc35fef6d1a2764e609f90962f0b293a", - "sha256:e8603e691580487f11306ecb066c76f1f4a8b54fb3bdb23fa40643a059509366", - "sha256:f444762fed1bc1fd75187ef14a20ed900c1fbb245d45be9e834b822a0223bc81", - "sha256:f63600ec06982cdf480899026f4fda622776f5fabed9a869fdb32d72bc17e99a", - "sha256:fb62d517a516128bacf08cb6a86ecd39fb06d08e7c4980251f5d5601d29989ba" + "sha256:04a4b9a297cccbc9e1d66fe652fbffd55b36d6579c43132e821d315957302194", + "sha256:063db62a9251e61ea0c17e49c3e7bed465bfcc5359655abcb8c0bc6130a4e0d4", + "sha256:070ab073ce72f1624107dfd6d095c87ac32aafe7ba54a5c5055a3dd83cb06e51", + "sha256:099bbd3b5b4ea83159a980348cd481a34984dee5fe1b9fac31a9137158f46960", + "sha256:0baf8c60efef74944ed4adb034d14bcf737731576f0e4c3c56fb875ea256af69", + "sha256:0e7c3fae6c9540064e06a653780b4f263675cd69ca6841345029fee3e27e9bb5", + "sha256:141782a0a25c1792627575b37b4951583358ccc7137623aa45947f8425ee8d96", + "sha256:14b35fb90083218e59df5dba733c7086655f2938f3fcabe36ad849623941d660", + "sha256:169c2c7446ef33439c304a6aa5b7b5a2dbc938c9c2dd882dd3f2553f9518ebf6", + "sha256:16cc750d19852fa5ebafd55da86fa357f87991e07b4e2afb37a5975dfdde0153", + "sha256:1907d49d70c75530976119c13785db91168d2599288debaca7d25da9cd2f3747", + "sha256:1b79e2607ac5ba98381c2e068727acc1e4dd385a6d216914c0613f8f568a06a5", + "sha256:1e49c84df6e71e3c23169d3df481565dd607cbee4aa1e0af15c493cccad7c745", + "sha256:23fce984045804194f513a2739dcd82be350198470d5ade5058da019a48cf3f8", + "sha256:24823364fee93bab141621b3a2e10612e31be7ca58788bf9b2cd2b1ce37ab07d", + "sha256:290bbcdcbb37af3f7e43378f592ab7a9168fca640da6af63d42cdb535f96bbf2", + "sha256:2a1b3222bc8f6ac91b5ebe3263111c7dc4dc4b01c52f0153f5bb1f3ef3bf0023", + "sha256:2b0f6de11f5ce4b80f51bc49d08b898602e190547f8efe4e44af8ae3cda7779d", + "sha256:2be75f4cb9951efeb2616e16f944ee4f9a09768475a3f5c40a6ac4dc5ee68dfd", + "sha256:2c7ee643ee93684bf76196e2d84a2090c6df8f01737a016e869b579593827b6e", + "sha256:37bdef13412c0bc338db2993a38f3911d5bd2a0ba8d00b3bc66d1063edd7c33e", + "sha256:3bab9ea49ff477c926c5787f79ec47cf51c7ffb15c9d8dd0f09e728807d44f4b", + "sha256:44d6c52d4f5c0c087a6e88a92bf9f94234321d21be32c6471ba39856e304bbe3", + "sha256:4b8d4d958c5ab3489d1174917a7fad82da642560c39ce559a624e63deaaa36b1", + "sha256:4de9fed1166aeedee44150fa83bc059aca6b612940281f8b5a39374781f16196", + "sha256:502d86fbfe914263642479b87ed61af3b27b9e039df77acd2416cfccfc892e68", + "sha256:508342d7227ed66beecfbba7a38b46e1a713faeb034216f43f03ec5c175e0622", + "sha256:50f4b6d52f3a2d1cffd11834a1fe7f9516f0e3f20cbe78027aa88ff990fad7d6", + "sha256:52465a5578cfc2c5e374a574df14dfb75e04c6cb6a100b7abc8bf6c89bea8f5e", + "sha256:55aa983575b0aef143845f5bfbb35075475eccaebf7d4b30f4037a2fe8414666", + "sha256:55df3dfd8777bf134e1078d2f195352432a77f23ccb90b92b08218123d56adc9", + "sha256:56f186d44a9f625b5e5d9ba4b9551e263604000a7df60cb373b3e789ca603b2a", + "sha256:5780e3929435a8d39671537174f8ce0ccafb4f6e0c748ffe139916ffbdca39d3", + 
"sha256:59a629240cfbc5b4f390a8578dca74ae77ab617de971862acb946822d2eb1b11", + "sha256:5b009342e712026ffabe8a471d5b4a4ff2a038687387e74eae601574c04dae33", + "sha256:62628ea5df8c830d00a7417d5ecd949a1b24a8d0a5063a2a77f7ec7522110a0f", + "sha256:694332fd6fd10fe8868c2508583220d1a1a7be9ff049dab5bd6b9aedfb9edc50", + "sha256:6a49665169c18f27a0fc10935466332ee7406ee14ced8dc0a1b4d465547299aa", + "sha256:6b997739fdbc9b7030ff490fc8e5f8c144b8ec80f3605eff643983672bb8cfde", + "sha256:6bd81d10cb3384f64242316da8a2b2f88618776bc1ef38bcc79f1afe8ad36616", + "sha256:6c4c56c5abb82e22877b913186e5c0fd7d9eef0c930719e28fa451d3f11defb4", + "sha256:6fe1173b4146641c872bafa6f9a21f3a2012f502d54fbb523a76e6320024fae9", + "sha256:75eb555dc349d0cbe2c95ea2be665b306c6ac6d5b64e3a3920af9b805ecdb5f7", + "sha256:7c26fe63755ecc59c502ddde8e58ce8b765bf4fdd3f5858d2b7c8ab28bc2a9c8", + "sha256:7e73d9d6af3c29b60a92e28b3144d951110f59a3d05fc402c3f6c5248b883400", + "sha256:7ff65b475091084e5bdb7f26e9c555956be7355b573ce494fa96f9f8e34541ac", + "sha256:8209c40279ed9b2cd5fbe2d617a29a074e90ea97fce7c07a0128a01cb3e8afc5", + "sha256:88f59a07873dc1f06fd9e6712dd71286f1b297a066ad2fd9110ad080d3cb011c", + "sha256:96ade36640734b54176c4765d00a60767bd7fae5b7a5b3574accc055ac18e34c", + "sha256:9cf299fbb7d476676dfea372a3262654af98694bd1df35b060ce0fe1b68087f1", + "sha256:a2960b95f3ba822d077d1afa7e1fea9799cfb2990028cf010e666f64195ecb5a", + "sha256:a80bd9a3db88a76a401155c64e3499376c702307c2206cb381cc2a8dd9cc4f1f", + "sha256:aad323e92cb1bd3b1db6f57c007dca964d13c52247ad844203ce381e94066601", + "sha256:ab5bdf0b8d07f7fd603b2d0c1982412cd9f8ade997088ddced251f7e656c7fd4", + "sha256:b0352428b35da859a98770949e7353866ae65463026f1c8e4c89a6395d4b5fd7", + "sha256:b2c4e8b65987f3c6529149495d28e23efe213e94dc3659176c4ab22d18a9ee4a", + "sha256:bcd9eac304a133ee4af58e68c5ded4c5ba663d3ee4602e8613359b776a1f8c8f", + "sha256:c3b696770b504f881f271f97b94a687487ec1ef20bfbd5f20d92bbab7a85952d", + "sha256:c4514675f6571da8190fea52a110bca686fa844972e8b2b3bc07ace9e632ee4f", + "sha256:c98fddc374468158778a8afb3fd7296412a2b2fc34cebba64212ac3e018e7382", + "sha256:cde5a3ff5e0bd5d6da676314dfae86c9e99bff77bca03d30223c9718a58f9e83", + "sha256:cf7168b2046db0eceb83d8ed2ee31c0847ce18b2d8baf3e93de9560f3921a8c3", + "sha256:d774782159347d66563cd7ac18b9dd37010438a825160cde4818caa18110a746", + "sha256:d990ea42ba908cb57a3df97d283aa26c1822f10a0a60e250b54ee21cd08c48d0", + "sha256:e762e9d8556fa9f3a99f8a278eeba50a35b5f554b82deeb282ddbdd85816e638", + "sha256:e8a4750e8db92109e6f1f7783a7faae4254d6d5dc28a41ff7eff7d2265f0586b", + "sha256:eb81cfef0c0039010f0212f4e5eb6909641b8a54c761584054ac97fd7bd0c21a", + "sha256:ebb53837c5ffcb6100646018565d3f1afed6f4b185b14b2c9cbccf874fe40157", + "sha256:efa70fd9b6c7b57b048ecadb909683acd535cddebc5b22f3c05ba3b369739caf", + "sha256:f73bae5e315adf7bc8cb7f0a13a1e9e33bead42e8ce174be83ac9ecc2513c86a", + "sha256:f89f078114cacedb9a3392615cc099cf02a51efa7507f90e2006bf7ec38c880d", + "sha256:f9f72d2b539512f382a48cc9ad6cea2d3a572e71e92c40e03d2140041eeaa233", + "sha256:fc8df5831b645e96a318ea51a66ce6e2bb869eebc3fa9a860bbf67aecd270055" ], "markers": "python_version >= '2.5' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==3.17.6" + "version": "==3.18.3" }, "six": { "hashes": [ @@ -683,6 +2013,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, + "sniffio": { + "hashes": [ + "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101", + "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384" + 
], + "markers": "python_version >= '3.7'", + "version": "==1.3.0" + }, "snuggs": { "hashes": [ "sha256:501cf113fe3892e14e2fee76da5cd0606b7e149c411c271898e6259ebde2617b", @@ -690,13 +2028,91 @@ ], "version": "==1.4.7" }, + "soupsieve": { + "hashes": [ + "sha256:49e5368c2cda80ee7e84da9dbe3e110b70a4575f196efb74e51b94549d921955", + "sha256:e28dba9ca6c7c00173e34e4ba57448f0688bb681b7c5e8bf4971daafc093d69a" + ], + "markers": "python_version >= '3.7'", + "version": "==2.4" + }, + "stack-data": { + "hashes": [ + "sha256:32d2dd0376772d01b6cb9fc996f3c8b57a357089dec328ed4b6553d037eaf815", + "sha256:cbb2a53eb64e5785878201a97ed7c7b94883f48b87bfb0bbe8b623c74679e4a8" + ], + "version": "==0.6.2" + }, + "tables": { + "hashes": [ + "sha256:169450bd11959c0e1c43137e768cf8b60b2a4f3b2ebf9a620e21865dc0c2d059", + "sha256:361da30289ecdcb39b7648c786d8185f9ab08879c0a58a3fc56dab026e122d8e", + "sha256:4628e762a8aacfa038cdae118d2d1df9a9ddd9b4a82d6993f4bcbfa7744a9f8a", + "sha256:49a972b8a7c27a8a173aeb05f67acb45fe608b64cd8e9fa667c0962a60b71b49", + "sha256:6055dd1d3ec03fd25c60bb93a4be396464f0640fd5845884230dae1deb7e6cc6", + "sha256:6e13a3eaf86f9eba582a04b44929ee1585a05dd539d207a10a22374b7e4552ca", + "sha256:7acbf0e2fb7132a40f441ebb53b53c97cee05fb88ce743afdd97c681d1d377d7", + "sha256:8c96c5d4a9ebe34b72b918b3189954e2d5b6f87cb211d4244b7c001661d8a861", + "sha256:8ea87231788cfd5773ffbe33f149f778f9ef4ab681149dec00cb88e1681bd299", + "sha256:94d7ccac04277089e3bb466bf5c8f7038dd53bb8f19ea9679b7fea62c5c3ae8f", + "sha256:950167d56b45ece117f79d839d5d55f0cb45bfca20290fa9dcd70255282f969e", + "sha256:9d06c5fda6657698bae4fbe841204625b501ddf2e2a77131c23f3d3ac072db82", + "sha256:acb3f905c63e437023071833744b3e5a83376dc457f413f0840d8d50dd5d402b", + "sha256:bfdbcacffec122ce8d1b0dd6ffc3c6051bedd6081e20264fa96165d43fc78f52", + "sha256:c0b97a7363941d9518573c217cb5bfe4b2b456748aac1e9420d3979f7d5e82d2", + "sha256:d95faa1174653a738ac8183a95f050a29a3f69efac6e71f70cde8d717e31af17", + "sha256:da9e1ee83c01ed4d1382c7b186d77b4c0ef80b340a48d11a66346e30342c5929", + "sha256:db163df08ded7804d596dee14d88397f6c55cdf4671b3992cb885c0b3890a54d", + "sha256:dedb959c00ac9e84562a69e80fa858d7aa06d91f96c6cb8cccbbbaf7a879436b", + "sha256:ea4b41ed95953ad588bcd6e557577414e50754011430c27934daf5dbd2d52251", + "sha256:eed1e030bb077476d585697e37f2b8e37db4157ff93b485b43f374254cff8698", + "sha256:f1327aeef8b6c0fec5aae9f5f5a57b2d8ec98c08495fd09471b749ea46de9eb0", + "sha256:f9c88511483c8fd39e7841fc60bc7038c96eeb87fe776092439172e1e6330f49", + "sha256:fd63c94960f8208cb13d41033a3114c0242e7737cb578f2454c6a087c5d246ec" + ], + "index": "pypi", + "version": "==3.6.1" + }, + "terminado": { + "hashes": [ + "sha256:6ccbbcd3a4f8a25a5ec04991f39a0b8db52dfcd487ea0e578d977e6752380333", + "sha256:8650d44334eba354dd591129ca3124a6ba42c3d5b70df5051b6921d506fdaeae" + ], + "markers": "python_version >= '3.7'", + "version": "==0.17.1" + }, + "tinycss2": { + "hashes": [ + "sha256:2b80a96d41e7c3914b8cda8bc7f705a4d9c49275616e886103dd839dfc847847", + "sha256:8cff3a8f066c2ec677c06dbc7b45619804a6938478d9d73c284b29d14ecb0627" + ], + "markers": "python_version >= '3.7'", + "version": "==1.2.1" + }, "tomli": { "hashes": [ - "sha256:c6ce0015eb38820eaf32b5db832dbc26deb3dd427bd5f6556cf0acac2c214fee", - "sha256:f04066f68f5554911363063a30b108d2b5a5b1a010aa8b6132af78489fe3aade" + "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", + "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" ], - "markers": "python_version >= '3.6'", - "version": "==1.2.2" + "markers": 
"python_version < '3.11'", + "version": "==2.0.1" + }, + "tornado": { + "hashes": [ + "sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca", + "sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72", + "sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23", + "sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8", + "sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b", + "sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9", + "sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13", + "sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75", + "sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac", + "sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e", + "sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b" + ], + "markers": "python_version >= '3.7'", + "version": "==6.2" }, "tqdm": { "hashes": [ @@ -705,9 +2121,179 @@ ], "index": "pypi", "version": "==4.48.0" + }, + "traitlets": { + "hashes": [ + "sha256:9e6ec080259b9a5940c797d58b613b5e31441c2257b87c2e795c5228ae80d2d8", + "sha256:f6cde21a9c68cf756af02035f72d5a723bf607e862e7be33ece505abf4a3bad9" + ], + "markers": "python_version >= '3.7'", + "version": "==5.9.0" + }, + "uri-template": { + "hashes": [ + "sha256:934e4d09d108b70eb8a24410af8615294d09d279ce0e7cbcdaef1bd21f932b06", + "sha256:f1699c77b73b925cf4937eae31ab282a86dc885c333f2e942513f08f691fc7db" + ], + "version": "==1.2.0" + }, + "urllib3": { + "hashes": [ + "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72", + "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==1.26.14" + }, + "wcwidth": { + "hashes": [ + "sha256:795b138f6875577cd91bba52baf9e445cd5118fd32723b460e30a0af30ea230e", + "sha256:a5220780a404dbe3353789870978e472cfe477761f06ee55077256e509b156d0" + ], + "version": "==0.2.6" + }, + "webcolors": { + "hashes": [ + "sha256:16d043d3a08fd6a1b1b7e3e9e62640d09790dce80d2bdd4792a175b35fe794a9", + "sha256:d98743d81d498a2d3eaf165196e65481f0d2ea85281463d856b1e51b09f62dce" + ], + "version": "==1.12" + }, + "webencodings": { + "hashes": [ + "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", + "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" + ], + "version": "==0.5.1" + }, + "websocket-client": { + "hashes": [ + "sha256:3f09e6d8230892547132177f575a4e3e73cfdf06526e20cc02aa1c3b47184d40", + "sha256:cdf5877568b7e83aa7cf2244ab56a3213de587bbe0ce9d8b9600fc77b455d89e" + ], + "markers": "python_version >= '3.7'", + "version": "==1.5.1" + }, + "whitebox": { + "hashes": [ + "sha256:0455cf727233adb53abe1bbe221fe0ac597d3c2609a3f7e74c1243f8ffb4b190", + "sha256:8afc78a2f59e1b58e1cae2509f9489b7fc9f2105a5ee7d84a4005e5905dcea01" + ], + "index": "pypi", + "version": "==2.2.0" + }, + "widgetsnbextension": { + "hashes": [ + "sha256:003f716d930d385be3fd9de42dd9bf008e30053f73bddde235d14fbeaeff19af", + "sha256:eaaaf434fb9b08bd197b2a14ffe45ddb5ac3897593d43c69287091e5f3147bf7" + ], + "markers": "python_version >= '3.7'", + "version": "==4.0.5" + }, + "xarray": { + "hashes": [ + "sha256:3a365ce09127fc841ba88baa63f37ca61376ffe389a6c5e66d52f2c88c23a62b", + "sha256:da0f0d7719b5ee95143a34804e3afb82cb8abbe5c10f9750a9dbed156ad30f00" + ], + "index": "pypi", + "version": "==0.19.0" + }, + "y-py": { + 
"hashes": [ + "sha256:05f805b58422d5d7c8e7e8e2141d1c3cac4daaa4557ae6a9b84b141fe8d6289e", + "sha256:065f90501cf008375d70be6ce72dd41745e09d088f0b545f5f914d2c3f04f7ae", + "sha256:0c0e333c20b0a6ce4a5851203d45898ab93f16426c342420b931e190c5b71d3d", + "sha256:13b9d2959d9a26536b6ad118fb026ff19bd79da52e4addf6f3a562e7c01d516e", + "sha256:1906f13e8d5ebfbd9c7948f57bc6f6f53b451b19c99350f42a0f648147a8acfe", + "sha256:1f54625b9ed4e787872c45d3044dcfd04c0da4258d9914f3d32308830b35246c", + "sha256:202b2a3e42e0a1eaedee26f8a3bc73cd9f994c4c2b15511ea56b9838178eb380", + "sha256:2532ea5aefb223fd688c93860199d348a7601d814aac9e8784d816314588ddeb", + "sha256:25637e3d011ca6f877a24f3083ff2549d1d619406d7e8a1455c445527205046c", + "sha256:2692c808bf28f797f8d693f45dc86563ac3b1626579f67ce9546dca69644d687", + "sha256:27c1e9a866146d250e9e16d99fe22a40c82f5b592ab85da97e5679fc3841c7ce", + "sha256:2ffebe5e62cbfee6e24593927dedba77dc13ac4cfb9c822074ab566b1fb63d59", + "sha256:50cfa0532bcee27edb8c64743b49570e28bb76a00cd384ead1d84b6f052d9368", + "sha256:55098440e32339c2dc3d652fb36bb77a4927dee5fd4ab0cb1fe12fdd163fd4f5", + "sha256:5dbd8d177ec7b9fef4a7b6d22eb2f8d5606fd5aac31cf2eab0dc18f0b3504c7c", + "sha256:63ef8e5b76cd54578a7fd5f72d8c698d9ccd7c555c7900ebfd38a24d397c3b15", + "sha256:73200c59bb253b880825466717941ac57267f2f685b053e183183cb6fe82874d", + "sha256:7353af0e9c1f42fbf0ab340e253eeb333d58c890fa91d3eadb1b9adaf9336732", + "sha256:742c486d5b792c4ad76e09426161302edddca85efe826fa01dcee50907326cd7", + "sha256:753aaae817d658a1e9d271663439d8e83d9d8effa45590ecdcadc600c7cf77e3", + "sha256:76b3480e7037ac9390c450e2aff9e46e2c9e61520c0d88afe228110ec728adc5", + "sha256:800e73d2110b97a74c52db2c8ce03a78e96f0d66a7e0c87d8254170a67c2db0e", + "sha256:85585e669d7679126e4a04e4bc0a063a641175a74eecfe47539e8da3e5b1da6e", + "sha256:8d4dfc276f988175baaa4ab321c3321a16ce33db3356c9bc5f4dea0db3de55aa", + "sha256:91be189fae8ba242528333e266e38d65cae3d9a09fe45867fab8578a3ddf2ea2", + "sha256:9484a3fc33f812234e58a5ee834b42bb0a628054d61b5c06c323aa56c12e557d", + "sha256:9513ae81fcc805671ae134c4c7421ca322acf92ce8b33817e1775ea8c0176973", + "sha256:95d13b38c9055d607565b77cbae12e2bf0c1671c5cb8f2ee2e1230d41d2d6d34", + "sha256:9983e99e3a61452b39ffce98206c7e4c6d260f4e917c8fe53fb54aaf25df89a3", + "sha256:9a59603cf42c20d02ee5add2e3d0ce48e89c480a2a02f642fb77f142c4f37958", + "sha256:a57d81260e048caacf43a2f851766687f53e8a8356df6947fb0eee7336a7e2de", + "sha256:a7977eeaceaeb0dfffcc5643c985c337ebc33a0b1d792ae0a9b1331cdd97366f", + "sha256:add793f5f5c7c7a3eb1b09ffc771bdaae10a0bd482a370bf696b83f8dee8d1b4", + "sha256:ae82a6d9cbaff8cb7505e81b5b7f9cd7756bb7e7110aef7914375fe56b012a90", + "sha256:af6df5ec1d66ee2d962026635d60e84ad35fc01b2a1e36b993360c0ce60ae349", + "sha256:afa9a11aa2880dd8689894f3269b653e6d3bd1956963d5329be9a5bf021dab62", + "sha256:b0ed760e6aa5316227a0ba2d5d29634a4ef2d72c8bc55169ac01664e17e4b536", + "sha256:b44473bb32217c78e18db66f497f6c8be33e339bab5f52398bb2468c904d5140", + "sha256:b67dad339f9b6701f74ff7a6e901c7909eca4eea02cf955b28d87a42650bd1be", + "sha256:bc9052a814e8b7ec756371a191f38de68b956437e0bb429c2dd503e658f298f9", + "sha256:c1f5f287cc7ae127ed6a2fb1546e631b316a41d087d7d2db9caa3e5f59906dcf", + "sha256:c3ae6d22b7cc599220a26b06da6ead9fd582eea5fdb6273b06fa3f060d0a26a7", + "sha256:c42f3a6cd20153925b00c49af855a3277989d411bb8ea849095be943ee160821", + "sha256:c7ca64a2a97f708569dcabd55865915943e30267bf6d26c4d212d005951efe62", + "sha256:caf9b1feb69379d424a1d3d7c899b8e0389a3fb3131d39c3c03dcc3d4a93dbdc", + "sha256:cb68445414940efe547291340e91604c7b8379b60822678ef29f4fc2a0e11c62", 
+ "sha256:cc8e5f38842a4b043c9592bfa9a740147ddb8fac2d7a5b7bf6d52466c090ec23", + "sha256:cd6f373dbf592ad83aaf95c16abebc8678928e49bd509ebd593259e1908345ae", + "sha256:d2da2a9e28dceab4832945a745cad507579f52b4d0c9e2f54ae156eb56875861", + "sha256:d373c6bb8e21d5f7ec0833b76fa1ab480086ada602ef5bbf4724a25a21a00b6a", + "sha256:d722d6a27230c1f395535da5cee6a9a16497c6343afd262c846090075c083009", + "sha256:db1ac7f2d1862eb4c448cf76183399d555a63dbe2452bafecb1c2f691e36d687", + "sha256:df78a0409dca11554a4b6442d7a8e61f762c3cfc78d55d98352392869a6b9ae0", + "sha256:e30fe2491d095c6d695a2c96257967fd3e2497f0f777030c8492d03c18d46e2a", + "sha256:e370ce076781adea161b04d2f666e8b4f89bc7e8927ef842fbb0283d3bfa73e0", + "sha256:ecd3cb0d13ac92e7b9235d1024dba9af0788161246f12dcf1f635d634ccb206a", + "sha256:ed0fd5265905cc7e23709479bc152d69f4972dec32fa322d20cb77f749707e78", + "sha256:f6d87d0c2e87990bc00c049742d36a5dbbb1510949459af17198728890ee748a", + "sha256:f7434c77cd23592973ed63341b8d337e6aebaba5ed40d7f22e2d43dfd0c3a56e", + "sha256:f8b67ae37af8aac6160fda66c0f73bcdf65c06da9022eb76192c3fc45cfab994", + "sha256:f8f238144a302f17eb26b122cad9382fcff5ec6653b8a562130b9a5e44010098", + "sha256:fa685f7e43ce490dfb1e392ac48f584b75cd21f05dc526c160d15308236ce8a0", + "sha256:fce5feb57f6231376eb10d1fb68c60da106ffa0b520b3129471c466eff0304cc", + "sha256:fdafb93bfd5532b13a53c4090675bcd31724160017ecc73e492dc1211bc0377a", + "sha256:fe70d0134fe2115c08866f0cac0eb5c0788093872b5026eb438a74e1ebafd659", + "sha256:ff3ddedaa95284f4f22a92b362f658f3d92f272d8c0fa009051bd5490c4d5a04" + ], + "version": "==0.5.9" + }, + "ypy-websocket": { + "hashes": [ + "sha256:491b2cc4271df4dde9be83017c15f4532b597dc43148472eb20c5aeb838a5b46", + "sha256:9049d5a7d61c26c2b5a39757c9ffcbe2274bf3553adeea8de7fe1c04671d4145" + ], + "markers": "python_version >= '3.7'", + "version": "==0.8.2" + }, + "zipp": { + "hashes": [ + "sha256:188834565033387710d046e3fe96acfc9b5e86cbca7f39ff69cf21a4128198b7", + "sha256:9e5421e176ef5ab4c0ad896624e87a7b2f07aca746c9b2aa305952800cb8eecb" + ], + "markers": "python_version >= '3.7'", + "version": "==3.14.0" } }, "develop": { + "asttokens": { + "hashes": [ + "sha256:4622110b2a6f30b77e1473affaa97e711bc2f07d3f10848420ff1898edbe94f3", + "sha256:6b0ac9e93fb0335014d382b8fa9b3afa7df546984258005da0b9e7095b3deb1c" + ], + "version": "==2.2.1" + }, "backcall": { "hashes": [ "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", @@ -717,43 +2303,50 @@ }, "decorator": { "hashes": [ - "sha256:7b12e7c3c6ab203a29e157335e9122cb03de9ab7264b137594103fd4a683b374", - "sha256:e59913af105b9860aa2c8d3272d9de5a56a4e608db9a2f167a8480b323d529a7" + "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", + "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186" ], "markers": "python_version >= '3.5'", - "version": "==5.1.0" + "version": "==5.1.1" + }, + "executing": { + "hashes": [ + "sha256:0314a69e37426e3608aada02473b4161d4caf5a4b244d1d0c48072b8fee7bacc", + "sha256:19da64c18d2d851112f09c287f8d3dbbdf725ab0e569077efb6cdcbd3497c107" + ], + "version": "==1.2.0" }, "ipython": { "hashes": [ - "sha256:4f69d7423a5a1972f6347ff233e38bbf4df6a150ef20fbb00c635442ac3060aa", - "sha256:a658beaf856ce46bc453366d5dc6b2ddc6c481efd3540cb28aa3943819caac9f" + "sha256:b13a1d6c1f5818bd388db53b7107d17454129a70de2b87481d555daede5eb49e", + "sha256:b38c31e8fc7eff642fc7c597061fff462537cf2314e3225a19c906b7b0d8a345" ], - "index": "pypi", - "version": "==7.29.0" + "markers": "python_version >= '3.8'", + "version": "==8.10.0" }, "jedi": { "hashes": [ - 
"sha256:637c9635fcf47945ceb91cd7f320234a7be540ded6f3e99a50cb6febdfd1ba8d", - "sha256:74137626a64a99c8eb6ae5832d99b3bdd7d29a3850fe2aa80a4126b2a7d949ab" + "sha256:203c1fd9d969ab8f2119ec0a3342e0b49910045abe6af0a3ae83a5764d54639e", + "sha256:bae794c30d07f6d910d32a7048af09b5a39ed740918da923c6b780790ebac612" ], "markers": "python_version >= '3.6'", - "version": "==0.18.1" + "version": "==0.18.2" }, "matplotlib-inline": { "hashes": [ - "sha256:a04bfba22e0d1395479f866853ec1ee28eea1485c1d69a6faf00dc3e24ff34ee", - "sha256:aed605ba3b72462d64d475a21a9296f400a19c4f74a31b59103d2a99ffd5aa5c" + "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311", + "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304" ], "markers": "python_version >= '3.5'", - "version": "==0.1.3" + "version": "==0.1.6" }, "parso": { "hashes": [ - "sha256:12b83492c6239ce32ff5eed6d3639d6a536170723c6f3f1506869f1ace413398", - "sha256:a8c4922db71e4fdb90e0d0bc6e50f9b273d3397925e5e60a717e719201778d22" + "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0", + "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75" ], "markers": "python_version >= '3.6'", - "version": "==0.8.2" + "version": "==0.8.3" }, "pexpect": { "hashes": [ @@ -772,41 +2365,63 @@ }, "prompt-toolkit": { "hashes": [ - "sha256:449f333dd120bd01f5d296a8ce1452114ba3a71fae7288d2f0ae2c918764fa72", - "sha256:48d85cdca8b6c4f16480c7ce03fd193666b62b0a21667ca56b4bb5ad679d1170" + "sha256:6a2948ec427dfcc7c983027b1044b355db6aaa8be374f54ad2015471f7d81c5b" ], - "markers": "python_full_version >= '3.6.2'", - "version": "==3.0.22" + "markers": "python_version >= '3.7'", + "version": "==3.0.37" }, "ptyprocess": { "hashes": [ "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" ], + "markers": "os_name != 'nt'", "version": "==0.7.0" }, + "pure-eval": { + "hashes": [ + "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", + "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" + ], + "version": "==0.2.2" + }, "pygments": { "hashes": [ - "sha256:b8e67fe6af78f492b3c4b3e2970c0624cbf08beb1e493b2c99b9fa1b67a20380", - "sha256:f398865f7eb6874156579fdf36bc840a03cab64d1cde9e93d68f46a425ec52c6" + "sha256:b3ed06a9e8ac9a9aae5a6f5dbe78a8a58655d17b43b93c078f094ddc476ae297", + "sha256:fa7bd7bd2771287c0de303af8bfdfc731f51bd2c6a47ab69d117138893b82717" ], - "markers": "python_version >= '3.5'", - "version": "==2.10.0" + "markers": "python_version >= '3.6'", + "version": "==2.14.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "stack-data": { + "hashes": [ + "sha256:32d2dd0376772d01b6cb9fc996f3c8b57a357089dec328ed4b6553d037eaf815", + "sha256:cbb2a53eb64e5785878201a97ed7c7b94883f48b87bfb0bbe8b623c74679e4a8" + ], + "version": "==0.6.2" }, "traitlets": { "hashes": [ - "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7", - "sha256:2d313cc50a42cd6c277e7d7dc8d4d7fedd06a2c215f78766ae7b1a66277e0033" + "sha256:9e6ec080259b9a5940c797d58b613b5e31441c2257b87c2e795c5228ae80d2d8", + "sha256:f6cde21a9c68cf756af02035f72d5a723bf607e862e7be33ece505abf4a3bad9" ], "markers": "python_version >= '3.7'", - "version": "==5.1.1" + "version": "==5.9.0" 
}, "wcwidth": { "hashes": [ - "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784", - "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83" + "sha256:795b138f6875577cd91bba52baf9e445cd5118fd32723b460e30a0af30ea230e", + "sha256:a5220780a404dbe3353789870978e472cfe477761f06ee55077256e509b156d0" ], - "version": "==0.2.5" + "version": "==0.2.6" } } } diff --git a/README.md b/README.md index 24cee3ce7..8c7c7570a 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,17 @@ -## Cahaba: Flood Inundation Mapping for U.S. National Water Model +## Inundation Mapping: Flood Inundation Mapping for U.S. National Water Model -Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Water Center (NWC). +This repository includes flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Water Center (NWC). This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs), Synthetic Rating Curves (SRCs), and catchment grids. This repository also includes functionality to generate flood inundation maps (FIMs) and evaluate FIM accuracy. -#### For more information, see the [Cahaba Wiki](https://github.com/NOAA-OWP/cahaba/wiki). +#### For more information, see the [Inundation Mapping Wiki](https://github.com/NOAA-OWP/cahaba/wiki). + +--- + +# FIM Version 4 ## Accessing Data through ESIP S3 Bucket -The latest national generated HAND data and a subset of the inputs can be found in an Amazon S3 Bucket hosted by [Earth Science Information Partners (ESIP)](https://www.esipfed.org/). These data can be accessed using the AWS CLI tools. +The latest national generated HAND data and a subset of the inputs can be found in an Amazon S3 Bucket hosted by [Earth Science Information Partners (ESIP)](https://www.esipfed.org/). These data can be accessed using the AWS CLI tools. You will need permission from ESIP to access this data. Please contact Carson Pruitt (carson.pruitt@noaa.gov) or Fernando Salas (fernando.salas@noaa.gov) for assistance. AWS Region: `US East (N. Virginia) us-east-1` @@ -32,8 +36,11 @@ aws s3 ls s3://noaa-nws-owp-fim/ --request-payer requester Download a directory of outputs for a HUC8: ``` -aws s3 cp --recursive s3://noaa-nws-owp-fim/hand_fim/fim_3_0_21_0/outputs/fr/12090301 12090301 --request-payer requester +aws s3 cp --recursive s3://noaa-nws-owp-fim/hand_fim/outputs/fim_4_0_18_02/12090301 /your_local_folder_name/12090301 --request-payer requester ``` +By adjusting pathing, you can also download entire directories such as the fim_4_0_18_0 folder. +**Note**: There may be newer editions than fim_4_0_18_0, and it is recommended to adjust the command above for the latest version. + ## Running the Code ### Input Data @@ -44,7 +51,7 @@ Input data can be found on the ESIP S3 Bucket (see "Accessing Data through ESIP ### Installation 1. Install Docker : [Docker](https://docs.docker.com/get-docker/) -2. Build Docker Image : `docker build -f Dockerfile.dev -t : ` +2. Build Docker Image : `docker build -f Dockerfile -t : ` 3. Create FIM group on host machine: - Linux: `groupadd -g 1370800178 fim` 4. 
Change group ownership of repo (needs to be redone when a new file occurs in the repo): @@ -54,18 +61,40 @@ Input data can be found on the ESIP S3 Bucket (see "Accessing Data through ESIP This software is configurable via parameters found in the `config` directory. Copy files before editing and remove "template" pattern from the filename. Make sure to set the config folder group to 'fim' recursively using the chown command. Each development version will include a calibrated parameter set of manning’s n values. - `params_template.env` -- `mannings_default.json` - - must change filepath in `params_template.env` in `manning_n` variable name -- `params_calibrated.env` - - runs calibrated mannings parameters from `mannings_calibrated.json` + +This system has an optional tool called the `calibration database tool`. To use this tool, you have three options: +1. Install the calibration database service. +2. Disable it by providing the `-skipcal` command line option to `fim_pipeline.sh` or `fim_pre_processing.sh`. +3. Disable it in the `params_template.env` file. See [calibration tool README](https://github.com/NOAA-OWP/inundation-mapping/blob/dev/tools/calibration-db/README.md) for more details. + +### Start/run the Docker Container + +Since all of the dependencies are managed within a Docker container, we must issue the [`docker run`](https://docs.docker.com/engine/reference/run/#clean-up---rm) command to start a container as the run-time environment. The container is launched from a Docker Image which was built in [Installation](#installation) step 2. The correct input file pathing is necessary for the `/data` volume mount (`-v`) for the `<data_path>`. The `<data_path>` should contain a subdirectory named `/inputs` (similar to `s3://noaa-nws-owp-fim/hand_fim`). If the pathing is set correctly, we do not need to adjust the `params_template.env` file, and can use the default file paths provided. + +```bash +docker run --rm -it --name <container_name> -v <repo_path>/:/foss_fim -v <output_path>/:/outputs -v <data_path>:/data <image_name>:<tag> +``` +For example: +```bash +docker run --rm -it --name robs_container -v /home/projects/inundation-mapping/:/foss_fim -v /home/projects/fim/outputs/:/outputs -v /home/projects/fim/inputs/:/data fim_4:dev_20230224_ad87a74 +``` ### Produce HAND Hydrofabric ``` -fim_run.sh -u <hucs> -c /foss_fim/config/<params_file.env> -n <name_your_run> +fim_pipeline.sh -u <hucs> -n <name_your_run> ``` -- `-u` can be a single huc, a series passed in quotes, or a line-delimited file - i. To run entire domain of available data use one of the ```/data/inputs/included_huc[4,6,8].lst``` files -- Outputs can be found under ```/data/outputs/``` +- There are a wide number of options and default values; for details, run ```fim_pipeline.sh -h```. +- Mandatory arguments: + - `-u` can be a single HUC, a space-delimited series passed in quotes, or a line-delimited (.lst) file. To run the entire domain of available data, use one of the ```/data/inputs/included_huc8.lst``` files or a HUC list file of your choice. Depending on the performance of your server, especially the number of CPU cores, running the full domain can take multiple days. + - `-n` is the name of your run (alphanumeric only) +- Outputs can be found under ```/outputs/<name_your_run>```.
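For instance, a minimal run over two HUC8s might look like the sketch below. The HUC numbers and run name are illustrative placeholders, and the `-skipcal` variant only applies if you have chosen not to install the optional calibration database tool described above.

```bash
# Two HUC8s processed in a single run named "test_run_01" (HUCs and run name are placeholders).
fim_pipeline.sh -u "12090301 12090302" -n test_run_01

# The same run, but skipping the optional calibration database tool.
fim_pipeline.sh -u "12090301 12090302" -n test_run_01 -skipcal
```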
+ +Processing of HUCs in FIM4 comes in three pieces. You can run `fim_pipeline.sh`, which automatically runs all three major sections, but you can also run each of the sections independently if you like. The three sections are: +- `fim_pre_processing.sh` : This section must be run first, as it creates the basic output folder for the run. It also creates a number of key files and folders for the next two sections. +- `fim_process_unit_wb.sh` : This script processes one and exactly one HUC8 plus all of its related branches. While it can only process one HUC at a time, you can run this script multiple times, each with a different HUC (or overwriting a HUC). When you run `fim_pipeline.sh`, it automatically iterates when more than one HUC number has been supplied either by command line arguments or via a HUC list; for each HUC provided, `fim_pipeline.sh` will call `fim_process_unit_wb.sh`. Using the `fim_process_unit_wb.sh` script directly allows you to run or rerun a single HUC, or to run other HUCs at different times, on different days, or even in different Docker containers. +- `fim_post_processing.sh` : This section takes all of the HUCs that have been processed, aggregates key information from each HUC directory, and looks for errors across all HUC folders. It also processes the group in sub-steps such as USGS gauge processing, rating curve adjustments, and more. Naturally, running or re-running this script can only be done after running `fim_pre_processing.sh` and at least one run of `fim_process_unit_wb.sh`. + +Running `fim_pipeline.sh` is quicker than running the three steps independently.
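If you do run the pieces separately, the overall flow is sketched below. This is illustrative only: the run name and HUC numbers are placeholders, and the exact arguments of the three scripts (including the positional style shown for `fim_process_unit_wb.sh`) are assumptions, so confirm them with each script's `-h` help before use.

```bash
# 1) Pre-processing: create the output folder and shared files for the run (once per run).
#    Flags shown mirror fim_pipeline.sh; confirm with: fim_pre_processing.sh -h
fim_pre_processing.sh -u "12090301 12090302" -n test_run_01

# 2) Unit processing: one HUC8 (plus its branches) per invocation.
#    These calls can happen on different days or even in different containers.
fim_process_unit_wb.sh test_run_01 12090301   # argument order is an assumption; see -h
fim_process_unit_wb.sh test_run_01 12090302

# 3) Post-processing: aggregate results and run the cross-HUC sub-steps.
fim_post_processing.sh -n test_run_01
```

Because the middle step operates on a single HUC, it is also the natural place to rerun one HUC without repeating the rest of the run.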
### Testing in Other HUCs To test in HUCs other than the provided HUCs, the following processes can be followed to acquire and preprocess additional NHDPlus rasters and vectors. After these steps are run, the "Produce HAND Hydrofabric" step can be run for the new HUCs. @@ -73,6 +102,8 @@ To test in HUCs other than the provided HUCs, the following processes can be fol ``` /foss_fim/src/acquire_and_preprocess_inputs.py -u <huc4s> ``` + Note: This tool is deprecated; updates will be coming soon. + - `-u` can be a single HUC4, series of HUC4s (e.g. 1209 1210), path to line-delimited file with HUC4s. - Please run `/foss_fim/src/acquire_and_preprocess_inputs.py --help` for more information. - See United States Geological Survey (USGS) National Hydrography Dataset Plus High Resolution (NHDPlusHR) [site](https://www.usgs.gov/core-science-systems/ngp/national-hydrography/nhdplus-high-resolution) for more information @@ -81,14 +112,15 @@ To test in HUCs other than the provided HUCs, the following processes can be fol ``` /foss_fim/src/preprocess_rasters.py ``` + Note: This tool is deprecated; updates will be coming soon. ---- ### Evaluating Inundation Map Performance -After `fim_run.sh` completes, you can evaluate the model's skill. The evaluation benchmark datasets are available through ESIP in the `test_cases` directory. +After `fim_pipeline.sh` completes (or the combination of the three major steps described above), you can evaluate the model's skill. The evaluation benchmark datasets are available through ESIP in the `test_cases` directory. To evaluate model skill, run the following: ``` -python /foss_fim/tools/synthesize_test_cases.py -c DEV -v <fim_version> -m <path_to_metrics_csv> -j [num_of_jobs] +python /foss_fim/tools/synthesize_test_cases.py -c DEV -v <fim_version> -m <path_to_metrics_csv> -j [num_of_jobs (cores and/or procs)] ``` More information can be found by running: @@ -99,30 +131,44 @@ python /foss_fim/tools/synthesize_test_cases.py --help ---- ### Managing Dependencies -Dependencies are managed via [Pipenv](https://pipenv.pypa.io/en/latest/). To add new dependencies, from the projects's top-level directory: +Dependencies are managed via [Pipenv](https://pipenv.pypa.io/en/latest/). -```bash -pipenv install ipython --dev -``` +When you execute `docker build` from the `Installation` section above, all of the dependencies you need are included. This includes dependencies for you to work in JupyterLab for testing purposes. -The `--dev` flag adds development dependencies, omit it if you want to add a production dependency. If the environment looks goods after adding dependencies, lock it with: +While rarely needed, you may want to add more dependencies. If so, follow these steps: -```bash -pipenv lock -``` +- From inside your docker container, run the following command: + ```bash + pipenv install <package_name> --dev + ``` + The `--dev` flag adds development dependencies; omit it if you want to add a production dependency. + + This will automatically update the Pipfile in the root of your docker container directory. If the environment looks good after adding dependencies, lock it with: -and include both `Pipfile` and `Pipfile.lock` in your commits. The docker image installs the environment from the lock file. + ```bash + pipenv lock + ``` + + This will update the `Pipfile.lock`. Copy the updated `Pipfile` and `Pipfile.lock` into the source directory and include both in your git commits. The docker image installs the environment from the lock file. + +**Make sure you test heavily, including building new Docker images, and confirm that the code continues to work.** If you are on a machine that has a particularly slow internet connection, you may need to increase the timeout of pipenv. To do this, simply add `PIPENV_INSTALL_TIMEOUT=10000000` in front of any of your pipenv commands. + +---- +## Citing This Work + +Please cite this work in your research and projects according to the CITATION.cff file found in the root of this repository. + ---- ### Known Issues & Getting Help Please see the issue tracker on GitHub and the [Inundation Mapping Wiki](https://github.com/NOAA-OWP/inundation-mapping/wiki/Known-Shortcomings-and-Opportunities-for-Improvement) for known issues and getting help. ### Getting Involved NOAA's National Water Center welcomes anyone to contribute to the Inundation Mapping repository to improve flood inundation mapping capabilities. Please contact Carson Pruitt (carson.pruitt@noaa.gov) or Fernando Salas (fernando.salas@noaa.gov) to get started. ### Open Source Licensing Info 1.
[TERMS](docs/TERMS.md) diff --git a/api/.gitignore b/api/.gitignore deleted file mode 100644 index 2eea525d8..000000000 --- a/api/.gitignore +++ /dev/null @@ -1 +0,0 @@ -.env \ No newline at end of file diff --git a/api/frontend/.env-template b/api/frontend/.env-template deleted file mode 100644 index afd955443..000000000 --- a/api/frontend/.env-template +++ /dev/null @@ -1,2 +0,0 @@ -DATA_PATH= -SOCKET_URL= \ No newline at end of file diff --git a/api/frontend/docker-compose-dev.yml b/api/frontend/docker-compose-dev.yml deleted file mode 100644 index 1f7b0ca2d..000000000 --- a/api/frontend/docker-compose-dev.yml +++ /dev/null @@ -1,52 +0,0 @@ -version: '3.5' -services: - fim_frontend_gui: - image: fim_frontend_gui - build: - context: ./gui - container_name: fim_frontend_gui - restart: always - env_file: - - .env - expose: - - "5000" - networks: - - fim - volumes: - - ./gui/templates/:/opt/gui/templates/ - - ./gui/gui.py:/opt/gui/gui.py - fim_frontend_output_handler: - image: fim_frontend_output_handler - build: - context: ./output_handler - container_name: fim_frontend_output_handler - restart: always - external_links: - - fim_node_connector - env_file: - - .env - networks: - - fim - volumes: - - ${DATA_PATH}:/data/ - - ./output_handler/output_handler.py:/opt/output_handler/output_handler.py - fim_nginx: - image: nginx - container_name: fim_nginx - restart: always - depends_on: - - fim_frontend_gui - external_links: - - fim_node_connector - ports: - - "80:80" - volumes: - - ./nginx-dev.conf:/etc/nginx/nginx.conf:ro - networks: - - fim - command: [nginx] - -networks: - fim: - name: fim - \ No newline at end of file diff --git a/api/frontend/docker-compose-prod.yml b/api/frontend/docker-compose-prod.yml deleted file mode 100644 index b408fe054..000000000 --- a/api/frontend/docker-compose-prod.yml +++ /dev/null @@ -1,47 +0,0 @@ -version: '3.5' -services: - fim_frontend_gui: - image: fim_frontend_gui - build: - context: ./gui - container_name: fim_frontend_gui - restart: always - env_file: - - .env - expose: - - "5000" - networks: - - fim - fim_frontend_output_handler: - image: fim_frontend_output_handler - build: - context: ./output_handler - container_name: fim_frontend_output_handler - restart: always - external_links: - - fim_node_connector - env_file: - - .env - networks: - - fim - volumes: - - ${DATA_PATH}:/data/ - fim_nginx: - image: nginx - container_name: fim_nginx - restart: always - depends_on: - - fim_frontend_gui - external_links: - - fim_node_connector - ports: - - "80:80" - volumes: - - ./nginx-prod.conf:/etc/nginx/nginx.conf:ro - networks: - - fim - command: [nginx] - -networks: - fim: - name: fim \ No newline at end of file diff --git a/api/frontend/gui/Dockerfile b/api/frontend/gui/Dockerfile deleted file mode 100644 index fb5d0750e..000000000 --- a/api/frontend/gui/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM python:3.8.5-slim-buster - -ENV PYTHONUNBUFFERED 1 -RUN mkdir -p /opt/gui -WORKDIR /opt/gui - -COPY requirements.txt . -RUN pip install -r requirements.txt --no-cache-dir -COPY . 
/opt/gui - -EXPOSE 5000 - -RUN chmod +x /opt/gui/entrypoint.sh -ENTRYPOINT ["/opt/gui/entrypoint.sh"] diff --git a/api/frontend/gui/entrypoint.sh b/api/frontend/gui/entrypoint.sh deleted file mode 100755 index fbf7d3145..000000000 --- a/api/frontend/gui/entrypoint.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -cd /opt/gui/ -echo "Starting Gunicorn" -exec gunicorn --bind 0.0.0.0:5000 --reload wsgi:app \ No newline at end of file diff --git a/api/frontend/gui/gui.py b/api/frontend/gui/gui.py deleted file mode 100644 index 456622d72..000000000 --- a/api/frontend/gui/gui.py +++ /dev/null @@ -1,16 +0,0 @@ -import os -from gevent import monkey -monkey.patch_all() - -from flask import Flask, render_template, request - -SOCKET_URL = os.environ.get('SOCKET_URL') - -app = Flask(__name__) - -@app.route('/') -def main(): - return render_template('index.html', socket_url=SOCKET_URL) - -if __name__ == '__main__': - app.run("0.0.0.0", port=5000) \ No newline at end of file diff --git a/api/frontend/gui/requirements.txt b/api/frontend/gui/requirements.txt deleted file mode 100644 index a78a884a3..000000000 --- a/api/frontend/gui/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -flask==1.1.2 -flask-socketio==5.0.0 -gevent==20.9.0 -gunicorn==20.0.4 \ No newline at end of file diff --git a/api/frontend/gui/templates/index.html b/api/frontend/gui/templates/index.html deleted file mode 100644 index 421050dbd..000000000 --- a/api/frontend/gui/templates/index.html +++ /dev/null @@ -1,560 +0,0 @@ - - - Cahaba API - - - - - - - - -
[Deleted index.html body (560 lines of HTML/JavaScript for the Cahaba API web GUI): job-type tabs Release, FIM Run, Calibration, and Pre-processing; Basic, Configuration, and Extent form sections; a "Not Connected" connection-status indicator; and a jobs table with columns Name, Time Elapsed, Status, and Outputs Saved.]
- - - - - - \ No newline at end of file diff --git a/api/frontend/gui/wsgi.py b/api/frontend/gui/wsgi.py deleted file mode 100644 index b9303d13e..000000000 --- a/api/frontend/gui/wsgi.py +++ /dev/null @@ -1,4 +0,0 @@ -from gui import app - -if __name__ == "__main__": - app.run() \ No newline at end of file diff --git a/api/frontend/nginx-dev.conf b/api/frontend/nginx-dev.conf deleted file mode 100644 index 97e37c1f0..000000000 --- a/api/frontend/nginx-dev.conf +++ /dev/null @@ -1,72 +0,0 @@ -user nginx; -worker_processes 1; -pid /var/run/nginx.pid; -daemon off; - -events { - worker_connections 512; - # multi_accept on; -} - -http { - sendfile on; - tcp_nopush on; - tcp_nodelay on; - proxy_connect_timeout 300; - proxy_send_timeout 300; - proxy_read_timeout 90m; - send_timeout 300; - keepalive_timeout 65; - types_hash_max_size 2048; - - include /etc/nginx/mime.types; - default_type application/octet-stream; - - log_format upstream_time '$remote_addr - $remote_user [$time_local] ' - '"$request" $status $body_bytes_sent ' - '"$http_referer" "$http_user_agent"' - 'rt=$request_time uct="$upstream_connect_time" uht="$upstream_header_time" urt="$upstream_response_time"'; - - access_log /var/log/nginx/access.log upstream_time; - error_log /var/log/nginx/error.log warn; - - server { - listen 80; - client_header_buffer_size 64k; - large_client_header_buffers 4 64k; - server_name _; - root /var/www/; - - gzip on; - gzip_types application/json; - proxy_http_version 1.1; - - location /stats/nginx { - stub_status on; - } - - # Node side (these should only be used if the frontend is on the same machine as the connector) - location / { - proxy_pass http://fim_node_connector:6000/; - proxy_set_header Host $host; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /socket.io { - proxy_http_version 1.1; - proxy_buffering off; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "Upgrade"; - proxy_pass http://fim_node_connector:6000/socket.io; - proxy_set_header Host $host; - proxy_set_header X-Forwarded-Proto $scheme; - } - - # Frontend Side - location /api { - proxy_pass http://fim_frontend_gui:5000/; - proxy_set_header Host $host; - proxy_set_header X-Forwarded-Proto $scheme; - } - } -} diff --git a/api/frontend/nginx-prod.conf b/api/frontend/nginx-prod.conf deleted file mode 100644 index 878d423fc..000000000 --- a/api/frontend/nginx-prod.conf +++ /dev/null @@ -1,55 +0,0 @@ -user nginx; -worker_processes 1; -pid /var/run/nginx.pid; -daemon off; - -events { - worker_connections 512; - # multi_accept on; -} - -http { - sendfile on; - tcp_nopush on; - tcp_nodelay on; - proxy_connect_timeout 300; - proxy_send_timeout 300; - proxy_read_timeout 90m; - send_timeout 300; - keepalive_timeout 65; - types_hash_max_size 2048; - - include /etc/nginx/mime.types; - default_type application/octet-stream; - - log_format upstream_time '$remote_addr - $remote_user [$time_local] ' - '"$request" $status $body_bytes_sent ' - '"$http_referer" "$http_user_agent"' - 'rt=$request_time uct="$upstream_connect_time" uht="$upstream_header_time" urt="$upstream_response_time"'; - - access_log /var/log/nginx/access.log upstream_time; - error_log /var/log/nginx/error.log warn; - - server { - listen 80; - client_header_buffer_size 64k; - large_client_header_buffers 4 64k; - server_name _; - root /var/www/; - - gzip on; - gzip_types application/json; - proxy_http_version 1.1; - - location /stats/nginx { - stub_status on; - } - - # Frontend Side - location /api { - proxy_pass http://fim_frontend_gui:5000/; - 
proxy_set_header Host $host; - proxy_set_header X-Forwarded-Proto $scheme; - } - } -} diff --git a/api/frontend/output_handler/Dockerfile b/api/frontend/output_handler/Dockerfile deleted file mode 100644 index 68498a6d3..000000000 --- a/api/frontend/output_handler/Dockerfile +++ /dev/null @@ -1,12 +0,0 @@ -FROM python:3.8.5-slim-buster - -ENV PYTHONUNBUFFERED 1 -RUN mkdir -p /opt/output_handler -WORKDIR /opt/output_handler - -COPY requirements.txt . -RUN pip install -r requirements.txt --no-cache-dir -COPY . /opt/output_handler - -RUN chmod +x /opt/output_handler/entrypoint.sh -ENTRYPOINT ["/opt/output_handler/entrypoint.sh"] diff --git a/api/frontend/output_handler/entrypoint.sh b/api/frontend/output_handler/entrypoint.sh deleted file mode 100755 index 248541b09..000000000 --- a/api/frontend/output_handler/entrypoint.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh - -umask 002 -cd /opt/output_handler/ -echo "Starting Output Handler" -python ./output_handler.py \ No newline at end of file diff --git a/api/frontend/output_handler/output_handler.py b/api/frontend/output_handler/output_handler.py deleted file mode 100644 index 1d803a696..000000000 --- a/api/frontend/output_handler/output_handler.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -import time - -import socketio - -SOCKET_URL = os.environ.get('SOCKET_URL') - -def handle_outputs(data): - job_name = data['job_name'] - directory_path = data['directory_path'] - file_name = data['file_name'] - file_chunk = data['file_chunk'] - chunk_index = data['chunk_index'] - - # Create folder if it doesn't yet exist and set writing mode - mode = 'ab' - if chunk_index == 0: - mode = 'wb' - try: - os.makedirs(directory_path) - except: - pass - - # Write binary data to file - with open(f"{directory_path}/{file_name}", mode) as binary_file: - print(f"Writing chunk {chunk_index} for file {directory_path}/{file_name}") - binary_file.write(file_chunk) - - sio.emit('output_handler_finished_file_chunk', {'job_name': job_name, 'file_path': f"{directory_path}/{file_name}"}) - -sio = socketio.Client() - -@sio.event -def connect(): - print("Output Handler Connected!") - sio.emit('output_handler_connected') - -@sio.event -def disconnect(): - print('disconnected from server') - -@sio.on('new_job_outputs') -def ws_new_job_outputs(data): - handle_outputs(data) - -sio.connect(SOCKET_URL) \ No newline at end of file diff --git a/api/frontend/output_handler/requirements.txt b/api/frontend/output_handler/requirements.txt deleted file mode 100644 index a2217b4e9..000000000 --- a/api/frontend/output_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -python-engineio[client]==4.0.0 -python-socketio[client]==5.0.3 \ No newline at end of file diff --git a/api/node/.env-template b/api/node/.env-template deleted file mode 100644 index 352f2d2ca..000000000 --- a/api/node/.env-template +++ /dev/null @@ -1,6 +0,0 @@ -DATA_PATH= -DOCKER_IMAGE_PATH= -SOCKET_URL= -FRONTEND_URL= -GITHUB_REPO=https://github.com/NOAA-OWP/cahaba.git -MAX_ALLOWED_CPU_CORES= diff --git a/api/node/connector/Dockerfile b/api/node/connector/Dockerfile deleted file mode 100644 index 091fdb364..000000000 --- a/api/node/connector/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM docker:20.10.2-dind - -RUN apk add --no-cache python3 python3-dev py3-pip build-base openssl-dev libffi-dev git - -ENV PYTHONUNBUFFERED 1 -RUN mkdir -p /opt/connector -WORKDIR /opt/connector - -COPY requirements.txt . -RUN pip3 install -r requirements.txt --no-cache-dir -COPY . 
/opt/connector - -EXPOSE 6000 - -RUN chmod +x /opt/connector/entrypoint.sh -ENTRYPOINT ["/opt/connector/entrypoint.sh"] diff --git a/api/node/connector/connector.py b/api/node/connector/connector.py deleted file mode 100644 index 117448d4b..000000000 --- a/api/node/connector/connector.py +++ /dev/null @@ -1,227 +0,0 @@ -import eventlet -eventlet.monkey_patch() - -import os -import re -import time -import random -import logging -import subprocess -from datetime import date - -from flask import Flask, request -from flask_socketio import SocketIO, emit - -DATA_PATH = os.environ.get('DATA_PATH') -DOCKER_IMAGE_PATH = os.environ.get('DOCKER_IMAGE_PATH') -SOCKET_URL = os.environ.get('SOCKET_URL') -FRONTEND_URL = os.environ.get('FRONTEND_URL') -GITHUB_REPO = os.environ.get('GITHUB_REPO') - -app = Flask(__name__) -socketio = SocketIO(app, cors_allowed_origins=[SOCKET_URL, FRONTEND_URL, "http://fim_node_connector:6000"]) - -shared_data = { - 'handler_sid': None, - 'updater_sid': None -} - -@app.route('/') -def main(): - return '

Nothing to see here....

' - -@socketio.on('connect') -def ws_conn(): - print('user connected!') - emit('is_connected', True) - -@socketio.on('disconnect') -def ws_disconn(): - print('user disconnected!') - emit('is_connected', False) - -@socketio.on('update') -def ws_update(data): - emit('client_update', data, broadcast=True) - -@socketio.on('output_handler_connected') -def ws_output_handler_connected(): - print('handler_sid: ', request.sid) - shared_data['handler_sid'] = request.sid - emit('retry_saving_files', room=shared_data['updater_sid']) - -@socketio.on('updater_connected') -def ws_updater_connected(): - print('updater_sid: ', request.sid) - shared_data['updater_sid'] = request.sid - emit('retry_saving_files', room=shared_data['updater_sid']) - -@socketio.on('ready_for_output_handler') -def ws_ready_for_output_handler(data): - job_name = data['job_name'] - path = data['path'] - chunk_index = data['chunk_index'] - - if shared_data['handler_sid'] == None: - print("output handler not connected!") - emit('retry_saving_files') - return - - # Split up path into parts for the output handler - path_parts = re.search(rf"(.+)/(.+)", path) - directory_path = path_parts.group(1) - file_name = path_parts.group(2) - - file_read_start = time.time() - with open(path, "rb") as binary_file: - # Read and emit file chunk by chunk (50MB at a time) - binary_file.seek(chunk_index * 52428800) - file_chunk = binary_file.read(52428800) - - if len(file_chunk) == 0: - print('End of File') - emit('file_saved', { - 'job_name': job_name, - 'file_path': path - }, room=shared_data['updater_sid']) - return - - print("Sending to output handler", path, "Chunk:", chunk_index) - emit('new_job_outputs', { - 'job_name': job_name, - 'directory_path': directory_path, - 'file_name': file_name, - 'file_chunk': file_chunk, - 'chunk_index': chunk_index - }, room=shared_data['handler_sid']) - -@socketio.on('output_handler_finished_file_chunk') -def output_handler_finished_file_chunk(data): - job_name = data['job_name'] - file_path = data['file_path'] - - print('done saving chunk', job_name, file_path) - emit('file_chunk_saved', { - 'job_name': job_name, - 'file_path': file_path, - }, room=shared_data['updater_sid']) - -@socketio.on('new_job') -def ws_new_job(job_params): - job_type = job_params['job_type'] - - if job_type == 'fim_run': - validation_errors = [] - - # Get Preset Option - preset = job_params['preset'] - - # Validate Hucs Name Option - if preset == 'custom': - hucs_raw = job_params['hucs'].replace(',', ' ').split() - parallel_jobs = len(hucs_raw) - hucs_type = len(hucs_raw[0]) - hucs = ' '.join(hucs_raw) - invalid_hucs = re.search('[^0-9 ]', hucs) - if invalid_hucs: validation_errors.append('Invalid Huc(s)') - else: - hucs = f"/data/inputs/huc_lists/{preset}" - parallel_jobs = 0 - hucs_type = 0 - - # Validate Git Branch Option - branch = '' - branch_exists = subprocess.run(['git', 'ls-remote', '--heads', GITHUB_REPO, job_params['git_branch'].replace(' ', '_')], stdout=subprocess.PIPE).stdout.decode('utf-8') - if branch_exists: branch = job_params['git_branch'].replace(' ', '_') - else: validation_errors.append('Git Branch Does Not Exist') - - # Validate Extent Option - valid_extents = ['FR', 'MS'] - extents = [] - for extent in job_params['extents']: - if extent in valid_extents: - extents.append(extent) - else: - validation_errors.append('Invalid Extent Option') - - # Validate Configuration Option - config_path = '' - if job_params['configuration'] == 'default': config_path = './foss_fim/config/params_template.env' - elif 
job_params['configuration'] == 'calibrated': config_path = './foss_fim/config/params_calibrated.env' - else: validation_errors.append('Invalid Configuration Option') - - # Validate Dev Run Option - if job_params['dev_run'] : dev_run = True - else: dev_run = False - - # Validate Viz Run Option - if job_params['viz_run'] : viz_run = True - else: viz_run = False - - if len(validation_errors) == 0: - for extent in extents: - # Validate Job Name Option - job_name = f"apijob_{job_params['job_name'].replace(' ', '_')[0:50]}_fim_run_{extent.lower()}{'_c' if job_params['configuration'] == 'calibrated' else ''}{'_v' if viz_run == True else ''}_apijob_{branch}_{date.today().strftime('%d%m%Y')}_{random.randint(0, 99999)}" - print(f"adding job {job_name} {branch} {preset} {hucs} {parallel_jobs} {hucs_type} {extent.lower()} {config_path} {dev_run} {viz_run}") - emit('add_job_to_queue', { - 'job_type': 'fim_run', - 'job_name': job_name, - 'branch': branch, - 'hucs': hucs, - 'parallel_jobs': parallel_jobs, - 'hucs_type': hucs_type, - 'extent': extent, - 'config_path': config_path, - 'dev_run': dev_run, - 'viz_run': viz_run, - }, room=shared_data['updater_sid']) - print('fim_run job added') - emit('job_added', 'fim_run') - else: - emit('validation_errors', validation_errors) - - elif job_type == 'release': - job_version_major = job_params['job_version_major'] - job_version_minor = job_params['job_version_minor'] - job_version_patch = job_params['job_version_patch'] - - # TODO: validate version number - - job_name_base = f"fim_3_{job_version_major}_{job_version_minor}_{job_version_patch}" - - prev_job_version_major = job_params['prev_job_version_major'] - prev_job_version_minor = job_params['prev_job_version_minor'] - prev_job_version_patch = job_params['prev_job_version_patch'] - - prev_version_base = f"fim_3_{prev_job_version_major}_{prev_job_version_minor}_{prev_job_version_patch}" - - huc_lists = ['/data/inputs/huc_lists/included_huc8.lst', '/data/inputs/huc_lists/included_huc8_ms.lst'] - extents = ['FR', 'MS'] - - for hucs, extent in zip(huc_lists, extents): - # Validate Job Name Option - prev_version = f"{prev_version_base}_{extent.lower()}_c" - job_name = f"apijob_{job_name_base}_{extent.lower()}_c_apijob_dev_{date.today().strftime('%d%m%Y')}_{random.randint(0, 99999)}" - print(f"adding job {job_name} {hucs} {extent.lower()}") - emit('add_job_to_queue', { - 'job_type': 'release', - 'job_name': job_name, - 'hucs': hucs, - 'extent': extent, - 'previous_major_fim_version': prev_version - }, room=shared_data['updater_sid']) - print('release job added') - emit('job_added', 'release') - - @socketio.on('cancel_job') - def ws_cancel_job(job_params): - # Validate Job Name Option - job_name = job_params['job_name'] - - emit('remove_job_from_queue', {'job_name': job_name}, room=shared_data['updater_sid']) - print('job canceled') - emit('job_canceled', 'fim_run') - - -if __name__ == '__main__': - socketio.run(app, host="0.0.0.0", port="6000") diff --git a/api/node/connector/entrypoint.sh b/api/node/connector/entrypoint.sh deleted file mode 100755 index d6d853d6a..000000000 --- a/api/node/connector/entrypoint.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -cd /opt/connector/ -echo "Starting Connector" -python3 ./connector.py diff --git a/api/node/connector/requirements.txt b/api/node/connector/requirements.txt deleted file mode 100644 index e0b0b19de..000000000 --- a/api/node/connector/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -flask==1.1.2 -flask-socketio==5.0.0 -eventlet==0.31.0 diff --git 
a/api/node/docker-compose-dev.yml b/api/node/docker-compose-dev.yml deleted file mode 100644 index 9823afad9..000000000 --- a/api/node/docker-compose-dev.yml +++ /dev/null @@ -1,38 +0,0 @@ -version: '3.5' -services: - fim_node_connector: - image: fim_node_connector - build: - context: ./connector - container_name: fim_node_connector - env_file: - - .env - restart: always - expose: - - "6000" - networks: - fim: - aliases: - - fimnodeconnector - volumes: - - ${DATA_PATH}:/data/ - - /var/run/docker.sock:/var/run/docker.sock - - ./connector/connector.py:/opt/connector/connector.py - fim_node_updater: - image: fim_node_updater - build: - context: ./updater - container_name: fim_node_updater - restart: always - depends_on: - - fim_node_connector - networks: - - fim - volumes: - - ${DATA_PATH}:/data/ - - /var/run/docker.sock:/var/run/docker.sock - - ./updater/updater.py:/opt/updater/updater.py -networks: - fim: - name: fim - \ No newline at end of file diff --git a/api/node/docker-compose-prod.yml b/api/node/docker-compose-prod.yml deleted file mode 100644 index f9787ab59..000000000 --- a/api/node/docker-compose-prod.yml +++ /dev/null @@ -1,53 +0,0 @@ -version: '3.5' -services: - fim_node_connector: - image: fim_node_connector - build: - context: ./connector - container_name: fim_node_connector - env_file: - - .env - restart: always - expose: - - "6000" - networks: - fim: - aliases: - - fimnodeconnector - volumes: - - ${DATA_PATH}:/data/ - - /var/run/docker.sock:/var/run/docker.sock - fim_node_updater: - image: fim_node_updater - build: - context: ./updater - container_name: fim_node_updater - env_file: - - .env - restart: always - depends_on: - - fim_node_connector - networks: - - fim - volumes: - - ${DATA_PATH}:/data/ - - /var/run/docker.sock:/var/run/docker.sock - fim_nginx: - image: nginx - container_name: fim_nginx - restart: always - depends_on: - - fim_node_connector - ports: - - "80:80" - volumes: - - ./nginx.conf:/etc/nginx/nginx.conf:ro - networks: - - fim - command: [nginx] - -networks: - fim: - name: fim - - \ No newline at end of file diff --git a/api/node/nginx.conf b/api/node/nginx.conf deleted file mode 100644 index c2fc935bd..000000000 --- a/api/node/nginx.conf +++ /dev/null @@ -1,64 +0,0 @@ -user nginx; -worker_processes 1; -pid /var/run/nginx.pid; -daemon off; - -events { - worker_connections 512; - # multi_accept on; -} - -http { - sendfile on; - tcp_nopush on; - tcp_nodelay on; - proxy_connect_timeout 300; - proxy_send_timeout 300; - proxy_read_timeout 90m; - send_timeout 300; - keepalive_timeout 65; - types_hash_max_size 2048; - - include /etc/nginx/mime.types; - default_type application/octet-stream; - - log_format upstream_time '$remote_addr - $remote_user [$time_local] ' - '"$request" $status $body_bytes_sent ' - '"$http_referer" "$http_user_agent"' - 'rt=$request_time uct="$upstream_connect_time" uht="$upstream_header_time" urt="$upstream_response_time"'; - - access_log /var/log/nginx/access.log upstream_time; - error_log /var/log/nginx/error.log warn; - - server { - listen 80; - client_header_buffer_size 64k; - large_client_header_buffers 4 64k; - server_name _; - root /var/www/; - - gzip on; - gzip_types application/json; - proxy_http_version 1.1; - - location /stats/nginx { - stub_status on; - } - - location / { - proxy_pass http://fim_node_connector:6000/; - proxy_set_header Host $host; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /socket.io { - proxy_http_version 1.1; - proxy_buffering off; - proxy_set_header Upgrade $http_upgrade; - 
proxy_set_header Connection "Upgrade"; - proxy_pass http://fim_node_connector:6000/socket.io; - proxy_set_header Host $host; - proxy_set_header X-Forwarded-Proto $scheme; - } - } -} \ No newline at end of file diff --git a/api/node/updater/Dockerfile b/api/node/updater/Dockerfile deleted file mode 100644 index d62a77652..000000000 --- a/api/node/updater/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM docker:20.10.2-dind - -RUN apk add --no-cache python3 python3-dev py3-pip build-base openssl-dev libffi-dev git - -ENV PYTHONUNBUFFERED 1 -RUN mkdir -p /opt/updater -WORKDIR /opt/updater - -COPY requirements.txt . -RUN pip3 install -r requirements.txt --no-cache-dir -COPY . /opt/updater - -RUN chmod +x /opt/updater/entrypoint.sh -ENTRYPOINT ["/opt/updater/entrypoint.sh"] diff --git a/api/node/updater/entrypoint.sh b/api/node/updater/entrypoint.sh deleted file mode 100755 index 0f74f2541..000000000 --- a/api/node/updater/entrypoint.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -cd /opt/updater/ -echo "Starting Update Loop" -python3 ./updater.py diff --git a/api/node/updater/requirements.txt b/api/node/updater/requirements.txt deleted file mode 100644 index a2217b4e9..000000000 --- a/api/node/updater/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -python-engineio[client]==4.0.0 -python-socketio[client]==5.0.3 \ No newline at end of file diff --git a/api/node/updater/updater.py b/api/node/updater/updater.py deleted file mode 100644 index 90fc0da71..000000000 --- a/api/node/updater/updater.py +++ /dev/null @@ -1,530 +0,0 @@ -import os -import re -import glob -import math -import time -import json -import shutil -import logging -import subprocess - -import socketio - -DATA_PATH = os.environ.get('DATA_PATH') -DOCKER_IMAGE_PATH = os.environ.get('DOCKER_IMAGE_PATH') -GITHUB_REPO = os.environ.get('GITHUB_REPO') -MAX_ALLOWED_CPU_CORES = int(os.environ.get('MAX_ALLOWED_CPU_CORES')) - -shared_data = { - 'connected': False, - 'current_saving_job': '' -} - -buffer_jobs = [] -buffer_remove_jobs = [] -current_jobs = {} -if os.path.exists('/data/outputs/current_jobs.json'): - with open('/data/outputs/current_jobs.json') as f: - current_jobs = json.load(f) - for job_name in current_jobs.keys(): - if 'is_actively_saving' in current_jobs[job_name] and current_jobs[job_name]['is_actively_saving'] == True: - shared_data['current_saving_job'] = current_jobs[job_name] - - -# Get all the current running jobs from the list of docker containers, store that data in a dictionary -# along with any other needed metadata (like if it's still running, doing post processing, copying outputs -# to its destination, etc), and then update the websocket server of the status of the jobs. 
-def update_loop(): - while True: - # If there are no current jobs, just check every 10 seconds till there is - if len(current_jobs.keys()) == 0: sio.sleep(10) - - while len(buffer_jobs) > 0: - new_job = buffer_jobs.pop() - current_jobs[new_job['job_name']] = new_job - - while len(buffer_remove_jobs) > 0: - job_to_remove = buffer_remove_jobs.pop() - if job_to_remove['job_name'] in current_jobs: - current_jobs[job_to_remove['job_name']]['status'] = 'Cancelled' - - # Get list of current docker containers that are fim run jobs - # docker ps --all --filter=name=apijob --format='{{.Names}} {{.State}}' - containers_raw = os.popen("docker ps --all --filter=name=apijob --format='{{.Names}} {{.State}}'").read().splitlines() - containers_split = [ line.split() for line in containers_raw ] - container_states = { name: state for (name, state) in containers_split } - - jobs_to_delete = [] - for job_name in current_jobs.keys(): - sio.sleep(0) - if job_name in container_states: - current_jobs[job_name]['container_state'] = container_states[job_name] - - # If the user chooses to cancel the job early - if current_jobs[job_name]['status'] == 'Cancelled': - # If the docker container is running, stop and remove it - if current_jobs[job_name]['time_elapsed'] > 0 and current_jobs[job_name]['container_state'] != 'exited': - subprocess.call(f"docker container stop {job_name}", shell=True) - subprocess.call(f"docker container rm {job_name}", shell=True) - - print("output_handler finished, deleted temp source files and output files") - temp_path = f"/data/temp/{job_name}" - if os.path.isdir(temp_path): - shutil.rmtree(temp_path) - - outputs_path = f"/data/outputs/{current_jobs[job_name]['nice_name']}" - if os.path.isdir(outputs_path): - shutil.rmtree(outputs_path) - - jobs_to_delete.append(job_name) - - active_statuses = [ - 'In Progress', - 'Ready for Synthesize Test Cases', - 'Running Synthesize Test Cases', - 'Ready for Eval Plots', - 'Running Eval Plots', - 'Ready for Generate Categorical FIM', - 'Running Generate Categorical FIM', - 'Ready to Save File', - 'Saving File' - ] - # TODO: separate list for queuing so that one job can save and another run - - # Update the time elapsed for all jobs that are currently in progress or saving outputs - if current_jobs[job_name]['status'] in active_statuses: - current_jobs[job_name]['time_elapsed'] = math.ceil(time.time() - current_jobs[job_name]['time_started']) - - # TODO: While job is in progress, keep track of how many hucs are done and overall progress % - - # Once resources recome available, start a new job that is in queue - - if current_jobs[job_name]['status'] == 'In Queue': - current_jobs[job_name]['time_started'] = time.time() - - total_active_cores = 0 - for j in current_jobs.keys(): - if current_jobs[j]['status'] in active_statuses: - # This is to account for the fact that HUC6's take a lot more resources to run. 
- # (not necessarily cpu cores but rather RAM, so this artificially reduces how many jobs can run when HUC6's - # are running) - # HACK: this is more of a temporary solution until we no longer need to run HUC6's - if current_jobs[j]['hucs_type'] == '6': - total_active_cores += current_jobs[j]['parallel_jobs'] * 5 - else: - total_active_cores += current_jobs[j]['parallel_jobs'] - - # Machine has enough resources to run a new job - potential_active_cores = 0 - if current_jobs[job_name]['hucs_type'] == '6': - potential_active_cores = current_jobs[job_name]['parallel_jobs'] * 5 + total_active_cores - else: - potential_active_cores = current_jobs[job_name]['parallel_jobs'] + total_active_cores - - # print(f"Checking whether a new job can start {potential_active_cores} <= {MAX_ALLOWED_CPU_CORES}") - # print(potential_active_cores <= MAX_ALLOWED_CPU_CORES) - if potential_active_cores <= MAX_ALLOWED_CPU_CORES: - job_name = current_jobs[job_name]['job_name'] - nice_name = current_jobs[job_name]['nice_name'] - branch = current_jobs[job_name]['branch'] - hucs = current_jobs[job_name]['hucs'] - parallel_jobs = current_jobs[job_name]['parallel_jobs'] - extent = current_jobs[job_name]['extent'] - config_path = current_jobs[job_name]['config_path'] - dev_run = current_jobs[job_name]['dev_run'] - viz_run = current_jobs[job_name]['viz_run'] - - # Clone github repo, with specific branch, to a temp folder - print(f'cd /data/temp && git clone -b {branch} {GITHUB_REPO} {job_name} && chmod -R 777 {job_name} && cp .env {job_name}/tools/.env') - subprocess.call(f'cd /data/temp && git clone -b {branch} {GITHUB_REPO} {job_name} && chmod -R 777 {job_name} && cp .env {job_name}/tools/.env', shell=True) - - # Kick off the new job as a docker container with the new cloned repo as the volume - print(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim {DOCKER_IMAGE_PATH} fim_run.sh -u \"{hucs}\" -e {extent} -c {config_path} -n {nice_name} -o {'' if dev_run else '-p'} {'-v' if viz_run else ''} -j {parallel_jobs}") - subprocess.call(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim {DOCKER_IMAGE_PATH} fim_run.sh -u \"{hucs}\" -e {extent} -c {config_path} -n {nice_name} -o {'' if dev_run else '-p'} {'-v' if viz_run else ''} -j {parallel_jobs}", shell=True) - current_jobs[job_name]['status'] = 'In Progress' - - # Once the Docker container is done, either save outputs or run release - if current_jobs[job_name]['status'] == 'In Progress' and current_jobs[job_name]['container_state'] == 'exited': - - # Get container exit code, get the docker log, and then remove container - exit_code_raw = os.popen(f"docker inspect {job_name}" + " --format='{{.State.ExitCode}}'").read().splitlines() - - print("Exit code") - print(exit_code_raw) - print(exit_code_raw[0]) - try: - print(int(exit_code_raw[0])) - except: - pass - - exit_code = int(exit_code_raw[0]) - current_jobs[job_name]['exit_code'] = exit_code - subprocess.call(f"docker logs {job_name} >& /data/outputs/{current_jobs[job_name]['nice_name']}/logs/docker.log", shell=True) - subprocess.call(f"docker container rm {job_name}", shell=True) - - if current_jobs[job_name]['job_type'] == 'fim_run': - for path, folders, files in os.walk(f"/data/outputs/{current_jobs[job_name]['nice_name']}"): - for file in files: - current_jobs[job_name]['output_files_saved'][os.path.join(path, file)] = 0 - - current_jobs[job_name]['total_output_files_length'] = 
len(current_jobs[job_name]['output_files_saved'].keys()) - current_jobs[job_name]['status'] = 'Ready to Save File' - elif current_jobs[job_name]['job_type'] == 'release': - # Move outputs to previous_fim and set them to be copied to the dev machine - if os.path.isdir(f"/data/previous_fim/{current_jobs[job_name]['nice_name']}"): - shutil.rmtree(f"/data/previous_fim/{current_jobs[job_name]['nice_name']}") - if os.path.isdir(f"/data/outputs/{current_jobs[job_name]['nice_name']}"): shutil.move(f"/data/outputs/{current_jobs[job_name]['nice_name']}", '/data/previous_fim') - for path, folders, files in os.walk(f"/data/previous_fim/{current_jobs[job_name]['nice_name']}"): - for file in files: - current_jobs[job_name]['output_files_saved'][os.path.join(path, file)] = 0 - current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys()) - current_jobs[job_name]['status'] = 'Ready for Synthesize Test Cases' - - if current_jobs[job_name]['status'] == 'Ready for Synthesize Test Cases': - job_name = current_jobs[job_name]['job_name'] - nice_name = current_jobs[job_name]['nice_name'] - parallel_jobs = current_jobs[job_name]['parallel_jobs'] - - # Kick off the new job as a docker container to run eval metrics - print(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/synthesize_test_cases.py -c PREV --fim-version {nice_name} --job-number {parallel_jobs} -m /data/test_cases/metrics_library/all_official_versions.csv") - subprocess.call(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/synthesize_test_cases.py -c PREV --fim-version {nice_name} --job-number {parallel_jobs} -m /data/test_cases/metrics_library/all_official_versions.csv", shell=True) - current_jobs[job_name]['container_state'] = 'running' - current_jobs[job_name]['status'] = 'Running Synthesize Test Cases' - - # Once the Docker container is done, save outputs - if current_jobs[job_name]['status'] == 'Running Synthesize Test Cases' and current_jobs[job_name]['container_state'] == 'exited': - # Get container exit code, get the docker log, and then remove container - exit_code_raw = os.popen(f"docker inspect {job_name}" + " --format='{{.State.ExitCode}}'").read().splitlines() - - print("Exit code") - print(exit_code_raw) - print(exit_code_raw[0]) - try: - print(int(exit_code_raw[0])) - except: - pass - - exit_code = int(exit_code_raw[0]) - current_jobs[job_name]['exit_code'] = exit_code - subprocess.call(f"docker logs {job_name} >& /data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/synthesize_test_cases_docker.log", shell=True) - subprocess.call(f"docker container rm {job_name}", shell=True) - - current_jobs[job_name]['output_files_saved']['/data/test_cases/metrics_library/all_official_versions.csv'] = 0 - current_jobs[job_name]['output_files_saved'][f"/data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/synthesize_test_cases_docker.log"] = 0 - current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys()) - current_jobs[job_name]['status'] = 'Ready for Eval Plots' - - if current_jobs[job_name]['status'] == 'Ready for Eval Plots': - job_name = current_jobs[job_name]['job_name'] - nice_name = current_jobs[job_name]['nice_name'] - previous_major_fim_version = current_jobs[job_name]['previous_major_fim_version'] - - # Kick off the new job as 
a docker container to run eval plots - print(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/eval_plots.py -m /data/test_cases/metrics_library/all_official_versions.csv -w /data/test_cases/metrics_library/all_official_versions_viz -v {previous_major_fim_version} {nice_name} -sp") - subprocess.call(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/eval_plots.py -m /data/test_cases/metrics_library/all_official_versions.csv -w /data/test_cases/metrics_library/all_official_versions_viz -v {previous_major_fim_version} {nice_name} -sp", shell=True) - current_jobs[job_name]['container_state'] = 'running' - current_jobs[job_name]['status'] = 'Running Eval Plots' - - # Once the Docker container is done, save outputs - if current_jobs[job_name]['status'] == 'Running Eval Plots' and current_jobs[job_name]['container_state'] == 'exited': - # Get container exit code, get the docker log, and then remove container - exit_code_raw = os.popen(f"docker inspect {job_name}" + " --format='{{.State.ExitCode}}'").read().splitlines() - - print("Exit code") - print(exit_code_raw) - print(exit_code_raw[0]) - try: - print(int(exit_code_raw[0])) - except: - pass - - exit_code = int(exit_code_raw[0]) - current_jobs[job_name]['exit_code'] = exit_code - subprocess.call(f"docker logs {job_name} >& /data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/eval_plots_docker.log", shell=True) - subprocess.call(f"docker container rm {job_name}", shell=True) - - current_jobs[job_name]['output_files_saved'][f"/data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/eval_plots_docker.log"] = 0 - for path, folders, files in os.walk('/data/test_cases/metrics_library/all_official_versions_viz'): - for file in files: - current_jobs[job_name]['output_files_saved'][os.path.join(path, file)] = 0 - current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys()) - - current_jobs[job_name]['status'] = 'Ready for Generate Categorical FIM' - - if current_jobs[job_name]['status'] == 'Ready for Generate Categorical FIM': - job_name = current_jobs[job_name]['job_name'] - nice_name = current_jobs[job_name]['nice_name'] - parallel_jobs = current_jobs[job_name]['parallel_jobs'] - - # Kick off the new job as a docker container to run CatFIM - print(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/generate_categorical_fim.py -f {nice_name} -j {parallel_jobs}") - subprocess.call(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/generate_categorical_fim.py -f /data/previous_fim/{nice_name} -j {parallel_jobs}", shell=True) - current_jobs[job_name]['container_state'] = 'running' - current_jobs[job_name]['status'] = 'Running Generate Categorical FIM' - - # Once the Docker container is done, save outputs - if current_jobs[job_name]['status'] == 'Running Generate Categorical FIM' and current_jobs[job_name]['container_state'] == 'exited': - # Get container exit code, get the docker log, and then remove container - exit_code_raw = os.popen(f"docker inspect {job_name}" + " --format='{{.State.ExitCode}}'").read().splitlines() - - print("Exit code") - print(exit_code_raw) - print(exit_code_raw[0]) - 
try: - print(int(exit_code_raw[0])) - except: - pass - - exit_code = int(exit_code_raw[0]) - current_jobs[job_name]['exit_code'] = exit_code - subprocess.call(f"docker logs {job_name} >& /data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/generate_categorical_fim.log", shell=True) - subprocess.call(f"docker container rm {job_name}", shell=True) - - os.makedirs(f"/data/catfim/{current_jobs[job_name]['nice_name']}_temp") - if os.path.isdir(f"/data/catfim/{current_jobs[job_name]['nice_name']}"): - for path in glob.glob(f"/data/catfim/{current_jobs[job_name]['nice_name']}/**/mapping/*", recursive=True): - if not os.path.isdir(path): - shutil.move(path, f"/data/catfim/{current_jobs[job_name]['nice_name']}_temp") - filename = os.path.basename(path) - current_jobs[job_name]['output_files_saved'][os.path.join(f"/data/catfim/{current_jobs[job_name]['nice_name']}", filename)] = 0 - shutil.rmtree(f"/data/catfim/{current_jobs[job_name]['nice_name']}") - shutil.move(f"/data/catfim/{current_jobs[job_name]['nice_name']}_temp", f"/data/catfim/{current_jobs[job_name]['nice_name']}") - - current_jobs[job_name]['output_files_saved'][f"/data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/generate_categorical_fim.log"] = 0 - current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys()) - current_jobs[job_name]['status'] = 'Ready to Save File' - - # Trigger connector to transmit the outputs to the output_handler - # If the output_handler is offline, it will keep retrying until the output_handler is online - if current_jobs[job_name]['status'] == 'Ready to Save File' and (shared_data['current_saving_job'] == '' or shared_data['current_saving_job'] == current_jobs[job_name]): - print(f"{job_name} ready for output handler") - - shared_data['current_saving_job'] = current_jobs[job_name] - current_jobs[job_name]['is_actively_saving'] = True - output_to_save = {} - for path in current_jobs[job_name]['output_files_saved']: - if current_jobs[job_name]['output_files_saved'][path] != -1: - output_to_save = {'path': path, 'chunk_index': current_jobs[job_name]['output_files_saved'][path]} - - if output_to_save != {}: - if shared_data['connected']: - sio.emit('ready_for_output_handler', { - 'nice_name': current_jobs[job_name]['nice_name'], - 'job_name': job_name, - 'path': output_to_save['path'], - 'chunk_index': output_to_save['chunk_index'] - }) - current_jobs[job_name]['status'] = 'Saving File' - - # Once the output_handler is done getting the outputs and the connector deletes the temp repo source, - # mark as completed - if current_jobs[job_name]['status'] == 'Saving File': - is_done = True - for path in current_jobs[job_name]['output_files_saved']: - if current_jobs[job_name]['output_files_saved'][path] != -1: - is_done = False - break - - if is_done: - print("output_handler finished, deleted temp source files and output files") - temp_path = f"/data/temp/{job_name}" - if os.path.isdir(temp_path): - shutil.rmtree(temp_path) - - outputs_path = f"/data/outputs/{current_jobs[job_name]['nice_name']}" - if current_jobs[job_name]['job_type'] == 'release': - outputs_path = f"/data/previous_fim/{current_jobs[job_name]['nice_name']}" - destination = f"/data/viz/{current_jobs[job_name]['nice_name']}" - - if os.path.isdir(destination): - shutil.rmtree(destination) - if os.path.isdir(f"{outputs_path}/aggregate_fim_outputs"): shutil.move(f"{outputs_path}/aggregate_fim_outputs", destination) - if os.path.isdir(f"{outputs_path}/logs"): 
shutil.move(f"{outputs_path}/logs", f"{destination}/logs") - if os.path.isdir(f"/data/catfim/{current_jobs[job_name]['nice_name']}"): shutil.move(f"/data/catfim/{current_jobs[job_name]['nice_name']}", f"{destination}/catfim") - - if os.path.isdir(outputs_path): - shutil.rmtree(outputs_path) - if current_jobs[job_name]['job_type'] == 'release': - if os.path.isdir(f"/data/catfim/{current_jobs[job_name]['nice_name']}"): - shutil.rmtree(f"/data/catfim/{current_jobs[job_name]['nice_name']}") - try: - os.makedirs(outputs_path) - except: - pass - - current_jobs[job_name]['status'] = 'Completed' if current_jobs[job_name]['exit_code'] == 0 else 'Error' - - shared_data['current_saving_job'] = '' - current_jobs[job_name]['is_actively_saving'] = False - print(f"{job_name} completed") - # TODO: Insert Slack notification here for finished job - - # Remove job from list after it's been completed for more than 15 minutes - if (current_jobs[job_name]['status'] == 'Completed' or current_jobs[job_name]['status'] == 'Error') and \ - time.time() >= current_jobs[job_name]['time_started'] + current_jobs[job_name]['time_elapsed'] + 900: - print(f"{job_name} removed from job list") - jobs_to_delete.append(job_name) - - for job in jobs_to_delete: - del current_jobs[job] - - presets_list = [] - for path, folders, files in os.walk(f"/data/inputs/huc_lists"): - for file in files: - presets_list.append(file) - - # Send updates to the connector and write job progress to file - job_updates = [ { - 'job_name': job['job_name'], - 'nice_name': job['nice_name'], - 'status': job['status'], - 'exit_code': job['exit_code'], - 'time_elapsed': job['time_elapsed'], - 'total_output_files_length': job['total_output_files_length'], - 'current_output_files_saved_length': job['current_output_files_saved_length'], - } for job in current_jobs.values()] - - if shared_data['connected']: sio.emit('update', {'jobUpdates': job_updates, 'presetsList': presets_list}) - with open('/data/outputs/current_jobs.json.temp', 'w') as f: - json.dump(current_jobs, f) - shutil.move('/data/outputs/current_jobs.json.temp', '/data/outputs/current_jobs.json') - -sio = socketio.Client() - -@sio.event -def connect(): - print("Update Loop Connected!") - sio.emit('updater_connected') - shared_data['connected'] = True - -@sio.event -def disconnect(): - print('disconnected from server') - shared_data['connected'] = False - -@sio.on('add_job_to_queue') -def ws_add_job_to_queue(data): - job_type = data['job_type'] - if job_type == 'fim_run': - job_name = data['job_name'] - branch = data['branch'] - hucs = data['hucs'] - parallel_jobs = data['parallel_jobs'] - hucs_type = data['hucs_type'] - extent = data['extent'] - config_path = data['config_path'] - dev_run = data['dev_run'] - viz_run = data['viz_run'] - - # This is a preset list instead of a custom list of hucs - if hucs_type == 0: - if os.path.exists(hucs): - with open(hucs, "r") as preset_file: - hucs_raw = preset_file.read().splitlines() - parallel_jobs = len(hucs_raw) - hucs_type = len(hucs_raw[0]) - - parallel_jobs = parallel_jobs if parallel_jobs <= MAX_ALLOWED_CPU_CORES else MAX_ALLOWED_CPU_CORES - - buffer_jobs.append({ - 'job_type': job_type, - 'job_name': job_name, - 'branch': branch, - 'hucs': hucs, - 'parallel_jobs': parallel_jobs, - 'hucs_type': hucs_type, - 'extent': extent, - 'config_path': config_path, - 'dev_run': dev_run, - 'viz_run': viz_run, - 'nice_name': re.search(r"apijob_(.+)_apijob.+", job_name).group(1), - 'status': 'In Queue', - 'time_started': 0, - 'time_elapsed': 0, - 
'output_files_saved': {}, - 'total_output_files_length': 0, - 'current_output_files_saved_length': 0, - 'output_files_saved': {}, - 'container_state': 'running', - 'exit_code': 0, - 'is_actively_saving': False - }) - elif job_type == 'release': - job_name = data['job_name'] - hucs = data['hucs'] - extent = data['extent'] - branch = 'dev' - config_path = './foss_fim/config/params_template.env' - dev_run = False - viz_run = True - previous_major_fim_version = data['previous_major_fim_version'] - - if os.path.exists(hucs): - with open(hucs, "r") as preset_file: - hucs_raw = preset_file.read().splitlines() - parallel_jobs = len(hucs_raw) - hucs_type = len(hucs_raw[0]) - - parallel_jobs = parallel_jobs if parallel_jobs <= MAX_ALLOWED_CPU_CORES else MAX_ALLOWED_CPU_CORES - - buffer_jobs.append({ - 'job_type': job_type, - 'job_name': job_name, - 'branch': branch, - 'hucs': hucs, - 'parallel_jobs': parallel_jobs, - 'hucs_type': hucs_type, - 'extent': extent, - 'config_path': config_path, - 'dev_run': dev_run, - 'viz_run': viz_run, - 'nice_name': re.search(r"apijob_(.+)_apijob.+", job_name).group(1), - 'status': 'In Queue', - 'time_started': 0, - 'time_elapsed': 0, - 'output_files_saved': {}, - 'total_output_files_length': 0, - 'current_output_files_saved_length': 0, - 'output_files_saved': {}, - 'container_state': 'running', - 'exit_code': 0, - 'is_actively_saving': False, - 'previous_major_fim_version': previous_major_fim_version - }) - -@sio.on('remove_job_from_queue') -def ws_remove_job_from_queue(data): - job_name = data['job_name'] - buffer_remove_jobs.append({'job_name': job_name}) - -# If the output_handler is offline, try the saving process again -@sio.on('retry_saving_files') -def ws_retry_saving_files(): - print('saving files failed, retrying') - for job_name in current_jobs: - if current_jobs[job_name]['status'] == "Saving File": - for path in current_jobs[job_name]['output_files_saved']: - if current_jobs[job_name]['output_files_saved'][path] != -1: - current_jobs[job_name]['output_files_saved'][path] = 0 - - current_jobs[job_name]['status'] = 'Ready to Save File' - -@sio.on('file_chunk_saved') -def ws_file_chunk_saved(data): - job_name = data['job_name'] - file_path = data['file_path'] - - current_jobs[job_name]['output_files_saved'][file_path] += 1 - current_jobs[job_name]['status'] = 'Ready to Save File' - -@sio.on('file_saved') -def ws_file_saved(data): - job_name = data['job_name'] - file_path = data['file_path'] - - current_jobs[job_name]['output_files_saved'][file_path] = -1 - current_jobs[job_name]['current_output_files_saved_length'] += 1 - current_jobs[job_name]['status'] = 'Ready to Save File' - -sio.connect('http://fim_node_connector:6000/') -update_loop() diff --git a/config/aws_s3_put_fim3_hydrovis_whitelist.lst b/config/aws_s3_put_fim3_hydrovis_whitelist.lst new file mode 100644 index 000000000..b81404832 --- /dev/null +++ b/config/aws_s3_put_fim3_hydrovis_whitelist.lst @@ -0,0 +1,4 @@ +hydroTable{}.csv +gw_catchments_reaches_filtered_addedAttributes{}.tif +rem_zeroed_masked{}.tif +usgs_elev_table.csv \ No newline at end of file diff --git a/config/aws_s3_put_fim4_hydrovis_whitelist.lst b/config/aws_s3_put_fim4_hydrovis_whitelist.lst new file mode 100644 index 000000000..0b1345ef3 --- /dev/null +++ b/config/aws_s3_put_fim4_hydrovis_whitelist.lst @@ -0,0 +1,5 @@ +hydroTable{}.csv +gw_catchments_reaches_filtered_addedAttributes{}.tif +rem_zeroed_masked{}.tif +usgs_elev_table.csv +crosswalk_table.csv \ No newline at end of file diff --git a/config/calb_db_keys_template.env 
b/config/calb_db_keys_template.env new file mode 100755 index 000000000..a6287bb5b --- /dev/null +++ b/config/calb_db_keys_template.env @@ -0,0 +1,12 @@ +#!/bin/bash + +# SENSITIVE INFO!!! NOT FOR PUBLIC DISSEMENATION + +# Notes: +# - The DB_Host name is the name of the server running the calibration db. +# - More than one computer can call across the network to one calibraion db server. +# - You can change the name of the database if you like, but not recommended. +CALIBRATION_DB_HOST= +CALIBRATION_DB_NAME=calibration +CALIBRATION_DB_USER_NAME= +CALIBRATION_DB_PASS= diff --git a/config/deny_branch_unittests.lst b/config/deny_branch_unittests.lst new file mode 100644 index 000000000..aec38c3b9 --- /dev/null +++ b/config/deny_branch_unittests.lst @@ -0,0 +1,63 @@ +# List of files for branches to delete +# Use comment to allow list the file +# Use curly braces to denote branch_id +agree_binary_bufgrid.tif +agree_bufgrid.tif +agree_bufgrid_allo.tif +agree_bufgrid_dist.tif +agree_bufgrid_zerod.tif +agree_smogrid.tif +agree_smogrid_allo.tif +agree_smogrid_dist.tif +agree_smogrid_zerod.tif +catch_list_{}.txt +coordFile_{}.txt +crosswalk_table_{}.csv +#demDerived_reaches_{}.dbf +#demDerived_reaches_{}.prj +#demDerived_reaches_{}.shp +#demDerived_reaches_{}.shx +#demDerived_reaches_split_{}.gpkg +#demDerived_reaches_split_filtered_{}.gpkg +#demDerived_reaches_split_filtered_addedAttributes_crosswalked_{}.gpkg +demDerived_reaches_split_points_{}.gpkg +demDerived_streamPixels_{}.tif +demDerived_streamPixels_ids_{}.tif +demDerived_streamPixels_ids_{}_allo.tif +demDerived_streamPixels_ids_{}_dist.tif +dem_burned_{}.tif +dem_burned_filled_{}.tif +dem_lateral_thalweg_adj_{}.tif +#dem_meters_{}.tif +#dem_thalwegCond_{}.tif +flowaccum_d8_burned_filled_{}.tif +#flowdir_d8_burned_filled_{}.tif +flowdir_d8_burned_filled_flows_{}.tif +flows_grid_boolean_{}.tif +flows_points_pixels_{}.gpkg +gw_catchments_pixels_{}.tif +#gw_catchments_reaches_{}.gpkg +gw_catchments_reaches_{}.tif +#gw_catchments_reaches_filtered_addedAttributes_{}.gpkg +#gw_catchments_reaches_filtered_addedAttributes_{}.tif +#gw_catchments_reaches_filtered_addedAttributes_crosswalked_{}.gpkg +headwaters_{}.tif +#hydroTable_{}.csv +idFile_{}.txt +LandSea_subset_{}.tif +nld_rasterized_elev_{}.tif +nwm_catchments_proj_subset_levelPaths_{}.gpkg +nwm_subset_streams_levelPaths_{}.gpkg +nwm_subset_streams_levelPaths_dissolved_headwaters_{}.gpkg +rem_{}.tif +#rem_zeroed_masked_{}.tif +slopes_d8_dem_meters_{}.tif +slopes_d8_dem_meters_masked_{}.tif +sn_catchments_reaches_{}.tif +src_{}.json +src_base_{}.csv +#src_full_crosswalked_{}.csv +stage_{}.txt +streamOrder_{}.tif +treeFile_{}.txt +#usgs_elev_table.csv diff --git a/config/deny_branch_zero.lst b/config/deny_branch_zero.lst new file mode 100644 index 000000000..1da9b4fcc --- /dev/null +++ b/config/deny_branch_zero.lst @@ -0,0 +1,63 @@ +# List of files for branch zero to delete +# Use comment to allow list the file +# Use curly braces to denote branch_id +agree_binary_bufgrid.tif +agree_bufgrid.tif +agree_bufgrid_allo.tif +agree_bufgrid_dist.tif +agree_bufgrid_zerod.tif +agree_smogrid.tif +agree_smogrid_allo.tif +agree_smogrid_dist.tif +agree_smogrid_zerod.tif +catch_list_{}.txt +coordFile_{}.txt +crosswalk_table_{}.csv +demDerived_reaches_{}.dbf +demDerived_reaches_{}.prj +demDerived_reaches_{}.shp +demDerived_reaches_{}.shx +demDerived_reaches_split_{}.gpkg +demDerived_reaches_split_filtered_{}.gpkg +#demDerived_reaches_split_filtered_addedAttributes_crosswalked_{}.gpkg 
+demDerived_reaches_split_points_{}.gpkg +demDerived_streamPixels_{}.tif +demDerived_streamPixels_ids_{}.tif +demDerived_streamPixels_ids_{}_allo.tif +demDerived_streamPixels_ids_{}_dist.tif +dem_burned_{}.tif +dem_burned_filled_{}.tif +dem_lateral_thalweg_adj_{}.tif +dem_meters_{}.tif +dem_thalwegCond_{}.tif +flowaccum_d8_burned_filled_{}.tif +#flowdir_d8_burned_filled_{}.tif +flowdir_d8_burned_filled_flows_{}.tif +flows_grid_boolean_{}.tif +flows_points_pixels_{}.gpkg +gw_catchments_pixels_{}.tif +gw_catchments_reaches_{}.gpkg +gw_catchments_reaches_{}.tif +#gw_catchments_reaches_filtered_addedAttributes_{}.gpkg +#gw_catchments_reaches_filtered_addedAttributes_{}.tif +#gw_catchments_reaches_filtered_addedAttributes_crosswalked_{}.gpkg +headwaters_{}.tif +#hydroTable_{}.csv +idFile_{}.txt +#LandSea_subset_{}.tif +nld_rasterized_elev_{}.tif +nwm_catchments_proj_subset_levelPaths_{}.gpkg +nwm_subset_streams_levelPaths_{}.gpkg +nwm_subset_streams_levelPaths_dissolved_headwaters_{}.gpkg +rem_{}.tif +#rem_zeroed_masked_{}.tif +slopes_d8_dem_meters_{}.tif +slopes_d8_dem_meters_masked_{}.tif +sn_catchments_reaches_{}.tif +src_{}.json +src_base_{}.csv +#src_full_crosswalked_{}.csv +stage_{}.txt +streamOrder_{}.tif +treeFile_{}.txt +#usgs_elev_table.csv diff --git a/config/deny_branches.lst b/config/deny_branches.lst new file mode 100644 index 000000000..13af6b41a --- /dev/null +++ b/config/deny_branches.lst @@ -0,0 +1,64 @@ +# List of files for branches to delete +# Use comment to allow list the file +# Use curly braces to denote branch_id +agree_binary_bufgrid.tif +agree_bufgrid.tif +agree_bufgrid_allo.tif +agree_bufgrid_dist.tif +agree_bufgrid_zerod.tif +agree_smogrid.tif +agree_smogrid_allo.tif +agree_smogrid_dist.tif +agree_smogrid_zerod.tif +catch_list_{}.txt +coordFile_{}.txt +crosswalk_table_{}.csv +demDerived_reaches_{}.dbf +demDerived_reaches_{}.prj +demDerived_reaches_{}.shp +demDerived_reaches_{}.shx +demDerived_reaches_split_{}.gpkg +demDerived_reaches_split_filtered_{}.gpkg +#demDerived_reaches_split_filtered_addedAttributes_crosswalked_{}.gpkg +demDerived_reaches_split_points_{}.gpkg +demDerived_streamPixels_{}.tif +demDerived_streamPixels_ids_{}.tif +demDerived_streamPixels_ids_{}_allo.tif +demDerived_streamPixels_ids_{}_dist.tif +dem_burned_{}.tif +dem_burned_filled_{}.tif +dem_lateral_thalweg_adj_{}.tif +dem_meters_{}.tif +dem_thalwegCond_{}.tif +flowaccum_d8_burned_filled_{}.tif +flowdir_d8_burned_filled_{}.tif +flowdir_d8_burned_filled_flows_{}.tif +flows_grid_boolean_{}.tif +flows_points_pixels_{}.gpkg +gw_catchments_pixels_{}.tif +gw_catchments_reaches_{}.gpkg +gw_catchments_reaches_{}.tif +gw_catchments_reaches_filtered_addedAttributes_{}.gpkg +#gw_catchments_reaches_filtered_addedAttributes_{}.tif +#gw_catchments_reaches_filtered_addedAttributes_crosswalked_{}.gpkg +headwaters_{}.tif +#hydroTable_{}.csv +idFile_{}.txt +LandSea_subset_{}.tif +levee_levelpaths.csv +nld_rasterized_elev_{}.tif +nwm_catchments_proj_subset_levelPaths_{}.gpkg +nwm_subset_streams_levelPaths_{}.gpkg +nwm_subset_streams_levelPaths_dissolved_headwaters_{}.gpkg +rem_{}.tif +#rem_zeroed_masked_{}.tif +slopes_d8_dem_meters_{}.tif +slopes_d8_dem_meters_masked_{}.tif +sn_catchments_reaches_{}.tif +src_{}.json +src_base_{}.csv +#src_full_crosswalked_{}.csv +stage_{}.txt +streamOrder_{}.tif +treeFile_{}.txt +#usgs_elev_table.csv diff --git a/config/deny_unit.lst b/config/deny_unit.lst new file mode 100644 index 000000000..cb7e76754 --- /dev/null +++ b/config/deny_unit.lst @@ -0,0 +1,32 @@ +# List of files 
for units to delete +# Use comment to allow list the file +agree_binary_bufgrid.tif +agree_bufgrid.tif +agree_bufgrid_allo.tif +agree_bufgrid_dist.tif +agree_bufgrid_zerod.tif +agree_smogrid.tif +agree_smogrid_allo.tif +agree_smogrid_dist.tif +agree_smogrid_zerod.tif +#3d_nld_subset_levees_burned.gpkg +#branch_id.lst +#branch_polygons.gpkg +dem_meters.tif +#LandSea_subset.gpkg +#LeveeProtectedAreas_subset.gpkg +#nld_subset_levees.gpkg +#nwm_catchments_proj_subset.gpkg +#nwm_catchments_proj_subset_levelPaths.gpkg +nwm_headwaters.gpkg +#nwm_lakes_proj_subset.gpkg +#nwm_subset_streams.gpkg +#nwm_subset_streams_levelPaths.gpkg +#nwm_subset_streams_levelPaths_dissolved.gpkg +#nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg +#usgs_elev_table.csv +#usgs_subset_gages.gpkg +#wbd.gpkg +#wbd8_clp.gpkg +wbd_buffered.gpkg +#wbd_buffered_streams.gpkg diff --git a/config/mannings_calibrated.json b/config/mannings_calibrated.json deleted file mode 100644 index f75627ff4..000000000 --- a/config/mannings_calibrated.json +++ /dev/null @@ -1 +0,0 @@ -{"1": 0.125, "2": 0.125, "3": 0.125, "4": 0.125, "5": 0.125, "6": 0.125, "7": 0.125, "8": 0.105, "9": 0.1, "10": 0.05, "11": 0.06, "12": 0.06, "13": 0.06, "14": 0.06} \ No newline at end of file diff --git a/config/mannings_default.json b/config/mannings_default.json deleted file mode 100644 index d10b72607..000000000 --- a/config/mannings_default.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "1" : 0.06, - "2" : 0.06, - "3" : 0.06, - "4" : 0.06, - "5" : 0.06, - "6" : 0.06, - "7" : 0.06, - "8" : 0.06, - "9" : 0.06, - "10" : 0.06, - "11" : 0.06, - "12" : 0.06, - "13" : 0.06, - "14" : 0.06 -} diff --git a/config/params_template.env b/config/params_template.env index 41df712c6..5c0754368 100644 --- a/config/params_template.env +++ b/config/params_template.env @@ -4,6 +4,7 @@ export negative_burn_value=1000 export agree_DEM_buffer=70 export wbd_buffer=5000 +export levee_buffer=1000 export thalweg_lateral_elev_threshold=3 #### geospatial parameters #### @@ -12,7 +13,7 @@ export ms_buffer_dist=7000 export lakes_buffer_dist_meters=20 #### rating curve parameters #### -export manning_n="/foss_fim/config/mannings_default.json" +export manning_n=0.06 export stage_min_meters=0 export stage_interval_meters=0.3048 export stage_max_meters=25 @@ -20,38 +21,36 @@ export slope_min=0.001 export min_catchment_area=0.25 export min_stream_length=0.5 -#### bathy SRC estimation parameters #### -export bathy_src_toggle=True # Toggle to run BARC routine (True=on; False=off) -export bankfull_input_table="data/inputs/bathymetry/nwm_flow_bieger_qreg.csv" # input file location with feature_id and channel geometry attributes -# Option 1: Bieger et al. 2015 discharge regression --> nwm_flow_bieger_qreg.csv -# Option 2: Bieger et al. 2015 drainage area regression (obtained from Wieczorek (2018) database) --> BANKFULL_CONUS.txt -# Option 3: NWM Route Link bankfull geometry (Blackburn-Lynch regression) --> nwm_route_link_geom_BED.csv -export src_plot_option="False" # optional toggle to create SRC comparison plots for each hydroid (Warning: longer run times) -export surf_area_thalweg_ratio_flag=10 # Flag: Surface area ratio value to identify possible thalweg notch "jump" (SA x+1 / SA x) -export thalweg_stg_search_max_limit=3 # Threshold: Stage value limit below which to look for the surface area ratio flag (only flag thalweg notch below this threshold) -export bathy_xs_area_chg_flag=1 # Flag: Cross section area limit to cap the amount of bathy XS area added to the SRC. 
Limits the bathy_calc_xs_area/ BANKFULL_XSEC_AREA to the specified threshold -export bankful_xs_area_ratio_flag=10 # Flag: Identify bogus BARC adjusted values where the regression bankfull XS Area/SRC bankfull area is > threshold (topwidth crosswalk issues or bad bankfull regression data points??) -export thalweg_hyd_radius_flag=10 # Flag: Idenitify possible erroneous BARC-adjusted hydraulic radius values. BARC discharge values greater than the specified threshold and within the thal_stg_limit are set to 0 -export ignore_streamorders=10 # Ignore BARC calculations for streamorders >= this value (10 is Mississippi R) +#### branch parameters #### +export branch_id_attribute=levpa_id +export branch_buffer_distance_meters=7000 +export branch_timeout=4000 # pass int or float. To make a percentage of median, pass a '%' at the end. +export branch_zero_id="0" + +#### mask levee-protected areas from DEM +export mask_leveed_area_toggle=True # Toggle to mask levee-protected areas from DEM +export levee_id_attribute=SYSTEM_ID #### estimating bankfull stage in SRCs #### export src_bankfull_toggle="True" # Toggle to run identify_bankfull routine (True=on; False=off) -export src_bankfull_plot_option="False" # optional toggle to create SRC comparison plots for each hydroid (Warning: longer run times) -export bankfull_flows_file="data/inputs/rating_curve/bankfull_flows/nwm_v2_0_recurr_1_5_cms.csv" # input file location with nwm feature_id and recurrence flow values +export bankfull_flows_file="/data/inputs/rating_curve/bankfull_flows/nwm_high_water_threshold_cms.csv" # input file location with nwm feature_id and recurrence flow values + +#### applying channel/overbank subdivision routine to SRCs #### +export src_subdiv_toggle="True" # Toggle to run composite roughness src routine (True=on; False=off) +export vrough_suffix="" # text to append to output log and hydrotable file names (use for testing/debugging) +export vmann_input_file="/data/inputs/rating_curve/variable_roughness/mannings_global_06_12.csv" # input file location with nwm feature_id and channel roughness and overbank roughness attributes -#### applying variable/composite roughness curve to SRCs #### -export src_vrough_toggle="True" # Toggle to run composite roughness src routine (True=on; False=off) -export src_vrough_plot_option="False" # optional toggle to create SRC comparison plots for each hydroid (Warning: longer run times) -export vrough_suffix="_vmann" # text to append to output log and src_full_crosswalked file names -export vmann_input_file="data/inputs/rating_curve/variable_roughness/mannings_global_06_011.csv" # input file location with nwm feature_id and channel roughness and overbank roughness attributes -export bankfull_attribute="chann_volume_ratio" # src_full_crosswalked_bankfull.csv attribute (column id) containing the channel vs overbank ratio values (generated in the identify_src_bankfull.py) +#### apply SRC adjustments using USGS rating curve database (calibration db) #### +export src_adjust_usgs="True" # Toggle to run src adjustment routine (True=on; False=off) +export nwm_recur_file="/data/inputs/rating_curve/nwm_recur_flows/nwm21_17C_recurrence_flows_cfs.csv" # input file location with nwm feature_id and recurrence flow values + +#### apply SRC adjustments using observed FIM/flow point database #### +export src_adjust_spatial="True" # Toggle to run src adjustment routine (True=on; False=off) +export fim_obs_pnt_data="/data/inputs/rating_curve/water_edge_database/usgs_nws_benchmark_points_cleaned.gpkg" +#### path to env 
file with sensitive paths for accessing postgres database #### export CALB_DB_KEYS_FILE="/data/config/calb_db_keys.env" #### computational parameters #### export ncores_gw=1 # mpi number of cores for gagewatershed export ncores_fd=1 # mpi number of cores for flow directions -export default_max_jobs=1 # default number of max concurrent jobs to run export memfree=0G # min free memory required to start a new job or keep youngest job alive - -#### logging parameters #### -export startDiv="\n##########################################################################\n" -export stopDiv="\n##########################################################################" diff --git a/config/symbology/esri/agreement_raster.lyr b/config/symbology/esri/agreement_raster.lyr index d6e27030d..7142cd6eb 100644 Binary files a/config/symbology/esri/agreement_raster.lyr and b/config/symbology/esri/agreement_raster.lyr differ diff --git a/config/symbology/qgis/agreement_raster.qml b/config/symbology/qgis/agreement_raster.qml index 327ecdeff..7c74b4258 100644 --- a/config/symbology/qgis/agreement_raster.qml +++ b/config/symbology/qgis/agreement_raster.qml @@ -27,7 +27,7 @@ - + diff --git a/data/README.md b/data/README.md new file mode 100644 index 000000000..d6c573a69 --- /dev/null +++ b/data/README.md @@ -0,0 +1,16 @@ +## Source Data Folder + +This folder is for python and other files that communicate with external data sources. + +The first implementation is for communication with AWS (Amazon Web Services) and has a sub-folder to match. + +The first AWS api implementation, in the form of talking to AWS S3 buckets, has also been added, with a sub-folder to match. + +Generally, the expected pattern is: + +- `src/data` + - `{a root data source name}`: Such as AWS, or possibly NWM, NHD, 3Dep or others. + - `A service class`: A python file for any basic API or service you are communicating with. In this case there is one for S3. Any communication with S3 buckets will be in this file, including get, put, get bucket lists, etc. If another AWS api is added, it would generally get its own python file. For example, if we start communicating interactively with ECR or Lambda, there is an argument for each being a new python file. + - `A parent class`: In the case of AWS, all aws interactions have some things in common, such as the need to authenticate. By having a base class, all AWS child (inherited) classes automatically have that code available. Using inheritance helps standardize how we communicate with AWS. Helper or common utility classes can also be added if required. One example that has been added is that many AWS communications need a boto3 client object. A one line call by a child class gives it a boto3 client object if required. Later, if we need a boto3 resource object, that can be added as well. Note: Broad utility methods that go beyond AWS will likely be added in higher-level folders (such as src/utils or src/data/). + +The new `s3.py` is constructed so it can be used as a CLI tool (input param args), but any function can be called directly. You will notice that a starting system of passing in a parameter for `action_type` has been added. At this time, only the option of `put` is available. If a need arises later to pull data from S3, the existing command line parameters will not need to change; only minor adjustments to `__main__` and new methods for "get" would be needed. If another python file calls into this file directly, it will not go through `__main__` but straight to the "get" or "put" function, as shown in the sketch below.
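As a quick, hedged illustration of that direct-call path, the sketch below uses `s3.py` (added later in this diff) as a module rather than through its CLI. The credentials path, FIM output folder, and bucket name are taken from the sample-usage comment inside `s3.py` and are placeholders, not values this changeset requires.

```python
# Hypothetical sketch only: calling the S3 class in data/aws/s3.py directly,
# instead of invoking "python3 /foss_fim/data/aws/s3.py -a put ..." from the CLI.
import sys
sys.path.append('/foss_fim/data/aws')   # assumed container path for data/aws

from s3 import S3

s3 = S3(path_to_cred_env_file='/data/config/aws_creds.env',   # see aws_creds_template.env
        is_verbose=True)
s3.put_to_bucket(src_folder_path='/data/previous_fim/fim_4_0_13_1',
                 aws_target_path='s3://example-us-east-1/fim_4_0_13_1',
                 whitelist_file_path='/foss_fim/config/aws_s3_put_fim4_hydrovis_whitelist.lst')
```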
diff --git a/api/README.md b/data/__init__.py similarity index 100% rename from api/README.md rename to data/__init__.py diff --git a/data/aws/.gitignore b/data/aws/.gitignore new file mode 100644 index 000000000..a3ad1a24c --- /dev/null +++ b/data/aws/.gitignore @@ -0,0 +1,2 @@ +*.env +!aws_creds_template.env \ No newline at end of file diff --git a/data/aws/aws_base.py b/data/aws/aws_base.py new file mode 100644 index 000000000..0dadce969 --- /dev/null +++ b/data/aws/aws_base.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 + +# standard library imports (https://docs.python.org/3/library/ +# (imports first, then "froms", in alpha order) +import os +import subprocess +import sys + +sys.path.append('/foss_fim/src') +from utils.shared_functions import FIM_Helpers as fh + +# third party imports +# (imports first, then "froms", in alpha order) +from dotenv import load_dotenv + +''' +This implements all common variables and helpers used when communicating with AWS +''' +class AWS_Base(object): + + def __init__(self, path_to_cred_env_file, *args, **kwargs): + + ''' + Overview + ---------- + This will load the aws credentials environment file. + For now, we will feed it via an env file. Eventually, it should be + changed to ~/.aws/credentials (and maybe profiles) + + The aws_credentials_file will be loaded and an aws client + will be created ready for use. + + Parameters + ---------- + path_to_cred_env_file : str + File path of the aws_credentials_file as an .env + + is_verbose : bool + If True, then debugging output will be included + + ''' + + if (not os.path.exists(path_to_cred_env_file)): + raise FileNotFoundError("AWS credentials file not found") + + load_dotenv(path_to_cred_env_file) + + if kwargs: + is_verbose = kwargs.pop("is_verbose",None) + + self.is_verbose = is_verbose + + # TODO: validate service name with AWS (which will help + # validate the connection) + + + def get_aws_cli_credentials(self): + + ''' + Overview + ---------- + To run aws cli commands (subprocess), aws creds need to be set up + in the command environment. This method will take care of that + via bash "exports" + + Returns + ----------- + A string that can be concatenated to the front of a subprocess cmd + and includes the export creds. + + example: export AWS_ACCESS_KEY_ID=A{somekey}Q && export + AWS_SECRET_ACCESS_KEY=KW(examplekey)80 && + export AWS_DEFAULT_REGION=us-west-1 + ''' + + fh.vprint("getting aws credential string", self.is_verbose, True) + + cmd = "export AWS_ACCESS_KEY_ID=" + os.getenv('AWS_ACCESS_KEY') + cmd += " && export AWS_SECRET_ACCESS_KEY=" + os.getenv('AWS_SECRET_ACCESS_KEY') + cmd += " && export AWS_DEFAULT_REGION=" + os.getenv('AWS_REGION') + + return cmd + + + def create_aws_cli_include_argument(self, whitelist_file_names): + ''' + Overview + ---------- + Creates a string valid for aws cli include arguments. + + When using an "include", this string will automatically add + --exclude "*"; without it, the includes will not work. + + If whitelist_file_names is empty, then an empty string will be returned. + + Parameters + ---------- + whitelist_file_names : list + A list of file names to be whitelisted. The file names should already be adjusted + to the "*" pattern and stripped as applicable. + + Returns + ---------- + A string that can be added straight into an aws cli command. + + example: --exclude "*" --include "*hydroTable*.csv" --include "*rem_zeroed_masked*.tif" + ''' + + if (whitelist_file_names is None) or (len(whitelist_file_names) == 0): + return "" # empty string + + # For there to be "includes", for aws cli, you must have exclude "all" + cli_whitelist = '--exclude "*"' + + for whitelist_file_name in whitelist_file_names: + if not whitelist_file_name.startswith("*"): + whitelist_file_name = "*" + whitelist_file_name + + whitelist_file_name = whitelist_file_name.replace("{}", "*") + + cli_whitelist += f' --include "{whitelist_file_name}"' + + fh.vprint(f"cli include string is {cli_whitelist}", self.is_verbose, True) + + return cli_whitelist
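For orientation, here is a short, hypothetical sketch of how the two helpers above compose into the bulk-copy command that `s3.py` (next in this diff) builds in its `bulk_upload` method; the local folder and bucket names are placeholders, not values from this changeset.

```python
# Hypothetical sketch of composing AWS_Base helpers into an aws cli command string.
from aws_base import AWS_Base   # assumes data/aws is on sys.path

aws = AWS_Base('/data/config/aws_creds.env', is_verbose=True)

cmd = aws.get_aws_cli_credentials()   # "export AWS_ACCESS_KEY_ID=... && ..."
cmd += ' && aws s3 cp --recursive /outputs/my_fim_run s3://example-us-east-1/my_fim_run'
cmd += ' ' + aws.create_aws_cli_include_argument(['hydroTable{}.csv', 'rem_zeroed_masked{}.tif'])
# cmd now ends with: --exclude "*" --include "*hydroTable*.csv" --include "*rem_zeroed_masked*.tif"
```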
diff --git a/data/aws/aws_creds_template.env b/data/aws/aws_creds_template.env new file mode 100644 index 000000000..b236ef3f1 --- /dev/null +++ b/data/aws/aws_creds_template.env @@ -0,0 +1,3 @@ +AWS_REGION= +AWS_ACCESS_KEY= +AWS_SECRET_ACCESS_KEY= \ No newline at end of file diff --git a/data/aws/s3.py b/data/aws/s3.py new file mode 100644 index 000000000..adc2f23b0 --- /dev/null +++ b/data/aws/s3.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 + +# standard library imports (https://docs.python.org/3/library/ +# (imports first, then "froms", in alpha order) +from operator import truediv +import os +from pickle import FALSE +import sys +from datetime import datetime + +# third party imports +# (imports first, then "froms", in alpha order) +import argparse +import subprocess + +sys.path.append('/foss_fim/src') +from aws_base import * +from utils.shared_functions import FIM_Helpers as fh + +''' +This file is for communicating with any AWS S3 buckets. +For now, it can only push to a bucket (such as hydroviz) + +Note: For now, we will add a parameter for the aws credentials .env file. +This means the .env file does not need to automatically be with the source code. +Later, this will be changed to aws credentials profiles (or ~/.aws/credentials folders) +''' + +class S3(AWS_Base): + + def put_to_bucket(self, src_folder_path, + aws_target_path, whitelist_file_path = None): + + ''' + Overview + ---------- + Push a data folder to hydroviz visualization (primarily, for now) + + If the aws_target_path folder does not exist, it will be created. If it does exist, files will + just be added or overwritten (no pre-clean) + + All contents of the source folder (files and subfolders) will be copied + ie) aws_target_path = s3://some-address-us-east-1/test_uploads/220818 + src_folder_path = /outputs/fim_0_34_1_fr/(some files and folders) + + Becomes: some-aws-address/test_uploads/220818/(some files and folders) + + Note: + - This is built on the aws cli, as boto3 can only push up single files and is very slow. + As of Aug 2022, boto3 does not have a bulk recursive folder upload + + Parameters + ---------- + + - src_folder_path : str + folder path of files to be copied up + + - aws_target_path : str + An s3 bucket and folder location in AWS (cannot be blank) + + - whitelist_file_path : str + A file with a set of line delimited file names that can be copied up to S3. + Note: make sure the list file is unix encoded and not windows encoded. + If None, then all files / folders will be pushed up + Note: wildcard patterns using * are available.
+ + ''' + whitelist_file_names = [] # if blank, it is assumed to load all + + # --- Validate incoming values + # test the whitelist path and load it into a collection + if (whitelist_file_path is not None) and (len(whitelist_file_path.strip()) > 1): + + # if this list has items, then we load those items only + whitelist_file_names = fh.load_list_file(whitelist_file_path.strip()) + + if (self.is_verbose) and (len(whitelist_file_names) > 0): + print("whitelist entries") + print(whitelist_file_names) + + # test src_folder_path + if (not os.path.exists(src_folder_path)): + raise ValueError("Invalid local_folder_path (src directory)") + src_folder_path = src_folder_path.rstrip('/') # trailing slashes + + # test aws target folder + if (aws_target_path is None) or (len(aws_target_path.strip()) == 0): + raise ValueError("aws target folder path not supplied") + + aws_target_path = aws_target_path.strip() # spaces + aws_target_path = aws_target_path.strip('/') # leading/trailing slashes + + + # --- Upload the files + print("************************************") + print("--- Start uploading files ") + fh.print_current_date_time() + start_dt = datetime.now() + + self.bulk_upload(src_folder_path, aws_target_path, whitelist_file_names) + + print("--- End uploading files") + fh.print_current_date_time() + end_dt = datetime.now() + fh.print_date_time_duration(start_dt, end_dt) + + print("************************************") + + + def bulk_upload(self, src_folder_path, aws_target_path, whitelist_file_names = []): + + ''' + Overview + ---------- + Files will be loaded keeping their folder structure and will be loaded + via Bash AWS CLI. It is preferred to load via boto3 but as of Aug 2022, it does + not have a bulk loader, only a single file loader and it is very slow. + + With CLI, we have a bit of trouble with outputs, but will track it as best we can + + Parameters + ---------- + - src_folder_path : str + fully pathed location of where the files are being copied from + + - aws_target_path : str + The full url including subfolder path + For example: s3://example-us-east-1/test_upload/test_2 + + - whitelist_file_names : list (but can be empty) + A list of file names to be included in the transfer. If the file name includes + a {}, it will be adjusted for aws cli format automatically. + + Returns + ----------------- + True (successful copy ) or False (unsuccessful) + + ''' + + is_copy_successful = False + + cmd = self.get_aws_cli_credentials() + + # used cp (copy and replace. 
could have used 'sync' which looks for updates only) + cmd += f' && aws s3 cp --recursive {src_folder_path}' + + if not aws_target_path.startswith("s3://"): + aws_target_path = "s3://" + aws_target_path + + cmd += f' {aws_target_path}' + + if (whitelist_file_names is not None) and (len(whitelist_file_names) > 1): + cmd += f' {self.create_aws_cli_include_argument(whitelist_file_names)} ' + + fh.vprint(f"cmd is {cmd}", self.is_verbose, True) + print("") + + process = subprocess.Popen(cmd, shell = True, bufsize = 1, + stdout = subprocess.PIPE, + stderr = subprocess.STDOUT, + errors = 'replace' + ) + + while True: + + realtime_output = process.stdout.readline() + if realtime_output == '' and process.poll() is not None: + break + if realtime_output: + # AWS spits out a tons of "completes" + if (not realtime_output.startswith("Completed")): + print(realtime_output.strip(), flush=False) + sys.stdout.flush() + + is_copy_successful = True # catching of correct error not quite working + + return is_copy_successful + + +if __name__ == '__main__': + + # Sample Usage + #python3 /foss_fim/data/aws/s3.py -a put -c /data/config/aws_creds.env -s /data/previous_fim/fim_4_0_13_1 -t "s3://example-us-east-1/fim_4_0_13_1" + + # You can leave the -w flag off to load all files/folders from a directory + # but default is to -w /foss_fim/config/aws_s3_put_fim4_hydrovis_whitelist.lst + + # this works for hydroviz but can work with other s3 sites as well (ie esip) + + parser = argparse.ArgumentParser(description='Communication with aws s3 data services') + parser.add_argument('-a','--action_type', + help='value of get or put (defaults to put)', + default="put", required=False) + parser.add_argument('-c','--aws_cred_env_file', + help='path to aws credentials env file', required=True) + parser.add_argument('-s','--local_folder_path', + help='folder path of all files to be saved to or from', required=True) + parser.add_argument('-t','--aws_target_path', + help='s3 bucket address and folder', required=True) + parser.add_argument('-w','--whitelist_file_path', + help='A file with the last of file names to be copied up (line delimited)', + default='/foss_fim/config/aws_s3_put_fim4_hydrovis_whitelist.lst', + required=False) + parser.add_argument('-v','--is_verbose', + help='Adding this flag will give additional tracing output', + required=False, default=False, action='store_true') + + args = vars(parser.parse_args()) + + if (args['action_type'] == "put"): + + s3 = S3(path_to_cred_env_file = args['aws_cred_env_file'], + is_verbose = args['is_verbose']) + s3.put_to_bucket(src_folder_path = args['local_folder_path'], + aws_target_path = args['aws_target_path'], + whitelist_file_path = args['whitelist_file_path']) + + elif (args["action_type"] == "get"): + raise Exception("Error: get method not yet implemented or available") + else: + raise Exception("Error: action type value invalid. Current options are: 'put' (more coming soon) ") + + diff --git a/data/create_vrt_file.py b/data/create_vrt_file.py new file mode 100644 index 000000000..0e3c1a96b --- /dev/null +++ b/data/create_vrt_file.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import os +import sys + +from datetime import datetime +from osgeo import gdal + +sys.path.append('/foss_fim/src') +from utils.shared_functions import FIM_Helpers as fh + +def create_vrt_file(src_directory, vrt_file_name): + + ''' + Overview + ---------- + Takes all .tif files in a given directory and creates a vrt from them. 
+ Note: This assumes all .tifs share a common directory. + + Parameters + ---------- + - src_directory (str): + Location where the .tifs are at + - vrt_file_name (str): + The name of the vrt file to be created. Note: it will be in the same + directory as the tifs. + ''' + + # ------------------- + # Validation + if (not os.path.exists(src_directory)): + raise ValueError(f'src_directory value of {src_directory}'\ + ' not set to a valid path') + + if (vrt_file_name is None) or (vrt_file_name == ""): + raise ValueError(f'vrt_file_name not defined.') + + if (not vrt_file_name.endswith(".vrt")): + vrt_file_name += ".vrt" + + # ------------------- + + target_vrt_file_path = os.path.join(src_directory, vrt_file_name) + + # ------------------- + # setup logs + start_time = datetime.now() + fh.print_start_header('Creating vrt file', start_time) + + __setup_logger(src_directory) + logging.info(f"saving vrt to {target_vrt_file_path}") + + # ------------------- + # processing + + tif_file_names = fh.get_file_names(src_directory, '.tif') + + __create_vrt(tif_file_names, target_vrt_file_path) + + end_time = datetime.now() + fh.print_end_header('Loading 3dep dems', start_time, end_time) + logging.info(fh.print_date_time_duration(start_time, end_time)) + + +def __create_vrt(tif_file_names, target_vrt_file_path): + + logging.info("Files included:") + for file_name in tif_file_names: + logging.info(f" - {file_name}") + + result = gdal.BuildVRT(target_vrt_file_path, tif_file_names) + logging.info(result) + + +def __setup_logger(output_folder_path): + + start_time = datetime.now() + file_dt_string = start_time.strftime("%Y_%m_%d-%H_%M_%S") + log_file_name = f"vrt_build-{file_dt_string}.log" + + log_file_path = os.path.join(output_folder_path, log_file_name) + + file_handler = logging.FileHandler(log_file_path) + file_handler.setLevel(logging.INFO) + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.DEBUG) + + logger = logging.getLogger() + logger.addHandler(file_handler) + logger.addHandler(console_handler) + logger.setLevel(logging.DEBUG) + + logging.info(f'Started : {start_time.strftime("%m/%d/%Y %H:%M:%S")}') + logging.info("----------------") + + +if __name__ == '__main__': + + # Sample Usage: python3 /foss_fim/data/create_vrt_file.py -s /data/inputs/3dep_dems/10m_5070/ -n "fim_seamless_3dep_dem_10m_5070.vrt" + + parser = argparse.ArgumentParser(description='Create a vrt using all tifs in a given directory') + + parser.add_argument('-s','--src_directory', help='A directory of where the .tif files '\ + 'files exist. If the -f (tif-file) param is empty then all .tif files '\ + 'in this directory will be used.', + required=True) + + parser.add_argument('-n','--vrt_file_name', help='Name of the vrt file (name only) to be created. ' \ + 'Note: it will be created in the source directory.', + required=True) + + args = vars(parser.parse_args()) + + create_vrt_file(**args) diff --git a/data/esri.py b/data/esri.py new file mode 100644 index 000000000..83e26341c --- /dev/null +++ b/data/esri.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +import requests +import pandas as pd +import geopandas as gpd +from tqdm import tqdm + +class ESRI_REST(object): + """ + This class was built for querying ESRI REST endpoints for the purpose of downloading datasets. + See /data/nld/levee_download.py for an example useage. 
+ """ + + def __init__(self, query_url, params, verbose=True): + self.query_url = query_url + self.params = params + self.verbose = verbose + self.exceededTransferLimit = True + self.feature_count = 0 + + @classmethod + def query(cls, query_url:str, save_file:str=None, **kwargs): + ''' + Classmethod for easily queries on ESRI feature services. See /data/nld/levee_download.py for example usage. + + Parameters + ---------- + query_url: str + URL to query. This should have the layer # and 'query' at the end, e.g. + https://ags03.sec.usace.army.mil/server/rest/services/NLD2_PUBLIC/FeatureServer/15/query + save_file: str + Optional. Location to save the output geopackage. This method will not return + a geodataframe if this parameter is set. + **kwargs + All kwargs get passed to the service query. Here's an example of some standard ones: + `f="json", where="1=1", returnGeometry="true", outFields="*", outSR="5070"` + + Returns + ------- + gdf_complete: geopandas.GeoDataFrame + GeoDataFrame containing all of the features returned by the query. `None` is returned if the + save_file parameter is set. + ''' + # Query the input URL using the kwargs as URL parameters + rest_call = cls(query_url, kwargs) + gdf_complete = rest_call._query_rest() + # Save geodataframe as geopackage + if save_file: + gdf_complete.to_file(save_file, driver="GPKG", index=False) + else: + return gdf_complete + + def _query_rest(self): + ''' + Method that sets up multiple REST calls when there are more features than the transfer limit + set by the feature service and concatenates each response into a single GeoDataFrame. + + Returns + ------- + gdf_complete: geopandas.GeoDataFrame + GeoDataFrame containing all of the features returned by the query. + ''' + gdf_list = [] + record_count = 0 + backup_counter = 0 + if self.verbose: + self._meta_query() + print("-------------------------------------") + print(f"Service name: {self.metadata['name']}") + print(f"Features returned: {self.feature_count}") + if 'resultRecordCount' in self.params.keys(): + print(f"Request max record count: {self.params['resultRecordCount']}") + print(f"Total API calls: {-(self.feature_count//-self.params['resultRecordCount'])}") + else: + print(f"Service max record count: {self.metadata['maxRecordCount']}") + print(f"Total API calls: {-(self.feature_count//-self.metadata['maxRecordCount'])}") + # Call the REST API repeatedly until all of the features have been collected, i.e. the transfer + # limit has no longer been exceeded + with tqdm(total=self.feature_count, desc='Feature download progress', disable=not self.verbose) as pbar: + while (self.exceededTransferLimit) and (backup_counter < 9999): + # Set the resultOffset to the number of records that's already been downloaded + self.params['resultOffset'] = record_count + sub_gdf = self._sub_query(self.params) + gdf_list.append(sub_gdf) + record_count += len(sub_gdf) + backup_counter += 1 + pbar.update(len(sub_gdf)) + # Concatenate all responses into a single geodataframe + gdf_complete = pd.concat(gdf_list) + return gdf_complete + + def _sub_query(self, params): + ''' + This method calls the REST API. + + Parameters + ---------- + params: dict + Parameters for the rest query. + + Returns + ------- + sub_gdf: geopandas.GeoDataFrame + GeoDataFrame containing features returned by the query. 
FYI This may not be the complete + dataset if the 'exceededTransferLimit' == True + ''' + self._api_call(self.query_url, params) + # Set exceededTransferLimit if there need to be another sub_request + r_dict = self.response.json() + if 'exceededTransferLimit' in r_dict.keys(): + self.exceededTransferLimit = r_dict['exceededTransferLimit'] + # This very nondescript error was returned when querying a polygon layer. + # Setting resultRecordCount to a lower value fixed the error. + elif 'error' in r_dict.keys(): + print("There was an error with the query. It may have been caused by requesting too many features. Try setting resultRecordCount to a lower value.") + raise Exception(r_dict['error']['message'], f"code: {r_dict['error']['code']}") + else: + self.exceededTransferLimit = False + # Read the response into a GeoDataFrame + sub_gdf = gpd.read_file(self.response.text) + return sub_gdf + + def _api_call(self, url, params=None): + ''' + Helper method for calling the API and checking that the response is ok. + + + ''' + self.response = requests.get(url, params=params) + if not self.response.ok: + raise Exception(f"The following URL recieved a bad response.\n{self.response.url}") + + def _meta_query(self): + + # Get the service metadata + self._api_call(self.query_url[:self.query_url.rfind('query')], self.params) + self.metadata = self.response.json() + # Get the record count returned by the query + params = self.params.copy() + params['returnCountOnly'] = "true" + self._api_call(self.query_url, params) + self.feature_count = self.response.json()['count'] \ No newline at end of file diff --git a/data/nld/levee_download.py b/data/nld/levee_download.py new file mode 100755 index 000000000..c7f1f41e9 --- /dev/null +++ b/data/nld/levee_download.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 + +import sys +import os +import re +import geopandas as gpd +from tqdm import tqdm +from datetime import datetime +from shapely.geometry import LineString, MultiLineString + +sys.path += ['/foss_fim/src', '/foss_fim/data', '/foss_fim/tools'] +from utils.shared_variables import DEFAULT_FIM_PROJECTION_CRS +from tools_shared_variables import INPUTS_DIR +from esri import ESRI_REST + +epsg_code = re.search('\d+$', DEFAULT_FIM_PROJECTION_CRS).group() +today = datetime.now().strftime('%y%m%d') +nld_vector_output = os.path.join(INPUTS_DIR, 'nld_vectors', f'System_Routes_NLDFS_5070_{today}.gpkg') +processed_nld_vector = os.path.join(INPUTS_DIR, 'nld_vectors', f'3d_nld_preprocessed_{today}.gpkg') +nld_protected_areas = os.path.join(INPUTS_DIR, 'nld_vectors', f'Leveed_Areas_NLDFS_5070_{today}.gpkg') + +def download_nld_lines(): + ''' + First main function call for this module. Downloads levees from the National Levee Database + ESRI service, save the raw output for use in the levee masking algorithm, processes + the levees to remove lines and vertices that have no elevation data, and saves the preprocessed + levee geopackage for use in levee burning. NOTE: In order for the files generated by this script to be used, update the date (today) in bash_variables.env for input_NLD, input_levees_preprocessed, and input_nld_levee_protected_areas. 
+ ''' + + # Query REST service to download levee 'system routes' + print("Downloading levee lines from the NLD...") + nld_url = "https://ags03.sec.usace.army.mil/server/rest/services/NLD2_PUBLIC/FeatureServer/15/query" + levees = ESRI_REST.query(nld_url, + f="json", where="1=1", returnGeometry="true", outFields="*", outSR=epsg_code, returnZ="true") + + # Write levees to a single geopackage + levees.to_file(nld_vector_output, index=False, driver='GPKG') + print(f"Levees written to file:\n{nld_vector_output}") + + # Spatial join to huc2 + print('Spatial join levees to HUC-2') + huc2 = gpd.read_file(os.path.join(INPUTS_DIR, 'wbd', 'WBD_National_EPSG_5070.gpkg'), layer='WBDHU2') + levees = gpd.sjoin(levees, huc2[['HUC2', 'geometry']], how='left') + + # Preprocess levees to remove features and vertices with no elevation + print('Preprocess levees to remove features and vertices with no elevation ') + process_levee_lines(levees, out_levees=processed_nld_vector) + + +def process_levee_lines(levee_gdf:gpd.GeoDataFrame, out_levees:str): + ''' + Function for processing levee lines prior to rasterization and burning into + the DEM. NOTE: Do not use the output of this function for the levee protected + area masking. This dataset will be incomplete since it filters out some levees + that have no z-values. + + Parameters + ---------- + levee_lines: gpd.GeoDataFrame + Raw NLD vectors file. + out_levees: str + Path to right preprocessed levees. + ''' + + # Filter vertices that have z-values less than the minimum from levee geometry + tqdm.pandas(desc='Removing null elevations') + levee_gdf['geometry'] = levee_gdf.progress_apply(lambda row: remove_nulls(row.geometry, row.HUC2), axis=1) + # Remove levees that have empty geometries resulting from the previous filter + levee_gdf = levee_gdf[~levee_gdf.is_empty] + levee_gdf.to_file(out_levees, index=False, driver='GPKG') + print(f"Preprocessed levees written to \n{out_levees}") + + +def remove_nulls(geom:LineString, huc:str): + ''' + Removes vertices from shapely LineString `geom` if they are less than `min_z`. + + Parameters + ---------- + geom: shapely.geometry.LineString + Shapely geometry from which to filter vertices. + huc: str + HUC. Can be any digit 2 or greater. + + Returns + ------- + out_geom: shapely.geometry.LineString + Filtered geometry. + ''' + # Set min z based on HUC2 + huc2 = huc[:2] # works with any HUC digit code + if huc2 in ['01','02','03','12']: # Coastal HUCs may have values near 0 + min_z = 0.01 + elif huc2 == '08': # Louisana below sea level + min_z = -10.0 + else: + min_z = 1.0 # Default set to 1 ft + # Loop through the vertices + out_geom = [] + part_geom = [] + skipped_vert = 0 + max_skipped_vert = 5 + for coord in geom.coords: + skip_flag = False + if coord[2] > min_z: + # Convert units from feet to meters + part_geom.append(tuple([coord[0], coord[1], coord[2]*0.3048])) + elif skipped_vert < max_skipped_vert: + # Allows a few (5) vertices to be skipped without forcing a multipart break. + # This enables short sections of roads that cross levees to have the levee elevations + # burned in to account for temporary flood walls not captured in the data. 
+ skip_flag = True + skipped_vert += 1 + elif (len(part_geom) > 1) and (not skip_flag): # Create a multipart geometry when there's a break in z-values + out_geom.append(LineString(part_geom)) + part_geom = [] + skipped_vert = 0 + # Append the last segment + if len(part_geom) > 1: + out_geom.append(LineString(part_geom)) + # Compile LineString geometries into one multipart geometry + if len(out_geom) >= 2: + return MultiLineString(out_geom) + elif (len(out_geom) == 1) and (len(out_geom[0].coords) > 1): + return MultiLineString(out_geom) + else: + return None + +def download_nld_poly(): + ''' + Second main function call for this module. Downloads levee protected areas from the National Levee Database + ESRI service and saves the raw output for use in the levee masking algorithm. + ''' + # Query REST service to download levee 'system routes' + print("Downloading levee protected areas from the NLD...") + nld_area_url = "https://ags03.sec.usace.army.mil/server/rest/services/NLD2_PUBLIC/FeatureServer/14/query" + # FYI to whomever takes the time to read this code, the resultRecordCount had to be set on this query because + # the service was returning an error that turned out to be caused by the size of the request. Running the + # default max record count of 5000 was too large for polygons, so using resultRecordCount=2000 prevents the error. + leveed_areas = ESRI_REST.query(nld_area_url, + f="json", where="1=1", returnGeometry="true", outFields="*", outSR=epsg_code, resultRecordCount=2000) + + # Write levees to a single geopackage + leveed_areas.to_file(nld_protected_areas, index=False, driver='GPKG') + print(f"Levees written to file:\n{nld_protected_areas}") + + +if __name__ == '__main__': + + download_nld_lines() + + download_nld_poly() diff --git a/tools/preprocess_ahps_nws.py b/data/nws/preprocess_ahps_nws.py similarity index 97% rename from tools/preprocess_ahps_nws.py rename to data/nws/preprocess_ahps_nws.py index 8c5a5f5ec..9bba37573 100644 --- a/tools/preprocess_ahps_nws.py +++ b/data/nws/preprocess_ahps_nws.py @@ -1,16 +1,26 @@ #!/usr/bin/env python3 +import argparse +import geopandas as gpd import numpy as np -from pathlib import Path +import os import pandas as pd -import geopandas as gpd import rasterio -from tools_shared_functions import mainstem_nwm_segs, get_metadata, aggregate_wbd_hucs, get_thresholds, get_datum, ngvd_to_navd_ft, get_rating_curve, select_grids, get_nwm_segs, flow_data, process_extent, process_grid, raster_to_feature -import argparse -from dotenv import load_dotenv -import os -import traceback import sys -sys.path.append('/foss_fim/src') +import traceback + +from dotenv import load_dotenv +from pathlib import Path +sys.path.append('/foss_fim/tools') +from tools_shared_functions import mainstem_nwm_segs, get_metadata, aggregate_wbd_hucs, get_thresholds, get_datum, ngvd_to_navd_ft, get_rating_curve, select_grids, get_nwm_segs, flow_data, process_extent, process_grid, raster_to_feature + + +######################################################## +''' +Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. +''' + +######################################################## def get_env_paths(): @@ -353,6 +363,15 @@ def preprocess_nws(source_dir, destination, reference_raster): if __name__ == '__main__': + + ######################################################## + ''' + Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. 
+ ''' + + ######################################################## + #Parse arguments parser = argparse.ArgumentParser(description = 'Create preprocessed USGS benchmark datasets at AHPS locations.') parser.add_argument('-s', '--source_dir', help = 'Workspace where all source data is located.', required = True) diff --git a/data/usgs/acquire_and_preprocess_3dep_dems.py b/data/usgs/acquire_and_preprocess_3dep_dems.py new file mode 100644 index 000000000..d69b0eea3 --- /dev/null +++ b/data/usgs/acquire_and_preprocess_3dep_dems.py @@ -0,0 +1,400 @@ +#!/usr/bin/env python3 + +import argparse +import glob +import logging +import os +import subprocess +import sys +import traceback +import geopandas as gpd + +from concurrent.futures import ProcessPoolExecutor, as_completed, wait +from datetime import datetime +#from tqdm import tqdm + +sys.path.append('/foss_fim/src') +import utils.shared_variables as sv +import utils.shared_functions as sf + +from utils.shared_functions import FIM_Helpers as fh + +# local constants (until changed to input param) +# This URL is part of a series of vrt data available from USGS via an S3 Bucket. +# for more info see: "http://prd-tnm.s3.amazonaws.com/index.html?prefix=StagedProducts/Elevation/". The odd folder numbering is +# a translation of arc seconds with 13m being 1/3 arc second or 10 meters. +__USGS_3DEP_10M_VRT_URL = r'/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt' # 10m = 13 (1/3 arc second) + + +def acquire_and_preprocess_3dep_dems(extent_file_path, + target_output_folder_path = '', + number_of_jobs = 1, + retry = False): + + ''' + Overview + ---------- + This will download 3dep rasters from USGS using USGS vrts. + By default USGS 3Dep stores all their rasters in lat/long (northing and easting). + By us downloading the rasters using WBD HUC4 clips and gdal, we an accomplish a few extra + steps. + 1) Ensure the projection types that are downloaded are consistant and controlled. + We are going to download them as NAD83 basic (espg: 4269) which is consistant + with other data sources, even though FIM defaults to ESRI:102039. We will + change that as we add the clipped version per HUC8. + 2) ensure we are adjusting blocksizes, compression and other raster params + 3) Create the 3dep rasters in the size we want (default at HUC4 for now) + + Notes: + - As this is a very low use tool, all values such as the USGS vrt path, output + folder paths, huc unit level (huc4), etc are all hardcoded + + Parameters + ---------- + - extent_file_path (str): + Location of where the extent files that are to be used as clip extent against + the USGS 3Dep vrt url. + ie) \data\inputs\wbd\HUC4 + + - target_output_folder_path (str): + The output location of the new 3dep dem files. When the param is not submitted, + it will be sent to /data/input/usgs/3dep_dems/10m/. + + - number_of_jobs (int): + This program supports multiple procs if multiple procs/cores are available. + + - retry (True / False): + If retry is True and the file exists (either the raw downloaded DEM and/or) + the projected one, then skip it + ''' + # ------------------- + # Validation + total_cpus_available = os.cpu_count() - 1 + if number_of_jobs > total_cpus_available: + raise ValueError('The number of jobs {number_of_jobs}'\ + 'exceeds your machine\'s available CPU count minus one. 
'\ + 'Please lower the number of jobs '\ + 'values accordingly.'.format(number_of_jobs) + ) + + if (not os.path.exists(extent_file_path)): + raise ValueError(f'extent_file_path value of {extent_file_path}'\ + ' not set to a valid path') + + if (target_output_folder_path is None) or (target_output_folder_path == ""): + target_output_folder_path = os.environ['usgs_3dep_dems_10m'] + + if (not os.path.exists(target_output_folder_path)): + raise ValueError(f"Output folder path {target_output_folder_path} does not exist" ) + + # ------------------- + # setup logs + start_time = datetime.now() + fh.print_start_header('Loading 3dep dems', start_time) + + #print(f"Downloading to {target_output_folder_path}") + __setup_logger(target_output_folder_path) + logging.info(f"Downloading to {target_output_folder_path}") + + + # ------------------- + # processing + + # Get the WBD .gpkg files (or clip extent) + extent_file_names = fh.get_file_names(extent_file_path, 'gpkg') + msg = f"Extent files coming from {extent_file_path}" + print(msg) + logging.info(msg) + + # download dems, setting projection, block size, etc + __download_usgs_dems(extent_file_names, target_output_folder_path, number_of_jobs, retry) + + polygonize(target_output_folder_path) + + end_time = datetime.now() + fh.print_end_header('Loading 3dep dems', start_time, end_time) + print(f'---- NOTE: Remember to scan the log file for any failures') + logging.info(fh.print_date_time_duration(start_time, end_time)) + + +def __download_usgs_dems(extent_files, output_folder_path, number_of_jobs, retry): + + ''' + Process: + ---------- + download the actual raw (non reprojected files) from the USGS + based on stated embedded arguments + + Parameters + ---------- + - fl (object of fim_logger (must have been created)) + - remaining params are defined in acquire_and_preprocess_3dep_dems + + Notes + ---------- + - pixel size set to 10 x 10 (m) + - block size (256) (sometimes we use 512) + - cblend 6 add's a small buffer when pulling down the tif (ensuring seamless + overlap at the borders.) 
+ + ''' + + print(f"==========================================================") + print(f"-- Downloading USGS DEMs Starting") + + base_cmd = 'gdalwarp {0} {1}' + base_cmd += ' -cutline {2} -crop_to_cutline -ot Float32 -r bilinear' + base_cmd += ' -of "GTiff" -overwrite -co "BLOCKXSIZE=256" -co "BLOCKYSIZE=256"' + base_cmd += ' -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -tr 10 10' + base_cmd += ' -t_srs {3} -cblend 6' + + with ProcessPoolExecutor(max_workers=number_of_jobs) as executor: + + executor_dict = {} + + for idx, extent_file in enumerate(extent_files): + + download_dem_args = { + 'extent_file': extent_file, + 'output_folder_path': output_folder_path, + 'download_url': __USGS_3DEP_10M_VRT_URL, + 'base_cmd':base_cmd, + 'retry': retry + } + + try: + future = executor.submit(download_usgs_dem_file, **download_dem_args) + executor_dict[future] = extent_file + except Exception as ex: + + summary = traceback.StackSummary.extract( + traceback.walk_stack(None)) + print(f"*** {ex}") + print(''.join(summary.format())) + + logging.critical(f"*** {ex}") + logging.critical(''.join(summary.format())) + + sys.exit(1) + + # Send the executor to the progress bar and wait for all tasks to finish + sf.progress_bar_handler(executor_dict, f"Downloading USGG 3Dep Dems") + + print(f"-- Downloading USGS DEMs Completed") + logging.info(f"-- Downloading USGS DEMs Completed") + print(f"==========================================================") + + +def download_usgs_dem_file(extent_file, + output_folder_path, + download_url, + base_cmd, + retry): + + ''' + Process: + ---------- + Downloads just one dem file from USGS. This is setup as a method + to allow for multi-processing. + + + Parameters: + ---------- + - extent_file (str) + When the dem is downloaded, it is clipped against this extent (.gkpg) file. + - output_folder_path (str) + Location of where the output file will be stored + - download_url (str) + URL for the USGS download site (note: Should include '/vsicurl/' at the + front of the URL) + - base_cmd (str) + The basic GDAL command with string formatting wholes for key values. + See the cmd variable below. + ie) + base_cmd = 'gdalwarp {0} {1}' + base_cmd += ' -cutline {2} -crop_to_cutline -ot Float32 -r bilinear' + base_cmd += ' -of "GTiff" -overwrite -co "BLOCKXSIZE=256" -co "BLOCKYSIZE=256"' + base_cmd += ' -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -tr 10 10' + base_cmd += ' -t_srs {3} -cblend 6' + - retry (bool) + If True, and the file exists (and is over 0k), downloading will be skipped. + + ''' + + basic_file_name = os.path.basename(extent_file).split('.')[0] + target_file_name_raw = f"{basic_file_name}_dem.tif" # as downloaded + target_path_raw = os.path.join(output_folder_path, + target_file_name_raw) + + # File might exist from a previous failed run. If it was aborted or failed + # on a previous attempt, it's size less than 1mg, so delete it. + # + # IMPORTANT: + # + # it might be compromised on a previous run but GREATER 1mg (part written). + # That scenerio is not handled as we can not tell if it completed. 
+ + if (retry) and (os.path.exists(target_path_raw)): + if (os.stat(target_path_raw).st_size < 1000000): + os.remove(target_path_raw) + else: + msg = f" - Downloading -- {target_file_name_raw} - Skipped (already exists (see retry flag))" + print(msg) + logging.info(msg) + return + + msg = f" - Downloading -- {target_file_name_raw} - Started" + print(msg) + logging.info(msg) + + cmd = base_cmd.format(download_url, + target_path_raw, + extent_file, + sv.DEFAULT_FIM_PROJECTION_CRS) + #PREP_PROJECTION_EPSG + #fh.vprint(f"cmd is {cmd}", self.is_verbose, True) + #print(f"cmd is {cmd}") + + # didn't use Popen becuase of how it interacts with multi proc + # was creating some issues. Run worked much better. + process = subprocess.run(cmd, shell = True, + stdout = subprocess.PIPE, + stderr = subprocess.PIPE, + check = True, + universal_newlines=True) + + msg = process.stdout + print(msg) + logging.info(msg) + + if (process.stderr != ""): + if ("ERROR" in process.stderr.upper()): + msg = f" - Downloading -- {target_file_name_raw}"\ + f" ERROR -- details: ({process.stderr})" + print(msg) + logging.error(msg) + os.remove(target_path_raw) + else: + msg = f" - Downloading -- {target_file_name_raw} - Complete" + print(msg) + logging.info(msg) + + +def polygonize(target_output_folder_path): + """ + Create a polygon of 3DEP domain from individual HUC6 DEMS which are then dissolved into a single polygon + """ + dem_domain_file = os.path.join(target_output_folder_path, 'HUC6_dem_domain.gpkg') + + msg = f" - Polygonizing -- {dem_domain_file} - Started" + print(msg) + logging.info(msg) + + dem_files = glob.glob(os.path.join(target_output_folder_path, '*_dem.tif')) + dem_gpkgs = gpd.GeoDataFrame() + + for n, dem_file in enumerate(dem_files): + edge_tif = f'{os.path.splitext(dem_file)[0]}_edge.tif' + edge_gpkg = f'{os.path.splitext(edge_tif)[0]}.gpkg' + + # Calculate a constant valued raster from valid DEM cells + if not os.path.exists(edge_tif): + subprocess.run(['gdal_calc.py', '-A', dem_file, f'--outfile={edge_tif}', '--calc=where(A > -900, 1, 0)', '--co', 'BIGTIFF=YES', '--co', 'NUM_THREADS=ALL_CPUS', '--co', 'TILED=YES', '--co', 'COMPRESS=LZW', '--co', 'SPARSE_OK=TRUE', '--type=Byte', '--quiet']) + + # Polygonize constant valued raster + subprocess.run(['gdal_polygonize.py', '-8', edge_tif, '-q', '-f', 'GPKG', edge_gpkg]) + + gdf = gpd.read_file(edge_gpkg) + + if n == 0: + dem_gpkgs = gdf + else: + dem_gpkgs = dem_gpkgs.append(gdf) + + os.remove(edge_tif) + + dem_gpkgs['DN'] = 1 + dem_dissolved = dem_gpkgs.dissolve(by='DN') + dem_dissolved.to_file(dem_domain_file, driver='GPKG') + + if not os.path.exists(dem_domain_file): + msg = f" - Polygonizing -- {dem_domain_file} - Failed" + print(msg) + logging.error(msg) + else: + msg = f" - Polygonizing -- {dem_domain_file} - Complete" + print(msg) + logging.info(msg) + + +def __setup_logger(output_folder_path): + + start_time = datetime.now() + file_dt_string = start_time.strftime("%Y_%m_%d-%H_%M_%S") + log_file_name = f"3Dep_downloaded-{file_dt_string}.log" + + log_file_path = os.path.join(output_folder_path, log_file_name) + + file_handler = logging.FileHandler(log_file_path) + file_handler.setLevel(logging.INFO) + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.DEBUG) + + logger = logging.getLogger() + logger.addHandler(file_handler) + logger.setLevel(logging.DEBUG) + + logging.info(f'Started : {start_time.strftime("%m/%d/%Y %H:%M:%S")}') + logging.info("----------------") + + +if __name__ == '__main__': + + # Parse arguments. 
+ + # sample usage (min params): + # - python3 /foss_fim/data/usgs/acquire_and_preprocess_3dep_dems.py -e /data/inputs/wbd/HUC6_ESPG_5070/ -t /data/inputs/3dep_dems/10m_5070/ -r -j 20 + + # Notes: + # - This is a very low use tool. So for now, this only can load 10m (1/3 arc second) and is using + # hardcoded paths for the wbd gpkg to be used for clipping (no buffer for now). + # Also hardcoded usgs 3dep urls, etc. Minor + # upgrades can easily be made for different urls, output folder paths, huc units, etc + # as/if needed (command line params) + # - The output path can be adjusted in case of a test reload of newer data for 3dep. + # The default is /data/input/usgs/3dep_dems/10m/ + # - While you can (and should use more than one job number (if manageable by your server)), + # this tool is memory intensive and needs more RAM then it needs cores / cpus. Go ahead and + # anyways and increase the job number so you are getting the most out of your RAM. Or + # depending on your machine performance, maybe half of your cpus / cores. This tool will + # not fail or freeze depending on the number of jobs / cores you select. + + + # IMPORTANT: + # (Sept 2022): we do not process HUC2 of 22 (misc US pacific islands). + # We left in HUC2 of 19 (alaska) as we hope to get there in the semi near future + # They need to be removed from the input src clip directory in the first place. + # They can not be reliably removed in code. + + parser = argparse.ArgumentParser(description='Acquires and preprocesses USGS 3Dep dems') + + parser.add_argument('-e','--extent_file_path', help='location the gpkg files that will'\ + ' are being used as clip regions (aka.. huc4_*.gpkg or whatever).'\ + ' All gpkgs in this folder will be used.', required=True) + + parser.add_argument('-j','--number_of_jobs', help='Number of (jobs) cores/processes to used.', + required=False, default=1, type=int) + + parser.add_argument('-r','--retry', help='If included, it will skip files that already exist.'\ + ' Default is all will be loaded/reloaded.', + required=False, action='store_true', default=False) + + parser.add_argument('-t','--target_output_folder_path', help='location of where the 3dep files'\ + ' will be saved', required=False, default='') + + + # Extract to dictionary and assign to variables. 
+ args = vars(parser.parse_args()) + + acquire_and_preprocess_3dep_dems(**args) + diff --git a/tools/preprocess_ahps_usgs.py b/data/usgs/preprocess_ahps_usgs.py similarity index 99% rename from tools/preprocess_ahps_usgs.py rename to data/usgs/preprocess_ahps_usgs.py index e34725b4d..1c8b78be3 100644 --- a/tools/preprocess_ahps_usgs.py +++ b/data/usgs/preprocess_ahps_usgs.py @@ -1,17 +1,18 @@ #!/usr/bin/env python3 +import argparse import numpy as np -from pathlib import Path +import os import pandas as pd import rasterio import requests -from tools_shared_functions import mainstem_nwm_segs, get_metadata, aggregate_wbd_hucs, get_thresholds, get_datum, ngvd_to_navd_ft, get_rating_curve, select_grids, get_nwm_segs, flow_data, process_extent, process_grid, raster_to_feature -import argparse -from dotenv import load_dotenv -import os import sys -sys.path.append('/foss_fim/src') import traceback +from dotenv import load_dotenv +from pathlib import Path + +sys.path.append('/foss_fim/tools') +from tools_shared_functions import mainstem_nwm_segs, get_metadata, aggregate_wbd_hucs, get_thresholds, get_datum, ngvd_to_navd_ft, get_rating_curve, select_grids, get_nwm_segs, flow_data, process_extent, process_grid, raster_to_feature def get_env_paths(): load_dotenv() diff --git a/tools/preprocess_download_usgs_grids.py b/data/usgs/preprocess_download_usgs_grids.py similarity index 99% rename from tools/preprocess_download_usgs_grids.py rename to data/usgs/preprocess_download_usgs_grids.py index 332a30ed1..8d85a6e1e 100644 --- a/tools/preprocess_download_usgs_grids.py +++ b/data/usgs/preprocess_download_usgs_grids.py @@ -1,13 +1,14 @@ #!/usr/bin/env python3 -import urllib.request -from pathlib import Path -from dotenv import load_dotenv -import os import argparse +import os +import pandas as pd import requests -from collections import defaultdict import urllib -import pandas as pd +import urllib.request + +from collections import defaultdict +from dotenv import load_dotenv +from pathlib import Path load_dotenv() USGS_DOWNLOAD_URL = os.getenv("USGS_DOWNLOAD_URL") diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 3ce00ed1d..37d4dcd07 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,11 +1,1779 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v4.3.3.4 - 2023-03-17 - [PR#849](https://github.com/NOAA-OWP/inundation-mapping/pull/849) + +This hotfix addresses an error in inundate_nation.py relating to projection CRS. + +## Changes + +- `tools/inundate_nation.py`: #782 CRS projection change likely causing issue with previous projection configuration + +
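+For context, the projection issue is the generic one of mixing layers in different CRSs; a minimal, illustrative guard (not the actual `inundate_nation.py` change; the target CRS and function name are assumptions) looks like:
+
+```python
+import geopandas as gpd
+
+TARGET_CRS = "EPSG:5070"  # assumed target projection
+
+def ensure_crs(vector_path, target_crs=TARGET_CRS):
+    # Read a layer and reproject it only if its CRS differs from the target.
+    gdf = gpd.read_file(vector_path)
+    if gdf.crs is None:
+        raise ValueError(f"{vector_path} has no CRS defined")
+    if gdf.crs.to_string() != target_crs:
+        gdf = gdf.to_crs(target_crs)
+    return gdf
+```
+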

+
+## v4.3.3.3 - 2023-03-20 - [PR#854](https://github.com/NOAA-OWP/inundation-mapping/pull/854)
+
+At least one site (e.g. TRYM7) was not being mapped in Stage-Based CatFIM, despite having all of the acceptable accuracy codes. This was caused by a data type issue: the `acceptable_coord_acc_code_list` in `tools_shared_variables.py` stored the accuracy codes 5 and 1 as strings instead of integers.
+
+### Changes
+
+- `/tools/tools_shared_variables.py`: Added integers 5 and 1 to the acceptable_coord_acc_code_list, and kept the '5' and '1' strings as well.
+
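+A minimal sketch of the failure mode (values simplified; not the actual CatFIM code):
+
+```python
+acceptable_coord_acc_code_list = ['1', '5']        # before: strings only
+site_acc_code = 5                                  # the site metadata supplies an integer
+
+print(site_acc_code in acceptable_coord_acc_code_list)   # False -> site silently skipped
+
+acceptable_coord_acc_code_list = ['1', '5', 1, 5]  # after: both types accepted
+print(site_acc_code in acceptable_coord_acc_code_list)   # True
+```
+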

+ +## v4.3.3.2 - 2023-03-20 - [PR#851](https://github.com/NOAA-OWP/inundation-mapping/pull/851) + +Bug fix to change `.split()` to `os.path.splitext()` + +### Changes + +- `src/stream_branches.py`: Change 3 occurrences of `.split()` to `os.path.splitext()` + +
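+A short illustration of why `os.path.splitext()` is safer than `.split('.')` (the path below is hypothetical):
+
+```python
+import os
+
+path = "/outputs/fim_4.3.3.2/02020005/branches/0/dem_meters_0.tif"  # dots in a directory name
+
+print(path.split('.')[0])        # '/outputs/fim_4'  -- everything after the first dot is lost
+print(os.path.splitext(path))    # ('/outputs/fim_4.3.3.2/02020005/branches/0/dem_meters_0', '.tif')
+```
+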

+ +## v4.3.3.1 - 2023-03-20 - [PR#855](https://github.com/NOAA-OWP/inundation-mapping/pull/855) + +Bug fix for KeyError in `src/associate_levelpaths_with_levees.py` + +### Changes + +- `src/associate_levelpaths_with_levees.py`: Adds check if input files exist and handles empty GeoDataFrame(s) after intersecting levee buffers with leveed areas. + +
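+A minimal sketch of the guard pattern described above (file paths, function name, and messages are illustrative, not the exact `associate_levelpaths_with_levees.py` code):
+
+```python
+import os
+import geopandas as gpd
+
+def associate(levee_buffers_file, leveed_areas_file):
+    # Inputs may legitimately be absent for a HUC with no levees.
+    for f in (levee_buffers_file, leveed_areas_file):
+        if not os.path.exists(f):
+            print(f"Missing input: {f}; skipping levee association for this HUC.")
+            return None
+
+    buffers = gpd.read_file(levee_buffers_file)
+    areas = gpd.read_file(leveed_areas_file)
+
+    # Either layer, or their intersection, can be empty; return early instead of
+    # letting later lookups raise a KeyError.
+    if buffers.empty or areas.empty:
+        print("No levee or leveed-area features; skipping.")
+        return None
+
+    intersections = gpd.overlay(buffers, areas, how="intersection")
+    if intersections.empty:
+        print("Levee buffers do not intersect any leveed area; skipping.")
+        return None
+
+    return intersections
+```
+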

+ +## v4.3.3.0 - 2023-03-02 - [PR#831](https://github.com/NOAA-OWP/inundation-mapping/pull/831) + +Addresses bug wherein multiple CatFIM sites in the flow-based service were displaying the same NWS LID. This merge also creates a workaround solution for a slowdown that was observed in the WRDS location API, which may be a temporary workaround, until WRDS addresses the slowdown. + +### Changes + +- `tools/generate_categorical_fim_mapping.py`: resets the list of tifs to format for each LID within the loop that does the map processing, instead of only once before the start of the loop. +- `tools/tools_shared_functions.py`: + - adds a try-except block around code that attempted to iterate on an empty list when the API didn't return relevant metadata for a given feature ID (this is commented out, but may be used in the future once WRDS slowdown is addressed). + - Uses a passed NWM flows geodataframe to determine stream order. +- `/tools/generate_categorical_fim_flows.py`: + - Adds multiprocessing to flows generation and uses `nwm_flows.gpkg` instead of the WRDS API to determine stream order of NWM feature_ids. + - Adds duration print messages. +- `/tools/generate_categorical_fim.py`: + - Refactor to allow for new NWM filtering scheme. + - Bug fix in multiprocessing calls for interval map production. + - Adds duration print messages. + +
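+The list-reset fix in `generate_categorical_fim_mapping.py` is the classic accumulator bug; a simplified illustration (LID names and file naming are hypothetical):
+
+```python
+lids = ["ABCR1", "DEFM2", "GHIN3"]   # hypothetical NWS LIDs
+
+# Before (buggy): one list created before the loop, so each LID inherited the previous
+# sites' tifs and multiple sites ended up labeled with the same NWS LID.
+# tifs_to_reformat = []
+
+# After: reset the list inside the loop so each LID only formats its own tifs.
+for lid in lids:
+    tifs_to_reformat = [f"{lid}_extent.tif"]
+    print(lid, tifs_to_reformat)
+```
+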

+ +## v4.3.2.0 - 2023-03-15 - [PR#845](https://github.com/NOAA-OWP/inundation-mapping/pull/845) + +This merge revises the methodology for masking levee-protected areas from inundation. It accomplishes two major tasks: (1) updates the procedure for acquiring and preprocessing the levee data to be burned into the DEM and (2) revises the way levee-protected areas are masked from branches. + +(1) There are now going to be two different levee vector line files in each HUC. One (`nld_subset_levees_burned.gpkg`) for the levee elevation burning and one (`nld_subset_levees.gpkg`) for the levee-level-path assignment and masking workflow. + +(2) Levee-protected areas are masked from inundation based on a few methods: + - Branch 0: All levee-protected areas are masked. + - Other branches: Levee-protected areas are masked from the DEMs of branches for level path(s) that the levee is protecting against by using single-sided buffers alongside each side of the levee to determine which side the levee is protecting against (the side opposite the associated levee-protected area). + +### Additions + +- `.gitignore`: Adds `.private` folder for unversioned code. +- `data/` + - `esri.py`: Class for querying and downloading ESRI feature services. + - `nld/` + - `levee_download.py`: Module that handles downloading and preprocessing levee lines and protected areas from the National Levee Database. +- `src/associate_levelpaths_with_levees.py`: Associates level paths with levees using single-sided levee buffers and writes to CSV to be used by `src/mask_dem.py` + +### Changes + +- `.config/` + - `deny_branch_zero.lst`: Adds `dem_meters_{}.tif`. + - `deny_branches.lst`: Adds `levee_levelpaths.csv` and removes `nld_subset_levees_{}.tif`. + - `deny_unit.lst`: Adds `dem_meters.tif`. + - `params_template.env`: Adds `levee_buffer` parameter for levee buffer size/distance in meters and `levee_id_attribute`. +- `src/` + - `bash_variables.env`: Updates `input_nld_levee_protected_areas` and adds `input_NLD` (moved from `run_unit_wb.sh`) and `input_levees_preprocessed` environment. .variables + - `burn_in_levees.py`: Removed the unit conversion from feet to meters because it's now being done in `levee_download.py`. + - `clip_vectors_to_wbd.py`: Added the new levee lines for the levee-level-path assignment and masking workflow. + - `delineate_hydros_and_produce_HAND.sh`: Updates input arguments. + - `mask_dem.py`: Updates to use `levee_levelpaths.csv` (output from `associate_levelpaths_with_levees.py`) to mask branch DEMs. + - `run_by_branch.sh`: Clips `dem_meters.tif` to use for branches instead of `dem_meters_0.tif` since branch 0 is already masked. + - `run_unit_wb.sh`: Added inputs to `clip_vectors_to_wbd.py`. Added `associate_levelpaths_with_levees.py`. Processes `dem_meters.tif` and then makes a copy for branch 0. Moved `deny_unit.lst` cleanup to after branch processing. + +### Removals +- `data/nld/preprocess_levee_protected_areas.py`: Deprecated. + +
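+To illustrate the single-sided buffer idea, here is a rough sketch of the concept only, using shapely's `single_sided` buffer; it is not the actual `associate_levelpaths_with_levees.py` logic, and the buffer distance is arbitrary:
+
+```python
+from shapely.geometry import LineString, Polygon
+
+def protected_side(levee_line, leveed_area, buffer_dist=1000):
+    # With single_sided=True, a positive distance buffers the left side of the line
+    # (relative to its direction of travel) and a negative distance the right side.
+    left = levee_line.buffer(buffer_dist, single_sided=True)
+    right = levee_line.buffer(-buffer_dist, single_sided=True)
+    if left.intersection(leveed_area).area >= right.intersection(leveed_area).area:
+        return "left"
+    return "right"
+
+levee = LineString([(0, 0), (10, 0)])
+area = Polygon([(0, 1), (10, 1), (10, 5), (0, 5)])   # protected land north of the levee
+print(protected_side(levee, area))                   # 'left'
+```
+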

+
+## v4.3.1.0 - 2023-03-10 - [PR#834](https://github.com/NOAA-OWP/inundation-mapping/pull/834)
+
+Change all occurrences of /data/outputs to /outputs to honor the correct volume mount directory specified when executing docker run.
+
+### Changes
+
+- `Dockerfile` - updated comments in relation to `projectDir=/foss_fim`
+- `fim_pipeline.sh` - updated comments in relation to `projectDir=/foss_fim`
+- `fim_pre_processing.sh` - updated comments in relation to `projectDir=/foss_fim`
+- `fim_post_processing.sh` - updated comments in relation to `projectDir=/foss_fim`
+- `README.md` - Provides documentation on starting the Docker container, and updates docs to include an additional command line option for the calibration database tool.
+
+- `src/`
+  - `usgs_gage_crosswalk.py` - added a newline character to shorten the commented example usage
+  - `usgs_gage_unit_setup.py` - `/data/outputs/` => `/outputs/`
+
+- `tools/`
+  - `cache_metrics.py` - `/data/outputs/` => `/outputs/`
+  - `copy_test_case_folders.py` - `/data/outputs/` => `/outputs/`
+  - `run_test_case.py` - `/data/outputs/` => `/outputs/`
+
+- `unit_tests/*_params.json` - `/data/outputs/` => `/outputs/`
+
+- `unit_tests/split_flows_test.py` - `/data/outputs/` => `/outputs/`
+

+ +## v4.3.0.1 - 2023-03-06 - [PR#841](https://github.com/NOAA-OWP/inundation-mapping/pull/841) + +Deletes intermediate files generated by `src/agreedem.py` by adding them to `config/deny_*.lst` + +- `config/` + - `deny_branch_zero.lst`, `deny_branches.lst`, `deny_branch_unittests.lst`: Added `agree_binary_bufgrid.tif`, `agree_bufgrid_zerod.tif`, and `agree_smogrid_zerod.tif` + - `deny_unit.lst`: Added `agree_binary_bufgrid.tif`, `agree_bufgrid.tif`, `agree_bufgrid_allo.tif`, `agree_bufgrid_dist.tif`, `agree_bufgrid_zerod.tif`, `agree_smogrid.tif`, `agree_smogrid_allo.tif`, `agree_smogrid_dist.tif`, `agree_smogrid_zerod.tif` + +

+ +## v4.3.0.0 - 2023-02-15 - [PR#814](https://github.com/NOAA-OWP/inundation-mapping/pull/814) + +Replaces GRASS with Whitebox. This addresses several issues, including Windows permissions and GRASS projection issues. Whitebox also has a slight performance benefit over GRASS. + +### Removals + +- `src/r_grow_distance.py`: Deletes file + +### Changes + +- `Dockerfile`: Removes GRASS, update `$outputDataDir` from `/data/outputs` to `/outputs` +- `Pipfile` and `Pipfile.lock`: Adds Whitebox and removes GRASS +- `src/` + - `agreedem.py`: Removes `r_grow_distance`; refactors to use with context and removes redundant raster reads. + - `adjust_lateral_thalweg.py` and `agreedem.py`: Refactors to use `with` context and removes redundant raster reads + - `unique_pixel_and_allocation.py`: Replaces GRASS with Whitebox and remove `r_grow_distance` + - `gms/` + - `delineate_hydros_and_produce_HAND.sh` and `run_by_unit.sh`: Removes GRASS parameter + - `mask_dem.py`: Removes unnecessary line + +
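+For reference, the GRASS `r.grow.distance` replacement boils down to a pair of WhiteboxTools calls; a minimal sketch assuming the `whitebox` Python package (file names and the working directory are placeholders, and these are not necessarily the exact calls in `unique_pixel_and_allocation.py`):
+
+```python
+import whitebox
+
+wbt = whitebox.WhiteboxTools()
+wbt.set_working_dir("/outputs/my_run/02020005/branches/0")   # placeholder path
+
+# Distance to, and ID of, the nearest stream pixel -- roughly the pair of outputs
+# previously obtained from GRASS r.grow.distance.
+wbt.euclidean_distance("flows_grid_boolean.tif", "flows_grid_dist.tif")
+wbt.euclidean_allocation("flows_grid_boolean.tif", "flows_grid_allo.tif")
+```
+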

+
+## v4.2.1.0 - 2023-02-21 - [PR#829](https://github.com/NOAA-OWP/inundation-mapping/pull/829)
+
+During the merge of the remove-fim3 PR into dev, merge conflicts were discovered in the unit_tests folders and files. Attempts to fix them at that time failed, so some files were removed, others renamed, and others edited to get the merge to work. Here are the fixes to put the unit tests system back to par.
+
+Note: some unit tests are now temporarily disabled due to dependencies on other files / folders which may not exist in other environments.
+
+Also, the CHANGELOG.md was broken and is restored here.
+
+Also, a minor text addition was made to the acquire_and_preprocess_3dep_dems.py file (not directly related to this PR).
+
+For file changes directly related to the unit_tests folder and its files, please see [PR#829](https://github.com/NOAA-OWP/inundation-mapping/pull/829).
+
+Other file changes:
+
+### Changes
+- `Pipfile.lock` : rebuilt and updated as a safety precaution.
+- `docs`
+  - `CHANGELOG.md`: additions to this file for FIM 4.2.0.0 were not merged correctly (re-added just below in the 4.2.0.0 section).
+- `data`
+  - `usgs`
+    - `acquire_and_preprocess_3dep_dems.py`: Added text on the data input URL source.
+

+ +## v4.2.0.1 - 2023-02-16 - [PR#827](https://github.com/NOAA-OWP/inundation-mapping/pull/827) + +FIM 4.2.0.0. was throwing errors for 14 HUCs that did not have any level paths. These are HUCs that have only stream orders 1 and 2 and are covered under branch zero, but no stream orders 3+ (no level paths). This has now been changed to not throw an error but continue to process of the HUC. + +### Changes + +- `src` + - `run_unit_wb.sh`: Test if branch_id.lst exists, which legitimately might not. Also a bit of text cleanup. + +

+ +## v4.2.0.0 - 2023-02-16 - [PR#816](https://github.com/NOAA-OWP/inundation-mapping/pull/816) + +This update removes the remaining elements of FIM3 code. It further removes the phrases "GMS" as basically the entire FIM4 model. FIM4 is GMS. With removing FIM3, it also means remove concepts of "MS" and "FR" which were no longer relevant in FIM4. There are only a few remaining places that will continue with the phrase "GMS" which is in some inundation files which are being re-evaluated. Some deprecated files have been removed and some subfolders removed. + +There are a lot of duplicate explanations for some of the changes, so here is a shortcut system. + +- desc 1: Remove or rename values based on phrase "GMS, MS and/or FR" +- desc 2: Moved file from the /src/gms folder to /src or /tools/gms_tools to /tools +- desc 3: No longer needed as we now use the `fim_pipeline.sh` processing model. + +### Removals + +- `data` + - `acquire_and_preprocess_inputs.py`: No longer needed +- `gms_pipeline.sh` : see desc 3 +- `gms_run_branch.sh` : see desc 3 +- `gms_run_post_processing.sh` : see desc 3 +- `gms_run_unit.sh` : see desc 3 +- `src` + - `gms` + - `init.py` : folder removed, no longer needed. + - `aggregate_branch_lists.py`: no longer needed. Newer version already exists in src directory. + - `remove_error_branches.py` : see desc 3 + - `run_by_unit.sh` : see desc 3 + - `test_new_crosswalk.sh` : no longer needed + - `time_and_tee_run_by_branch.sh` : see desc 3 + - `time_and_tee_run_by_unit.sh` : see desc 3 + - `output_cleanup.py` : see desc 3 + - `tools/gms_tools` + - `init.py` : folder removed, no longer needed. + +### Changes + +- `config` + - `deny_branch_unittests.lst` : renamed from `deny_gms_branch_unittests.lst` + - `deny_branch_zero.lst` : renamed from `deny_gms_branch_zero.lst` + - `deny_branches.lst` : renamed from `deny_gms_branches.lst` + - `deny_unit.lst` : renamed from `deny_gms_unit.lst` + - `params_template.env` : see desc 1 + +- `data` + - `nws` + - `preprocess_ahps_nws.py`: Added deprecation note: If reused, it needs review and/or upgrades. + - `acquire_and_preprocess_3dep_dems.py` : see desc 1 + - `fim_post_processing.sh` : see desc 1, plus a small pathing change. + - `fim_pre_processing.sh` : see desc 1 + - ` src` + - `add_crosswalk.py` : see desc 1. 
Also cleaned up some formatting and commented out a code block in favor of a better way to pass args from "__main__" + - `bash_variables.env` : see desc 1 + - `buffer_stream_branches.py` : see desc 2 + - `clip_rasters_to_branches.py` : see desc 2 + - `crosswalk_nwm_demDerived.py` : see desc 1 and desc 2 + - `delineate_hydros_and_produce_HAND.sh` : see desc 1 and desc 2 + - `derive_level_paths.py` : see desc 1 and desc 2 + - `edit_points.py` : see desc 2 + - `filter_inputs_by_huc.py`: see desc 1 and desc 2 + - `finalize_srcs.py`: see desc 2 + - `generate_branch_list.py` : see desc 1 + - `make_rem.py` : see desc 2 + - `make_dem.py` : see desc 2 + - `outputs_cleanup.py`: see desc 1 + - `process_branch.sh`: see desc 1 + - `query_vectors_by_branch_polygons.py`: see desc 2 + - `reset_mannings.py` : see desc 2 + - `run_by_branch.sh`: see desc 1 + - `run_unit_wb.sh`: see desc 1 + - `stream_branches.py`: see desc 2 + - `subset_catch_list_by_branch_id.py`: see desc 2 + - `toDo.md`: see desc 2 + - `usgs_gage_aggregate.py`: see desc 1 + - `usgs_gage_unit_setup.py` : see desc 1 + - `utils` + - `fim_enums.py` : see desc 1 + +- `tools` + - `combine_crosswalk_tables.py` : see desc 2 + - `compare_ms_and_non_ms_metrics.py` : see desc 2 + - `compile_comp_stats.py`: see desc 2 and added note about possible deprecation. + - `compile_computation_stats.py` : see desc 2 and added note about possible deprecation. + - `composite_inundation.py` : see desc 1 : note.. references a file called inundate_gms which retains it's name for now. + - `consolidate_metrics.py`: added note about possible deprecation. + - `copy_test_case_folders.py`: see desc 1 + - `eval_plots.py` : see desc 1 + - `evaluate_continuity.py`: see desc 2 + - `find_max_catchment_breadth.py` : see desc 2 + - `generate_categorical_fim_mapping.py` : see desc 1 + - `inundate_gms.py`: see desc 1 and desc 2. Note: This file has retained its name with the phrase "gms" in it as it might be upgraded later and there are some similar files with similar names. + - `inundate_nation.py` : see desc 1 + - `inundation.py`: text styling change + - `make_boxes_from_bounds.py`: text styling change + - `mosaic_inundation.py`: see desc 1 and desc 2 + - `overlapping_inundation.py`: see desc 2 + - `plots.py` : see desc 2 + - `run_test_case.py`: see desc 1 + - `synthesize_test_cases.py`: see desc 1 + +- `unit_tests` + - `README.md`: see desc 1 + - `__template_unittests.py`: see desc 1 + - `check_unit_errors_params.json` and `check_unit_errors_unittests.py` : see desc 1 + - `derive_level_paths_params.json` and `derive_level_paths_unittests.py` : see desc 1 and desc 2 + - `filter_catchments_and_add_attributes_unittests.py`: see desc 1 + - `outputs_cleanup_params.json` and `outputs_cleanup_unittests.py`: see desc 1 and desc 2 + - `split_flows_unittests.py` : see desc 1 + - `tools` + - `inundate_gms_params.json` and `inundate_gms_unittests.py`: see desc 1 and desc 2 + +

+ +## v4.1.3.0 - 2023-02-13 - [PR#812](https://github.com/NOAA-OWP/inundation-mapping/pull/812) + +An update was required to adjust host name when in the AWS environment + +### Changes + +- `fim_post_processing.sh`: Added an "if isAWS" flag system based on the input command args from fim_pipeline.sh or + +- `tools/calibration-db` + - `README.md`: Minor text correction. + +

+ +## v4.1.2.0 - 2023-02-15 - [PR#808](https://github.com/NOAA-OWP/inundation-mapping/pull/808) + +Add `pytest` package and refactor existing unit tests. Update parameters to unit tests (`/unit_tests/*_params.json`) to valid paths. Add leading slash to paths in `/config/params_template.env`. + +### Additions + +- `/unit_tests` + - `__init__.py` - needed for `pytest` command line executable to pick up tests. + - `pyproject.toml` - used to specify which warnings are excluded/filtered. + - `/gms` + - `__init__.py` - needed for `pytest` command line executable to pick up tests. + - `/tools` + - `__init__.py` - needed for `pytest` command line executable to pick up tests. + - `inundate_gms_params.json` - file moved up into this directory + - `inundate_gms_test.py` - file moved up into this directory + - `inundation_params.json` - file moved up into this directory + - `inundation_test.py` - file moved up into this directory + +### Removals + +- `/unit_tests/tools/gms_tools/` directory removed, and files moved up into `/unit_tests/tools` + +### Changes + +- `Pipfile` - updated to include pytest as a dependency +- `Pipfile.lock` - updated to include pytest as a dependency + +- `/config` + - `params_template.env` - leading slash added to paths + +- `/unit_tests/` - All of the `*_test.py` files were refactored to follow the `pytest` paradigm. + - `*_params.json` - valid paths on `fim-dev1` provided + - `README.md` - updated to include documentation on pytest. + - `unit_tests_utils.py` + - `__template_unittests.py` -> `__template.py` - exclude the `_test` suffix to remove from test suite. Updated example on new format for pytest. + - `check_unit_errors_test.py` + - `clip_vectors_to_wbd_test.py` + - `filter_catchments_and_add_attributes_test.py` + - `rating_curve_comparison_test.py` + - `shared_functions_test.py` + - `split_flow_test.py` + - `usgs_gage_crosswalk_test.py` + - `aggregate_branch_lists_test.py` + - `generate_branch_list_test.py` + - `generate_branch_list_csv_test.py` + - `aggregate_branch_lists_test.py` + - `generate_branch_list_csv_test.py` + - `generate_branch_list_test.py` + - `/gms` + - `derive_level_paths_test.py` + - `outputs_cleanup_test.py` + - `/tools` + - `inundate_unittests.py` -> `inundation_test.py` + - `inundate_gms_test.py` + + +

+ +## v4.1.1.0 - 2023-02-16 - [PR#809](https://github.com/NOAA-OWP/inundation-mapping/pull/809) + +The CatFIM code was updated to allow 1-foot interval processing across all stage-based AHPS sites ranging from action stage to 5 feet above major stage, along with restart capability for interrupted processing runs. + +### Changes + +- `tools/generate_categorical_fim.py` (all changes made here) + - Added try-except blocks for code that didn't allow most sites to actually get processed because it was trying to check values of some USGS-related variables that most of the sites didn't have + - Overwrite abilities of the different outputs for the viz team were not consistent (i.e., one of the files had the ability to be overwritten but another didn't), so that has been made consistent to disallow any overwrites of the existing final outputs for a specified output folder. + - The code also has the ability to restart from an interrupted run and resume processing uncompleted HUCs by first checking for a simple "complete" file for each HUC. If a HUC has that file, then it is skipped (because it already completed processing during a run for a particular output folder / run name). + - When a HUC is successfully processed, an empty "complete" text file is created / touched. + +
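+A minimal sketch of the restart pattern described above (the marker file name and directory layout are illustrative, not the exact CatFIM implementation):
+
+```python
+from pathlib import Path
+
+def process_huc(huc, output_dir):
+    huc_dir = Path(output_dir) / huc
+    huc_dir.mkdir(parents=True, exist_ok=True)
+    complete_marker = huc_dir / "complete"
+
+    if complete_marker.exists():
+        print(f"{huc}: already complete, skipping")
+        return
+
+    # ... stage-based CatFIM processing for this HUC ...
+
+    complete_marker.touch()   # written only after the HUC finishes successfully
+```
+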

+
+## v4.1.0.0 - 2023-01-30 - [PR#806](https://github.com/NOAA-OWP/inundation-mapping/pull/806)
+
+As we move to Amazon Web Services (AWS), we need to change our processing system. Currently, `gms_pipeline.sh` uses bash "parallel" as an iterator that first processes all HUCs, but not their branches. One of `gms_pipeline.sh`'s next steps is branch processing, which is again iterated via "parallel". In other words, units are processed as one step and branches as a second, independent step.
+
+**Note:** While we are taking steps to move to AWS, we will continue to maintain the ability to do all processing on a single server using a single Docker container, as we have for a long time. Moving to AWS simply takes portions of FIM code and adds them to AWS tools for large-scale production runs.
+
+Our new processing system, starting with this PR, allows each HUC to process its own branches.
+
+A further requirement was to split the overall processing flow into independent steps, with each step able to run without relying on "export" variables from other files (with a few remaining exceptions). The basic flow now becomes:
+- `fim_pre_processing.sh`,
+- one or more calls to `fim_process_unit_wb.sh` (calling this file once for each HUC to be processed),
+- followed by a call to `fim_post_processing.sh`.
+
+
+Note: This is a very large, complex PR with a lot of critical details. Please read the details at [PR 806](https://github.com/NOAA-OWP/inundation-mapping/pull/806).
+
+### CRITICAL NOTE
+The new `fim_pipeline.sh`, and by proxy `fim_pre_processing.sh`, has two new key input args: **-jh** (job HUCs) and **-jb** (job branches). These assign the number of cores/CPUs used for processing HUCs versus the number used for processing branches. The -jh arg is only used by `fim_pipeline.sh` when it is processing more than one HUC or a list of HUCs, as it is the iterator for HUCs. The -jb flag says how many cores/CPUs can be used when processing branches (note: the average HUC has 26 branches).
+
+BUT.... you have to be careful not to overload your system. **You need to multiply the -jh and -jb values together, but only when using the `fim_pipeline.sh` script.** Why? _If you have 16 CPUs available on your machine and you assign -jh as 10 and -jb as 26, you are actually asking for 260 cores (10 x 26), but your machine only has 16 cores._ If you are not using `fim_pipeline.sh` but are running the three processing steps independently, then -jh need not be anything but 1, as each HUC is processed one at a time (i.e., no iterator).
+ +### Additions + +- `fim_pipeline.sh` : The wrapper for the three new major "FIM" processing steps. This script allows processing in one command, same as the current tool of `gms_pipeline.sh`. +- `fim_pre_processing.sh`: This file handles all argument input from the user, validates those inputs and sets up or cleans up folders. It also includes a new system of taking most input parameters and some key enviro variables and writing them out to a files called `runtime_args.env`. Future processing steps need minimal input arguments as it can read most values it needs from this new `runtime_args.env`. This allows the three major steps to work independently from each other. Someone can now come in, run `fim_pre_processing.sh`, then run `fim_process_unit_wb.sh`, each with one HUC, as many time as they like, each adding just its own HUC folder to the output runtime folder. +- `fim_post_processing.sh`: Scans all HUC folders inside the runtime folders to handle a number of processing steps which include (to name a few): + - aggregating errors + - aggregating to create a single list (gms_inputs.csv) for all valid HUCs and their branch ids + - usgs gage aggregation + - adjustments to SRV's + - and more +- `fim_process_unit_wb.sh`: Accepts only input args of runName and HUC number. It then sets up global variable, folders, etc to process just the one HUC. The logic for processing the HUC is in `run_unit_wb.sh` but managed by this `fim_process_unit_wb.sh` file including all error trapping. +- `src` + - `aggregate_branch_lists.py`: When each HUC is being processed, it creates it's own .csv file with its branch id's. In post processing we need one master csv list and this file aggregates them. Note: This is a similar file already in the `src/gms` folder but that version operates a bit different and will be deprecated soon. + - `generate_branch_list.py`: This creates the single .lst for a HUC defining each branch id. With this list, `run_unit_wb.sh` can do a parallelized iteration over each of its branches for processing. Note: This is also similar to the current `src/gms` file of the same name and the gms folder version will also be deprecated soon. + - `generate_branch_list_csv.py`. As each branch, including branch zero, has processed and if it was successful, it will add to a .csv list in the HUC directory. At the end, it becomes a list of all successful branches. This file will be aggregates with all similar .csv in post processing for future processing. + - `run_unit_wb.sh`: The actual HUC processing logic. Note: This is fundamentally the same as the current HUC processing logic that exists currently in `src/gms/run_by_unit.sh`, which will be removed in the very near future. However, at the end of this file, it creates and manages a parallelized iterator for processing each of it's branches. + - `process_branch.sh`: Same concept as `process_unit_wb.sh` but this one is for processing a single branch. This file manages the true branch processing file of `src/gms/run_by_branch.sh`. It is a wrapper file to `src/gms/run_by_branch.sh` and catches all error and copies error files as applicable. This allows the parent processing files to continue despite branch errors. Both the new fim processing system and the older gms processing system currently share the branch processing file of `src/gms/run_by_branch.sh`. When the gms processing file is removed, this file will likely not change, only moved one directory up and be no longer in the `gms` sub-folder. 
+- `unit_tests`
+  - `aggregate_branch_lists_unittests.py` and `aggregate_branch_lists_params.json` (based on the newer `src` directory edition of `aggregate_branch_lists.py`).
+  - `generate_branch_list_unittest.py` and `generate_branch_list_params.json` (based on the newer `src` directory edition of `generate_branch_list.py`).
+  - `generate_branch_list_csv_unittest.py` and `generate_branch_list_csv_params.json`
+
+### Changes
+
+- `config`
+  - `params_template.env`: Removed the `default_max_jobs` value and moved the `startDiv` and `stopDiv` to the `bash_variables.env` file.
+  - `deny_gms_unit.lst` : Renamed from `deny_gms_unit_prod.lst`
+  - `deny_gms_branches.lst` : Renamed from `deny_gms_branches_prod.lst`
+
+- `gms_pipeline.sh`, `gms_run_branch.sh`, `gms_run_unit.sh`, and `gms_post_processing.sh` : Changed to hardcode `default_max_jobs` to a value of 1 (we don't want this to be changed at all). They were also changed for minor adjustments to the `deny` list file names.
+
+- `src`
+  - `bash_functions.env`: Fixed an error with calculating durations.
+  - `bash_variables.env`: Adds the two export lines (stopDiv and startDiv) from `params_template.env`
+  - `clip_vectors_to_wbd.py`: Cleaned up some print statements for better output traceability.
+  - `check_huc_inputs.py`: Added logic to ensure the file was an .lst file. Other file formats were not being handled correctly.
+  - `gms`
+    - `delineate_hydros_and_produce_HAND.sh`: Removed all `stopDiv` variables to reduce log and screen output.
+    - `run_by_branch.sh`: Removed an unnecessary test for overriding outputs.
+
+### Removed
+
+- `config`
+  - `deny_gms_branches_dev.lst`
+

+
+## v4.0.19.5 - 2023-01-24 - [PR#801](https://github.com/NOAA-OWP/inundation-mapping/pull/801)
+
+When running `tools/test_case_by_hydroid.py`, it throws the error: local variable 'stats' referenced before assignment.
+
+### Changes
+
+- `tools`
+  - `pixel_counter.py`: declares the stats object and removes the GA_ReadOnly flag
+  - `test_case_by_hydroid.py`: Added more logging.
+

+
+## v4.0.19.4 - 2023-01-25 - [PR#802](https://github.com/NOAA-OWP/inundation-mapping/pull/802)
+
+This revision includes a slight alteration to the filtering technique used to trim/remove lakeid nwm_reaches that exist at the upstream end of each branch network. By keeping a single lakeid reach at the branch level, we can avoid issues with the branch headwater point starting at a lake boundary. This ensures the headwater catchments for some branches are properly identified as a lake catchment (no inundation produced).
+
+### Changes
+
+- `src/gms/stream_branches.py`: New changes to the `find_upstream_reaches_in_waterbodies` function: Added a step to create a list of nonlake segments (lakeid = -9999). Uses the list of nonlake reaches to allow the filter to keep the first lakeid reach that connects to a nonlake segment.
+

+
+## v4.0.19.3 - 2023-01-17 - [PR#794](https://github.com/NOAA-OWP/inundation-mapping/pull/794)
+
+Removing FIM3 files and references. Anything still required for FIM 3 is held in the dev-fim3 branch.
+
+### Removals
+
+- `data`
+  - `preprocess_rasters.py`: no longer valid as it is for NHD DEM rasters.
+- `fim_run.sh`
+- `src`
+  - `aggregate_fim_outputs.sh`
+  - `fr_to_ms_raster.mask.py`
+  - `get_all_huc_in_inputs.py`
+  - `reduce_nhd_stream_density.py`
+  - `rem.py`: There are two files named `rem.py`, one in the src directory and one in the gms directory. The version in the src directory is no longer valid. The `rem.py` in the gms directory is being renamed to avoid future enhancements of moving files.
+  - `run_by_unit.sh`: There are two files named `run_by_unit.sh`, one in the src directory and one in the gms directory. The version in the src directory is for fim3. The remaining `run_by_unit.sh` is NOT being renamed at this time as it will likely be renamed in the near future.
+  - `time_and_tee_run_by_unit.sh`: Same note as above for `run_by_unit.sh`.
+  - `utils`
+    - `archive_cleanup.py`
+- `tools`
+  - `compare_gms_srcs_to_fr.py`
+  - `preprocess_fimx.py`
+
+### Changes
+
+- `src`
+  - `adjust_headwater_streams.py`: Likely deprecated but kept for safety reasons. Deprecation note added.
+- `tools`
+  - `cygnss_preprocess.py`: Likely deprecated but kept for safety reasons. Deprecation note added.
+  - `nesdis_preprocess.py`: Likely deprecated but kept for safety reasons. Deprecation note added.
+

+ +## v4.0.19.2 - 2023-01-17 - [PR#797](https://github.com/NOAA-OWP/inundation-mapping/pull/797) + +Consolidates global bash environment variables into a new `src/bash_variables.env` file. Additionally, Python environment variables have been moved into this file and `src/utils/shared_variables.py` now references this file. Hardcoded projections have been replaced by an environment variable. This also replaces the Manning's N file in `config/params_template.env` with a constant and updates relevant code. Unused environment variables have been removed. + +### Additions + +- `src/bash_variables.env`: Adds file for global environment variables + +### Removals + +- `config/` + - `mannings_default.json` + - `mannings_default_calibrated.json` + +### Changes + +- `config/params_template.env`: Changes manning_n from filename to default value of 0.06 +- `gms_run_branch.sh`: Adds `bash_variables.env` +- `gms_run_post_processing.sh`: Adds `bash_variables.env` and changes projection from hardcoded to environment variable +- `gms_run_unit.sh`: Adds `bash_variables.env` +- `src/` + - `add_crosswalk.py`: Assigns default manning_n value and removes assignments by stream orders + - `aggregate_vector_inputs.py`: Removes unused references to environment variables and function + - `gms/run_by_unit.sh`: Removes environment variable assignments and uses projection from environment variables + - `utils/shared_variables.py`: Removes environment variables and instead references src/bash_variables.env + +

+ +## v4.0.19.1 - 2023-01-17 - [PR#796](https://github.com/NOAA-OWP/inundation-mapping/pull/796) + +### Changes + +- `tools/gms_tools/combine_crosswalk_tables.py`: Checks length of dataframe list before concatenating + +

+ +## v4.0.19.0 - 2023-01-06 - [PR#782](https://github.com/NOAA-OWP/inundation-mapping/pull/782) + +Changes the projection of HAND processing to EPSG 5070. + +### Changes + +- `gms_run_post_processing.sh`: Adds target projection for `points` +- `data/nld/preprocess_levee_protected_areas.py`: Changed to use `utils.shared_variables.DEFAULT_FIM_PROJECTION_CRS` +- `src/` + - `clip_vectors_to_wbd.py`: Save intermediate outputs in EPSG:5070 + - `src_adjust_spatial_obs.py`: Changed to use `utils.shared_variables.DEFAULT_FIM_PROJECTION_CRS` + - `utils/shared_variables.py`: Changes the designated projection variables + - `gms/` + - `stream_branches.py`: Checks the projection of the input streams and changes if necessary + - `run_by_unit.py`: Changes the default projection crs variable and added as HUC target projection +- `tools/inundate_nation.py`: Changed to use `utils.shared_variables.PREP_PROJECTION` + +

+ +## v4.0.18.2 - 2023-01-11 - [PR#790](https://github.com/NOAA-OWP/inundation-mapping/pull/790) + +Remove Great Lakes clipping + +### Changes + +- `src/` + - `clip_vectors_to_wbd.py`: Removes Great Lakes clipping and references to Great Lakes polygons and lake buffer size + + - `gms/run_by_unit.sh`: Removes Great Lakes polygon and lake buffer size arguments to `src/clip_vectors_to_wbd.py` + +

+ +## v4.0.18.1 - 2022-12-13 - [PR #760](https://github.com/NOAA-OWP/inundation-mapping/pull/760) + +Adds stacked bar eval plots. + +### Additions + +- `/tools/eval_plots_stackedbar.py`: produces stacked bar eval plots in the same manner as `eval_plots.py`. + +

+ +## v4.0.18.0 - 2023-01-03 - [PR#780](https://github.com/NOAA-OWP/inundation-mapping/pull/780) + +Clips WBD and stream branch buffer polygons to DEM domain. + +### Changes + +- `src/` + - `clip_vectors_to_wbd.py`: Clips WBD polygon to DEM domain + + - `gms/` + - `buffer_stream_branches.py`: Clips branch buffer polygons to DEM domain + - `derive_level_paths.py`: Stop processing if no branches exist + - `mask_dem.py`: Checks if stream file exists before continuing + - `remove_error_branches.py`: Checks if error_branches has data before continuing + - `run_by_unit.sh`: Adds DEM domain as bash variable and adds it as an argument to calling `clip_vectors_to_wbd.py` and `buffer_stream_branches.py` + +

+
+## v4.0.17.4 - 2023-01-06 - [PR#781](https://github.com/NOAA-OWP/inundation-mapping/pull/781)
+
+Added crosswalk_table.csv from the root output folder as a file to be pushed up to the Hydrovis s3 bucket after FIM BED runs.
+
+### Changes
+
+- `config`
+  - `aws_s3_put_fim4_hydrovis_whitelist.lst`: Added crosswalk_table.csv to whitelist.
+

+ +## v4.0.17.3 - 2022-12-23 - [PR#773](https://github.com/NOAA-OWP/inundation-mapping/pull/773) + +Cleans up REM masking of levee-protected areas and fixes associated error. + +### Removals + +- `src/gms/` + - `delineate_hydros_and_produce_HAND.sh`: removes rasterization and masking of levee-protected areas from the REM + - `rasterize_by_order`: removes this file +- `config/` + - `deny_gms_branch_zero.lst`, `deny_gms_branches_dev.lst`, and `deny_gms_branches_prod.lst`: removes `LeveeProtectedAreas_subset_{}.tif` + +### Changes + +- `src/gms/rem.py`: fixes an error where the nodata value of the DEM was overlooked + +

+ +## v4.0.17.2 - 2022-12-29 - [PR #779](https://github.com/NOAA-OWP/inundation-mapping/pull/779) + +Remove dependency on `other` folder in `test_cases`. Also updates ESRI and QGIS agreement raster symbology label to include the addition of levee-protected areas as a mask. + +### Removals + +- `tools/` + - `aggregate_metrics.py` and `cache_metrics.py`: Removes reference to test_cases/other folder + +### Changes + +- `config/symbology/` + - `esri/agreement_raster.lyr` and `qgis/agreement_raster.qml`: Updates label from Waterbody mask to Masked since mask also now includes levee-protected areas +- `tools/` + - `eval_alt_catfim.py` and `run_test_case.py`: Updates waterbody mask to dataset located in /inputs folder + +

+ +## v4.0.17.1 - 2022-12-29 - [PR #778](https://github.com/NOAA-OWP/inundation-mapping/pull/778) + +This merge fixes a bug where all of the Stage-Based intervals were the same. + +### Changes +- `/tools/generate_categorical_fim.py`: Changed `stage` variable to `interval_stage` variable in `produce_stage_based_catfim_tifs` function call. + +

+
+## v4.0.17.0 - 2022-12-21 - [PR #771](https://github.com/NOAA-OWP/inundation-mapping/pull/771)
+
+Added rsync to the docker images. rsync can now be used inside the images to move data around via docker mounts.
+
+### Changes
+
+- `Dockerfile` : added rsync
+

+
+## v4.0.16.0 - 2022-12-20 - [PR #768](https://github.com/NOAA-OWP/inundation-mapping/pull/768)
+
+`gms_run_branch.sh` was processing all of the branches iteratively, then continuing on to a large post processing portion of code. That has now been split into two files, one for branch iteration and the other for just post processing.
+
+Other minor changes include:
+- Removing the system where a user could override `DropStreamOrders` to process streams with stream orders 1 and 2 independently like other GMS branches. With this option removed, only stream orders 3 and higher are allowed as gms branches, and SO 1 and 2 will always be in branch zero.
+
+- The `retry` flag on the three gms*.sh files has been removed. It did not work correctly and was not being used. Usage of it would have created unreliable results.
+
+### Additions
+
+- `gms_run_post_processing.sh`
+  - handles all tasks that previously ran after the branch iteration in `gms_run_branch.sh`, except for output cleanup, which stayed in `gms_run_branch.sh`.
+  - Can be run completely independently of `gms_run_unit.sh` or `gms_run_branch.sh` as long as all of the files are in place, and can be re-run if desired.
+
+### Changes
+
+- `gms_pipeline.sh`
+  - Remove "retry" system.
+  - Remove "dropLowStreamOrders" system.
+  - Updated for newer reusable output date/time/duration system.
+  - Add call to new `gms_run_post_processing.sh` file.
+
+- `gms_run_branch.sh`
+  - Remove "retry" system.
+  - Remove "dropLowStreamOrders" system.
+  - Updated for newer reusable output date/time/duration system.
+  - Moved most code from below the branch iterator to the new `gms_run_post_processing.sh` file. However, it did keep the branch file output cleanup and non-zero exit code checking.
+
+- `gms_run_unit.sh`
+  - Remove "retry" system.
+  - Remove "dropLowStreamOrders" system.
+  - Updated for newer reusable output date/time/duration system.
+
+- `src`
+  - `bash_functions.env`: Added a new method to make it easier / simpler to calculate and display duration times.
+  - `filter_catchments_and_add_attributes.py`: Remove "dropLowStreamOrders" system.
+  - `split_flows.py`: Remove "dropLowStreamOrders" system.
+  - `usgs_gage_unit_setup.py`: Remove "dropLowStreamOrders" system.
+
+- `gms`
+  - `delineate_hydros_and_produce_HAND.sh` : Remove "dropLowStreamOrders" system.
+  - `derive_level_paths.py`: Remove "dropLowStreamOrders" system and some small style updates.
+  - `run_by_unit.sh`: Remove "dropLowStreamOrders" system.
+
+- `unit_tests/gms`
+  - `derive_level_paths_params.json` and `derive_level_paths_unittests.py`: Remove "dropLowStreamOrders" system.
+

+
+## v4.0.15.0 - 2022-12-20 - [PR #758](https://github.com/NOAA-OWP/inundation-mapping/pull/758)
+
+This merge addresses feedback received from field users regarding CatFIM. Users wanted a Stage-Based version of CatFIM, they wanted maps created for multiple intervals between flood categories, and they wanted documentation as to why many sites are absent from the Stage-Based CatFIM service. This merge seeks to address this feedback. CatFIM will continue to evolve with more feedback over time.
+
+### Changes
+- `/src/gms/usgs_gage_crosswalk.py`: Removed filtering of extra attributes when writing table
+- `/src/gms/usgs_gage_unit_setup.py`: Removed filter of gages where `rating curve == yes`. The filtering happens later on now.
+- `/tools/eval_plots.py`: Added a post-processing step to produce CSVs of spatial data
+- `/tools/generate_categorical_fim.py`:
+  - New arguments to support more advanced multiprocessing, support production of Stage-Based CatFIM, specific output directory pathing, upstream and downstream distance, controls on how high past "major" magnitude to go when producing interval maps for Stage-Based, and the ability to run a single AHPS site.
+- `/tools/generate_categorical_fim_flows.py`:
+  - Allows for flows to be retrieved for only one site (useful for testing)
+  - More logging
+  - Filtering stream segments according to stream order
+- `/tools/generate_categorical_fim_mapping.py`:
+  - Support for Stage-Based CatFIM production
+  - Enhanced multiprocessing
+  - Improved post-processing
+- `/tools/pixel_counter.py`: fixed a bug where Nonetypes were being returned
+- `/tools/rating_curve_get_usgs_rating_curves.py`:
+  - Removed filtering when producing `usgs_gages.gpkg`, but added an attribute as to whether or not each gage meets acceptance criteria, as defined in `gms_tools/tools_shared_variables.py`.
+  - Created a lookup list to filter out unacceptable gages before they're written to `usgs_rating_curves.csv`
+  - The `usgs_gages.gpkg` now includes two fields indicating whether or not gages pass acceptance criteria (defined in `tools_shared_variables.py`). The fields are `acceptable_codes` and `acceptable_alt_error`
+- `/tools/tools_shared_functions.py`:
+  - Added `get_env_paths()` function to retrieve environmental variable information used by CatFIM and rating curves scripts
+  - Added `filter_nwm_segments_by_stream_order()` function that uses WRDS to filter out NWM feature_ids from a list if their stream order is different than a desired stream order (see the sketch after this entry).
+- `/tools/tools_shared_variables.py`: Added the acceptance criteria and URLs for gages as non-constant variables. These can be modified and tracked through version changes. These variables are imported by the CatFIM and USGS rating curve and gage generation scripts.
+- `/tools/test_case_by_hydroid.py`: reformatted code; recommend adding more comments/docstrings in a future commit
+
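+
+For illustration only (not the code added in this PR): a minimal sketch of the stream-order filtering idea behind `filter_nwm_segments_by_stream_order()`, assuming the stream orders have already been retrieved (e.g. from a WRDS metadata query) into a plain dict; the function name and sample values below are hypothetical.
+
+```python
+from typing import Dict, List
+
+def filter_segments_by_stream_order(feature_ids: List[str],
+                                    stream_orders: Dict[str, int],
+                                    desired_order: int) -> List[str]:
+    """Keep only the NWM feature_ids whose stream order matches desired_order."""
+    return [fid for fid in feature_ids
+            if stream_orders.get(fid) == desired_order]
+
+# Example: the middle segment is dropped because its order differs from the target of 5
+print(filter_segments_by_stream_order(
+    ["101", "102", "103"], {"101": 5, "102": 3, "103": 5}, desired_order=5))
+```
+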

+
+## v4.0.14.2 - 2022-12-22 - [PR #772](https://github.com/NOAA-OWP/inundation-mapping/pull/772)
+
+Added `usgs_elev_table.csv` to the hydrovis whitelist files. Also updated the file names to include the word "hydrovis" (anticipating more s3 whitelist files).
+
+### Changes
+
+- `config`
+  - `aws_s3_put_fim4_hydrovis_whitelist.lst`: File name updated and added usgs_elev_table.csv so it gets pushed up as well.
+  - `aws_s3_put_fim3_hydrovis_whitelist.lst`: File name updated
+
+- `data/aws`
+  - `s3.py`: added `/foss_fim/config/aws_s3_put_fim4_hydrovis_whitelist.lst` as the default for the -w param.
+

+ +## v4.0.14.1 - 2022-12-03 - [PR #753](https://github.com/NOAA-OWP/inundation-mapping/pull/753) + +Creates a polygon of 3DEP DEM domain (to eliminate errors caused by stream networks with no DEM data in areas of HUCs that are outside of the U.S. border) and uses the polygon layer to clip the WBD and stream network (to a buffer inside the WBD). + +### Additions +- `data/usgs/acquire_and_preprocess_3dep_dems.py`: Adds creation of 3DEP domain polygon by polygonizing all HUC6 3DEP DEMs and then dissolving them. +- `src/gms/run_by_unit.sh`: Adds 3DEP domain polygon .gpkg as input to `src/clip_vectors_to_wbd.py` + +### Changes +- `src/clip_vectors_to_wbd.py`: Clips WBD to 3DEP domain polygon and clips streams to a buffer inside the clipped WBD polygon. + +
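+
+For illustration only (not the code in `acquire_and_preprocess_3dep_dems.py`): a minimal sketch of the general approach of polygonizing each DEM's valid-data footprint and dissolving the footprints into one domain polygon, using rasterio, shapely and geopandas from the project's dependencies; the function name and file paths are hypothetical.
+
+```python
+import geopandas as gpd
+import rasterio
+from rasterio.features import shapes
+from shapely.geometry import shape
+from shapely.ops import unary_union
+
+def build_dem_domain(dem_paths, out_gpkg):
+    """Polygonize each DEM's valid-data area and dissolve all footprints into one polygon."""
+    footprints = []
+    crs_wkt = None
+    for path in dem_paths:
+        with rasterio.open(path) as src:
+            mask = src.read_masks(1)                     # 255 where data exists, 0 where nodata
+            for geom, _val in shapes(mask, mask=(mask > 0), transform=src.transform):
+                footprints.append(shape(geom))
+            crs_wkt = src.crs.to_wkt()
+    domain = unary_union(footprints)                     # dissolve into a single geometry
+    gpd.GeoDataFrame(geometry=[domain], crs=crs_wkt).to_file(out_gpkg, driver="GPKG")
+
+# Hypothetical usage:
+# build_dem_domain(["huc6_a_dem.tif", "huc6_b_dem.tif"], "HUC6_dem_domain.gpkg")
+```
+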

+ +## v4.0.14.0 - 2022-12-20 - [PR #769](https://github.com/NOAA-OWP/inundation-mapping/pull/769) + +Masks levee-protected areas from the DEM in branch 0 and in highest two stream order branches. + +### Additions + +- `src/gms/` + - `mask_dem.py`: Masks levee-protected areas from the DEM in branch 0 and in highest two stream order branches + - `delineate_hydros_and_produce_HAND.sh`: Adds `src/gms/mask_dem.py` + +

+ +## v4.0.13.2 - 2022-12-20 - [PR #767](https://github.com/NOAA-OWP/inundation-mapping/pull/767) + +Fixes inundation of nodata areas of REM. + +### Changes + +- `tools/inundation.py`: Assigns depth a value of `0` if REM is less than `0` + +

+
+## v4.0.13.1 - 2022-12-09 - [PR #743](https://github.com/NOAA-OWP/inundation-mapping/pull/743)
+
+This merge adds the tools required to generate Alpha metrics by hydroid. It summarizes the Alpha metrics by branch 0 catchment for use in the Hydrovis "FIM Performance" service.
+
+### Additions
+
+- `pixel_counter.py`: A script to perform zonal statistics against raster data and geometries (see the sketch after this entry)
+- `pixel_counter_functions.py`: Supporting functions
+- `pixel_counter_wrapper.py`: a script that wraps `pixel_counter.py` for batch processing
+- `test_case_by_hydroid.py`: the main script to orchestrate the generation of alpha metrics by catchment
+
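+
+For illustration only (not the implementation in `pixel_counter.py`, which uses GDAL directly): a minimal sketch of the zonal-statistics idea using the `rasterstats` package already in the project's dependencies; the file names below are placeholders.
+
+```python
+from rasterstats import zonal_stats
+
+# Count raster pixels per category inside each catchment polygon.
+# The file names below are placeholders, not actual FIM outputs.
+stats = zonal_stats(
+    "catchments_branch0.gpkg",   # vector zones (e.g. branch 0 catchments)
+    "agreement_raster.tif",      # categorical raster to summarize
+    categorical=True,            # return a pixel count per raster category
+    geojson_out=True,            # keep each zone's attributes (e.g. HydroID)
+)
+
+for feature in stats:
+    print(feature["properties"])  # zone attributes plus per-category pixel counts
+```
+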

+
+## v4.0.13.0 - 2022-11-16 - [PR #744](https://github.com/NOAA-OWP/inundation-mapping/pull/744)
+
+Changes the branch 0 headwaters data source from NHD to NWM to be consistent with branches. Removes references to NHD flowlines and headwater data.
+
+### Changes
+
+- `src/gms/derive_level_paths.py`: Generates headwaters before stream branch filtering
+
+### Removals
+
+- Removes NHD flowlines and headwater references from `gms_run_unit.sh`, `config/deny_gms_unit_prod.lst`, `src/clip_vectors_to_wbd.py`, `src/gms/run_by_unit.sh`, `unit_tests/__template_unittests.py`, `unit_tests/clip_vectors_to_wbd_params.json`, and `unit_tests/clip_vectors_to_wbd_unittests.py`
+

+
+## v4.0.12.2 - 2022-12-04 - [PR #754](https://github.com/NOAA-OWP/inundation-mapping/pull/754)
+
+Stop writing `gms_inputs_removed.csv` if no branches are removed with Error status 61.
+
+### Changes
+
+- `src/gms/remove_error_branches.py`: Checks if error branches is not empty before saving gms_inputs_removed.csv
+

+ +## v4.0.12.1 - 2022-11-30 - [PR #751](https://github.com/NOAA-OWP/inundation-mapping/pull/751) + +Updating a few deny list files. + +### Changes + +- `config`: + - `deny_gms_branches_dev.lst`, `deny_gms_branches_prod.lst`, and `deny_gms_unit_prod.lst` + +

+
+## v4.0.12.0 - 2022-11-28 - [PR #736](https://github.com/NOAA-OWP/inundation-mapping/pull/736)
+
+This feature branch introduces a new methodology for computing Manning's equation for the synthetic rating curves. The new subdivision approach:
+1) estimates bankfull stage by crosswalking "bankfull" proxy discharge data to the raw SRC discharge values
+2) identifies in-channel vs. overbank geometry values
+3) applies unique in-channel and overbank Manning's n values (user provided) to compute Manning's equation separately for channel and overbank discharge and adds the two components together for total discharge
+4) computes a calibration coefficient (where benchmark data exists) that applies to the calibrated total discharge calculation
+
+A simplified sketch of the channel/overbank calculation follows this entry.
+
+### Additions
+
+- `src/subdiv_chan_obank_src.py`: new script that performs all subdiv calculations and then produces a new (modified) `hydroTable.csv`. Inputs include `src_full_crosswalked.csv` for each huc/branch and a Manning's roughness csv file (containing: featureid, channel n, overbank n; file located in the `/inputs/rating_curve/variable_roughness/`). Note that the `identify_src_bankfull.py` script must be run prior to running the subdiv workflow.
+
+### Changes
+
+- `config/params_template.env`: removed BARC and composite roughness parameters; added new subdivision parameters; default Manning's n file set to `mannings_global_06_12.csv`
+- `gms_run_branch.sh`: moved the PostgreSQL database steps to occur immediately before the SRC calibration steps; added new subdivision step; added condition to SRC calibration to ensure subdivision routine is run
+- `src/add_crosswalk.py`: removed BARC function call; updated placeholder value list (removed BARC and composite roughness variables) - these placeholder variables ensure that all hydrotables have the same dimensions
+- `src/identify_src_bankfull.py`: revised FIM3 starting code to work with FIM4 framework; stripped out unnecessary calculations; restricted bankfull identification to stage values > 0
+- `src/src_adjust_spatial_obs.py`: added huc sort function to help user track progress from console outputs
+- `src/src_adjust_usgs_rating.py`: added huc sort function to help user track progress from console outputs
+- `src/src_roughness_optimization.py`: reconfigured code to compute a calibration coefficient and apply adjustments using the subdivision variables; renamed numerous variables; simplified code where possible
+- `src/utils/shared_variables.py`: increased `ROUGHNESS_MAX_THRESH` from 0.6 to 0.8
+- `tools/vary_mannings_n_composite.py`: *moved this script from /src to /tools*; updated this code from FIM3 to work with FIM4 structure; however, it is not currently implemented (the subdivision routine replaces this)
+- `tools/aggregate_csv_files.py`: helper tool to search for csv files by name/wildcard and concatenate all found files into one csv (used for aggregating previous calibrated roughness values)
+- `tools/eval_plots.py`: updated list of metrics to plot to also include equitable threat score and Matthews correlation coefficient (MCC)
+- `tools/synthesize_test_cases.py`: updated the list of FIM version metrics that the `PREV` flag will use to create the final aggregated metrics csv; this change will combine the dev versions provided with the `-dc` flag along with the existing `previous_fim_list`
+
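+
+For illustration only (not the code in `subdiv_chan_obank_src.py`): a minimal sketch of the subdivided Manning's equation idea, computing channel and overbank discharge with separate roughness values and summing them. The function names, default n values, and example geometry numbers are hypothetical, and applying the calibration coefficient as a simple multiplier is a simplification.
+
+```python
+def mannings_q(area_sqm: float, wetted_perimeter_m: float, n: float, slope: float) -> float:
+    """Manning's equation: Q = (1/n) * A * R^(2/3) * S^(1/2), with R = A / P."""
+    if area_sqm <= 0 or wetted_perimeter_m <= 0:
+        return 0.0
+    hydraulic_radius = area_sqm / wetted_perimeter_m
+    return (1.0 / n) * area_sqm * hydraulic_radius ** (2.0 / 3.0) * slope ** 0.5
+
+def subdivided_discharge(chan_area, chan_perimeter, obank_area, obank_perimeter,
+                         slope, channel_n=0.06, overbank_n=0.12, calb_coef=1.0):
+    """Total discharge = channel component + overbank component (calibration
+    coefficient applied multiplicatively here purely for illustration)."""
+    q_channel = mannings_q(chan_area, chan_perimeter, channel_n, slope)
+    q_overbank = mannings_q(obank_area, obank_perimeter, overbank_n, slope)
+    return calb_coef * (q_channel + q_overbank)
+
+# Hypothetical cross-section values for one stage increment (areas in m^2, perimeters in m)
+print(round(subdivided_discharge(120.0, 40.0, 300.0, 220.0, slope=0.001), 1), "cms")
+```
+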

+ +## v4.0.11.5 - 2022-11-18 - [PR #746](https://github.com/NOAA-OWP/inundation-mapping/pull/746) + +Skips `src/usgs_gage_unit_setup.py` if no level paths exist. This may happen if a HUC has no stream orders > 2. This is a bug fix for #723 for the case that the HUC also has USGS gages. + +### Changes + +- `src/gms/run_by_unit.sh`: Adds check for `nwm_subset_streams_levelPaths.gpkg` before running `usgs_gage_unit_setup.py` + +

+ +## v4.0.11.4 - 2022-10-12 - [PR #709](https://github.com/NOAA-OWP/inundation-mapping/pull/709) + +Adds capability to produce single rating curve comparison plots for each gage. + +### Changes + +- `tools/rating_curve_comparison.py` + - Adds generate_single_plot() to make a single rating curve comparison plot for each gage in a given HUC + - Adds command line switch to generate single plots + +

+
+## v4.0.11.3 - 2022-11-10 - [PR #739](https://github.com/NOAA-OWP/inundation-mapping/pull/739)
+
+New tool with instructions for downloading levee protected areas, plus a tool to pre-process them so they are ready for FIM.
+
+### Additions
+
+- `data`
+  - `nld`
+    - `preprocess_levee_protected_areas.py`: as described above
+
+### Changes
+
+- `data`
+  - `preprocess_rasters.py`: added deprecation note. It will eventually be replaced in its entirety.
+- `src`
+  - `utils`
+    - `shared_functions.py`: a few styling adjustments.
+

+
+## v4.0.11.2 - 2022-11-07 - [PR #737](https://github.com/NOAA-OWP/inundation-mapping/pull/737)
+
+Adds an extra input arg to the gms_**.sh files to allow for an override of the branch zero deny list, the same as we can do with the unit and branch deny list overrides. This is needed for debugging purposes.
+
+Also, if there is no override for the branch zero deny list and the word "none" is not used, then the default or overridden standard branch deny list is used. This keeps the branch zero and branch output folders similar, but not identical, in their outputs.
+
+### Changes
+
+- `gms_pipeline.sh`: Add new param to allow for branch zero deny list override. Plus added better logic for catching bad deny lists earlier.
+- `gms_run_branch.sh`: Add new param to allow for branch zero deny list override. Add logic to clean up all branch zero output folders with the default branch deny list (not the branch zero list), UNLESS an override exists for the branch zero deny list.
+- `gms_run_unit.sh`: Add new param to allow for branch zero deny list override.
+- `config`
+  - `deny_gms_branch_zero.lst`: update to keep an additional file in the outputs.
+- `src`
+  - `output_cleanup.py`: added note saying it is deprecated.
+  - `gms`
+    - `run_by_branch.sh`: variable name change (matching new names in related files for deny lists)
+    - `run_by_unit.sh`: Add new param to allow for branch zero deny list override.
+

+ +## v4.0.11.1 - 2022-11-01 - [PR #732](https://github.com/NOAA-OWP/inundation-mapping/pull/732) + +Due to a recent IT security scan, it was determined that Jupyter-core needed to be upgraded. + +### Changes + +- `Pipfile` and `Pipfile.lock`: Added a specific version of Jupyter Core that is compliant with IT. + +

+ +## v4.0.11.0 - 2022-09-21 - [PR #690](https://github.com/NOAA-OWP/inundation-mapping/pull/690) + +Masks levee-protected areas from Relative Elevation Model if branch 0 or if branch stream order exceeds a threshold. + +### Additions + +- `src/gms/` + - `delineate_hydros_and_produce_HAND.sh` + - Reprojects and creates HUC-level raster of levee-protected areas from polygon layer + - Uses that raster to mask/remove those areas from the Relative Elevation Model + - `rasterize_by_order.py`: Subsets levee-protected area branch-level raster if branch 0 or if order exceeds a threshold (default threshold: max order - 1) +- `config/` + - `deny_gms_branches_default.lst`, and `deny_gms_branches_min.lst`: Added LeveeProtectedAreas_subset_{}.tif + - `params_template.env`: Adds mask_leveed_area_toggle + +### Changes + +- `src/gms/delineate_hydros_and_produce_HAND.sh`: Fixes a bug in ocean/Great Lakes masking +- `tools/` + - `eval_alt_catfim.py` and `run_test_case.py`: Changes the levee mask to the updated inputs/nld_vectors/Levee_protected_areas.gpkg + +

+
+## v4.0.10.5 - 2022-10-21 - [PR #720](https://github.com/NOAA-OWP/inundation-mapping/pull/720)
+
+Earlier versions of `acquire_and_preprocess_3dep_dems.py` did not add any buffer when downloading HUC6 DEMs. This resulted in 1-pixel nodata gaps in the final REM outputs in some cases where HUC8s share a HUC6 border. Adding the `cblend 6` param to the gdalwarp command puts 6 extra pixels all around the perimeter. Testing showed that 6 pixels was sufficient, as the gaps were never more than 1 pixel on no-data borders.
+
+### Changes
+
+- `data`
+  - `usgs`
+    - `acquire_and_preprocess_3dep_dems.py`: Added the `cblend 6` param to the gdalwarp call for when the DEM is downloaded from USGS.
+    - `create_vrt_file.py`: Added sample usage comment.
+- `src`
+  - `gms`
+    - `run_by_unit.sh`: Added a comment about gdal as it relates to run_by_unit.
+
+Note: the new replacement inputs/3dep_dems/10m_5070/ files can / will be copied before PR approval, as the true fix was replacement DEMs. There is zero risk of overwriting prior to code merge.
+

+ +## v4.0.10.4 - 2022-10-27 - [PR #727](https://github.com/NOAA-OWP/inundation-mapping/pull/727) + +Creates a single crosswalk table containing HUC (huc8), BranchID, HydroID, feature_id (and optionally LakeID) from branch-level hydroTables.csv files. + +### Additions + +- `tools/gms_tools/combine_crosswalk_tables.py`: reads and concatenates hydroTable.csv files, writes crosswalk table +- `gms_run_branch.sh`: Adds `tools/gms_tools/make_complete_hydrotable.py` to post-processing + +

+ +## v4.0.10.3 - 2022-10-19 - [PR #718](https://github.com/NOAA-OWP/inundation-mapping/pull/718) + +Fixes thalweg notch by clipping upstream ends of the stream segments to prevent the stream network from reaching the edge of the DEM and being treated as outlets when pit filling the burned DEM. + +### Changes + +- `src/clip_vectors_to_wbd.py`: Uses a slightly smaller buffer than wbd_buffer (wbd_buffer_distance-2*(DEM cell size)) to clip stream network inside of DEM extent. + +
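+
+For illustration only (not the code in `clip_vectors_to_wbd.py`): a minimal sketch of the general idea of clipping the stream network with a buffer slightly smaller than the WBD buffer, using geopandas from the project's dependencies. The function name, buffer distance, and cell size values are illustrative.
+
+```python
+import geopandas as gpd
+
+def clip_streams_inside_dem(wbd_gdf, streams_gdf, wbd_buffer_distance=5000, dem_cell_size=10):
+    """Clip streams with a buffer slightly smaller than the WBD buffer so the
+    stream network cannot reach the DEM edge and create false outlets."""
+    inner_distance = wbd_buffer_distance - 2 * dem_cell_size   # e.g. 5000 - 20 = 4980 m
+    inner_buffer = wbd_gdf.copy()
+    inner_buffer["geometry"] = wbd_gdf.buffer(inner_distance)
+    return gpd.clip(streams_gdf, inner_buffer)
+
+# Hypothetical usage:
+# wbd = gpd.read_file("wbd8_clp.gpkg")
+# streams = gpd.read_file("nwm_subset_streams.gpkg")
+# clipped = clip_streams_inside_dem(wbd, streams)
+```
+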

+ +## v4.0.10.2 - 2022-10-24 - [PR #723](https://github.com/NOAA-OWP/inundation-mapping/pull/723) + +Runs branch 0 on HUCs with no other branches remaining after filtering stream orders if `drop_low_stream_orders` is used. + +### Additions + +- `src/gms` + - `stream_branches.py`: adds `exclude_attribute_values()` to filter out stream orders 1&2 outside of `load_file()` + +### Changes + +- `src/gms` + - `buffer_stream_branches.py`: adds check for `streams_file` + - `derive_level_paths.py`: checks length of `stream_network` before filtering out stream orders 1&2, then filters using `stream_network.exclude_attribute_values()` + - `generate_branch_list.py`: adds check for `stream_network_dissolved` + +

+
+## v4.0.10.1 - 2022-10-5 - [PR #695](https://github.com/NOAA-OWP/inundation-mapping/pull/695)
+
+This hotfix addresses a bug with how the rating curve comparison (sierra test) handles the branch zero synthetic rating curve in the comparison plots. Addresses #676.
+
+### Changes
+
+- `tools/rating_curve_comparison.py`
+  - Added logging function to print and write to log file
+  - Added new filters to ignore AHPS only sites (these are sites that we need for CatFIM but do not have a USGS gage or USGS rating curve available for sierra test analysis)
+  - Added functionality to identify branch zero SRCs
+  - Added new plot formatting to distinguish branch zero from other branches
+

+
+## v4.0.10.0 - 2022-10-4 - [PR #697](https://github.com/NOAA-OWP/inundation-mapping/pull/697)
+
+Change FIM to load DEMs from the new USGS 3DEP files instead of the original NHD rasters.
+
+### Changes
+
+- `config`
+  - `params_template.env`: Change default of the calib db back to true: src_adjust_spatial back to "True". Plus a few text updates.
+- `src`
+  - `gms`
+    - `run_by_unit.sh`: Change input_DEM value to the new vrt `$inputDataDir/3dep_dems/10m_5070/fim_seamless_3dep_dem_10m_5070.vrt` to load the new 3DEP DEMs. Note: The 3DEP DEMs are projected as CRS 5070, but for now, our code is using ESRI:102039. Later all code and input will be changed to CRS:5070. We are now defining the FIM desired projection (102039), so we need to reproject on the fly from 5070 to 102039 during the gdalwarp cut.
+    - `run_by_branch.sh`: Removed unused lines.
+  - `utils`
+    - `shared_variables.py`: Changes to use the new 3DEP DEM rasters instead of the NHD rasters. Moved some values (grouped some variables). Added some new variables for 3DEP. Note: At this time, some of these new environment variables for 3DEP are not used but are expected to be used shortly.
+- `data`
+  - `usgs`
+    - `acquire_and_preprocess_3dep_dems.py`: Minor updates for adjustments of environment variables. Adjustments to ensure the cell sizes are fully defined as 10 x 10, as the source has a different resolution. The data we downloaded to the new `inputs/3dep_dems/10m_5070` was loaded as 10x10, CRS:5070 rasters.
+
+### Removals
+
+- `lib`
+  - `aggregate_fim_outputs.py` : obsolete. Had been deprecated for a while and replaced by other files.
+  - `fr_to_mr_raster_mask.py` : obsolete. Had been deprecated for a while and replaced by other files.
+

+
+## v4.0.9.8 - 2022-10-06 - [PR #701](https://github.com/NOAA-OWP/inundation-mapping/pull/701)
+
+Moved the calibration tool from the dev-fim3 branch into the "dev" (fim4) branch. Git history is not available.
+
+Also updated it to make it easier to deploy, along with better information for external contributors.
+
+Changed the system so the calibration database name is configurable. This allows test databases to be set up in the same postgres db / server system. You can have more than one calb_db_keys.env running on different computers (or even more than one on one server) pointing to the same actual postgres server and service, i.e., multiple dev machines can call a single production server which hosts the database.
+
+For more details see /tools/calibration-db/README.md
+
+### Changes
+
+- `tools`
+  - `calibration-db`
+    - `docker-compose.yml`: changed to allow for a configurable database name (allows for more than one database in a postgres database system, e.g. one for prod, another for test if needed)
+
+### Additions
+
+- `config`
+  - `calb_db_keys_template.env`: a new template version of the required config values.
+
+### Removals
+
+- `tools`
+  - `calibration-db`
+    - `start_db.sh`: Removed as the command should be run on demand and not specifically scripted because of the configurable location of the env file.
+

+
+## v4.0.9.7 - 2022-10-7 - [PR #703](https://github.com/NOAA-OWP/inundation-mapping/pull/703)
+
+During a recent release of a FIM 3 version, it was discovered that FIM3 has slightly different AWS S3 upload requirements. A new s3 whitelist file has been created for FIM3 and the other s3 file was renamed to include the phrase "fim4" in it.
+
+This is being added to source control as it might be used again and we don't want to lose it.
+
+### Additions
+
+- `config`
+  - `aws_s3_put_fim3_whitelist.lst`
+
+### Renamed
+
+- `config`
+  - `aws_s3_put_fim4_whitelist.lst`: renamed from aws_s3_put_whitelist.lst
+

+ +## v4.0.9.6 - 2022-10-17 - [PR #711](https://github.com/NOAA-OWP/inundation-mapping/pull/711) + +Bug fix and formatting upgrades. It was also upgraded to allow for misc other inundation data such as high water data. + +### Changes + +- `tools` + - `inundate_nation.py`: As stated above. + +### Testing + +- it was run in a production model against fim 4.0.9.2 at 100 yr and 2 yr as well as a new High Water dataset. + +

+
+## v4.0.9.5 - 2022-10-3 - [PR #696](https://github.com/NOAA-OWP/inundation-mapping/pull/696)
+
+- Fixed deny_gms_unit_prod.lst to comment out LandSea_subset.gpkg, so it does not get removed. It is needed for processing in some branches.
+- Changed the default for params_template.env -> src_adjust_spatial="False", back to the default of "True".
+- Fixed an infinite loop when src_adjust_usgs_rating.py was unable to talk to the calib db.
+- Fixed src_adjust_usgs_rating.py for when the usgs_elev_table.csv may not exist.
+
+### Changes
+
+- `gms_run_branch.sh`: removed some "time" commands in favour of using fim commands from bash_functions.sh which give better time and output messages.
+
+- `config`
+  - `deny_gms_unit_prod.lst`: Commented out LandSea_subset.gpkg as some HUCs need that file in place.
+  - `params_template.env`: Changed default src_adjust_spatial back to True
+
+- `src`
+  - `src_adjust_spatial_obs.py`: Added code to a while loop (line 298) so it is not an indefinite loop that never stops running. It now stops attempting to contact the calibration db after 6 attempts. Small adjustments to output and logging were also made, plus validation that a connection to the calib db was actually successful.
+  - `src_adjust_usgs_rating.py`: Discovered that a usgs_elev_df might not exist (particularly when processing was being done for hucs that have no usgs gauge data). If the usgs_elev_df does not exist, it no longer errors out.
+
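+
+For illustration only (not the actual code in `src_adjust_spatial_obs.py`): a minimal sketch of a bounded retry loop for the calibration-database connection using psycopg2 from the project's dependencies; the connection parameters, attempt count, and wait time are placeholders (the real values come from calb_db_keys.env).
+
+```python
+import time
+import psycopg2
+
+def connect_with_retries(max_attempts: int = 6, wait_seconds: int = 10):
+    """Try to reach the calibration database a fixed number of times, then give up."""
+    for attempt in range(1, max_attempts + 1):
+        try:
+            conn = psycopg2.connect(
+                host="calb-db-host",       # placeholder values, normally read from calb_db_keys.env
+                dbname="calibration",
+                user="fim",
+                password="changeme",
+                connect_timeout=10,
+            )
+            print(f"Connected to calibration db on attempt {attempt}")
+            return conn
+        except psycopg2.OperationalError as err:
+            print(f"Attempt {attempt}/{max_attempts} failed: {err}")
+            time.sleep(wait_seconds)
+    raise RuntimeError("Could not connect to the calibration db; giving up instead of looping forever.")
+```
+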

+
+## v4.0.9.4 - 2022-09-30 - [PR #691](https://github.com/NOAA-OWP/inundation-mapping/pull/691)
+
+Cleans up branch zero output at the end of a processing run. Without this fix, some very large files were being left on the file system. Adjustments and cleanup changed the full BED output run from appx 2 TB of output to appx 1 TB of output.
+
+### Additions
+
+- `unit_tests`
+  - `gms`
+    - `outputs_cleanup_params.json` and `outputs_cleanup_unittests.py`: The usual unit test files.
+
+### Changes
+
+- `gms_pipeline.sh`: changed variables and text to reflect the renamed default `deny_gms_branches_prod.lst` and `deny_gms_unit_prod.lst` files. Also tells how a user can use the word 'none' for the deny list parameter (both or either unit or branch deny list) to skip output cleanup(s).
+
+- `gms_run_unit.sh`: changed variables and text to reflect the renamed default `deny_gms_unit_prod.lst` file. Also added a bit of minor output text (styling). Also tells how a user can use the word 'none' for the deny list parameter to skip output cleanup.
+
+- `gms_run_branch.sh`:
+  - changed variables and text to reflect the renamed default `deny_gms_branches.lst` files.
+  - added a bit of minor output text (styling).
+  - also tells how a user can use the word 'none' for the deny list parameter to skip output cleanup.
+  - added a new section that calls the `outputs_cleanup.py` file and will do post cleanup on branch zero output files.
+
+- `src`
+  - `gms`
+    - `outputs_cleanup.py`: pretty much rewrote it in its entirety. Now accepts a mandatory branch id (can be zero) and can recursively search subdirectories, i.e., we can submit a whole output directory with all hucs and ask it to clean up the branch 0 folders, OR clean up files in any particular directory as we did before (per branch id).
+    - `run_by_unit.sh`: updated to pass in a branch id (or the value of "0" meaning branch zero) to outputs_cleanup.py.
+    - `run_by_branch.sh`: updated to pass in a branch id to outputs_cleanup.py.
+
+- `unit_tests`
+  - `README.md`: updated to talk about the specific deny list for unit_testing.
+  - `__template_unittests.py`: updated for the latest code standards for unit tests.
+
+- `config`
+  - `deny_gms_branch_unittest.lst`: Added some new files to be deleted, updated others.
+  - `deny_gms_branch_zero.lst`: Added some new files to be deleted.
+  - `deny_gms_branches_dev.lst`: Renamed from `deny_gms_branches_default.lst`, added some new files to be deleted, updated others. Now used primarily for development and testing use.
+  - `deny_gms_branches_prod.lst`: Renamed from `deny_gms_branches_min.lst`, added some new files to be deleted, updated others. Now used primarily for when releasing a version to production.
+  - `deny_gms_unit_prod.lst`: Renamed from `deny_gms_unit_default.lst` (yes... there currently is no "dev" version). Added some new files to be deleted.
+

+
+## v4.0.9.3 - 2022-09-13 - [PR #681](https://github.com/NOAA-OWP/inundation-mapping/pull/681)
+
+Created a new tool to download USGS 3DEP DEMs via their S3 bucket.
+
+Other changes:
+ - Some code file re-organization in favour of the new `data` folder which is designed for getting, setting, and processing data from external sources such as AWS, WBD, NHD, NWM, etc.
+ - Added tmux as a new tool embedded inside the docker images.
+
+### Additions
+
+- `data`
+  - `usgs`
+    - `acquire_and_preprocess_3dep_dems.py`: The new tool as described above. For now it is hardcoded to a set path for the USGS AWS S3 vrt file but may change later to become parameter driven.
+    - `create_vrt_file.py`: This is also a new tool that can take a directory of geotiff files and create a gdal virtual file (.vrt extension), also called a `virtual raster`. Instead of clipping against HUC4, 6, or 8 raster files, and running risks of boundary issues, a vrt acts as if all of the tifs were one giant mosaicked raster and can be clipped as one (see the sketch after this entry).
+
+### Removals
+
+- `Dockerfile.prod`: No longer being used (never was used)
+
+### Changes
+
+- `Dockerfile`: Added apt install for tmux. This tool will now be available in docker images and assists developers.
+
+- `data`
+  - `acquire_and_preprocess_inputs.py`: moved from the `tools` directory but no other changes made. Note: will require review/adjustments before being used again.
+  - `nws`
+    - `preprocess_ahps_nws.py`: moved from the `tools` directory but no other changes made. Note: will require review/adjustments before being used again.
+  - `preprocess_rasters.py`: moved from the `tools` directory but no other changes made. Note: will require review/adjustments before being used again.
+  - `usgs`
+    - `preprocess_ahps_usgs.py`: moved from the `tools` directory but no other changes made. Note: will require review/adjustments before being used again.
+    - `preprocess_download_usgs_grids.py`: moved from the `tools` directory but no other changes made. Note: will require review/adjustments before being used again.
+
+- `src`
+  - `utils`
+    - `shared_functions.py`: changes made were
+      - Cleaned up the "imports" section of the file (including a change to how the utils.shared_variables file is loaded).
+      - Added `progress_bar_handler` function which can be re-used by other code files.
+      - Added `get_file_names` which can create a list of files from a given directory matching a given extension.
+      - Modified the `print_current_date_time` and `print_date_time_duration` methods to return the date time strings. These helper methods exist to help with standardization of logging and output console messages.
+      - Added `print_start_header` and `print_end_header` to help with standardization of console and logging output messages.
+    - `shared_variables.py`: Additions in support of near future functionality of having fim load DEMs from USGS 3DEP instead of NHD rasters.
+
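+
+For illustration only (not the code in `create_vrt_file.py`): a minimal sketch of building a virtual raster from a folder of GeoTIFFs with GDAL's Python bindings; the function name and paths are illustrative.
+
+```python
+import glob
+from osgeo import gdal
+
+def build_vrt(tif_dir: str, out_vrt: str) -> None:
+    """Create a .vrt that treats all GeoTIFFs in tif_dir as one mosaicked raster."""
+    tif_list = sorted(glob.glob(f"{tif_dir}/*.tif"))
+    if not tif_list:
+        raise FileNotFoundError(f"No .tif files found in {tif_dir}")
+    vrt = gdal.BuildVRT(out_vrt, tif_list)   # writes the .vrt index file
+    vrt = None                               # dereference to flush/close the dataset
+
+# Hypothetical usage:
+# build_vrt("/data/inputs/3dep_dems/10m_5070",
+#           "/data/inputs/3dep_dems/10m_5070/fim_seamless_3dep_dem_10m_5070.vrt")
+```
+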

+
+## v4.0.9.2 - 2022-09-12 - [PR #678](https://github.com/NOAA-OWP/inundation-mapping/pull/678)
+
+This fixes several bugs related to branch definition and trimming due to waterbodies.
+
+### Changes
+
+- `src/gms/stream_branches.py`
+  - Bypasses erroneous stream network data in the "to" ID field by using the Node attribute instead.
+  - Adds check if no nwm_lakes_proj_subset.gpkg file is found due to no waterbodies in the HUC.
+  - Allows for multiple upstream branches when stream order overrides arbolate sum.
+

+
+## v4.0.9.1 - 2022-09-01 - [PR #664](https://github.com/NOAA-OWP/inundation-mapping/pull/664)
+
+A couple of changes:
+1) Addition of a new tool for pushing files / folders up to an AWS (Amazon Web Service) S3 bucket.
+2) Updates to the Docker image creation files to include new packages for boto3 (for AWS) and also `jupyter`, `jupyterlab` and `ipympl` to make it easier to use those tools during development.
+3) Corrects an oversight of `logs\src_optimization` not being cleared upon an `overwrite` run.
+
+### Additions
+
+- `src`
+  - `data`
+    - `README.md`: Details on the new system of `data` folders (for communication with external data sources/services).
+    - `aws`
+      - `aws_base.py`: A file using a class and inheritance system (parent / child). This file has properties and a method that all child classes will be expected to use and share. This makes it quicker and easier to add new AWS tools and helps keep consistent patterns and standards.
+      - `aws_creds_template.env`: There are a number of ways to validate credentials for sending data up to S3. We have chosen to use an `.env` file that can be passed into the tool from any location. This is the template for that `.env` file. Later versions may be changed to use the AWS profile security system.
+      - `s3.py`: This file pushes files and folders up to a defined S3 bucket and root folder (see the sketch after this entry). Note: while it is designed only for `puts` (pushing to S3), hooks were added in case functionality is added later for `gets` (pulling from S3).
+
+### Changes
+
+- `utils`
+  - `shared_functions.py`: A couple of new features
+    - Added a method which accepts a path to a .lst or .txt file with a collection of data and loads it into a python list object. It can be used for a list of HUCs, file paths, or almost anything.
+    - A new method for quick addition of the current date/time in output.
+    - A new method for quick calculation and formatting of time duration in hours, minutes and seconds.
+    - A new method for searching for a string in a given python list. It was designed with the following in mind: we already have a python list loaded with a whitelist of files to be included in an S3 push. As we iterate through files from the file system, we can use this tool to see if the file should be pushed to S3. This tool can easily be used in other contexts, and there is similar functionality in other FIM4 code that might be able to use this method.
+
+- `Dockerfile` : Removed a line, added in recent PRs, for reloading Shapely, which for some reason is no longer needed after adding the new boto3 python package. Must be related to python package dependencies. This removes a Shapely warning seen as a result of another recent PR. Also added AWS CLI for bash commands.
+
+- `Pipfile` and `Pipfile.lock`: Updates for the four new python packages, `boto3` (for AWS), `jupyter`, `jupyterlab` and `ipympl`. We have some staff that use Jupyter in their dev activities. Adding these packages into the base Docker image will make it easier for them.
+
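+
+For illustration only (not the code in `s3.py`): a minimal sketch of pushing a local folder to an S3 bucket with boto3, one of the newly added packages; the bucket name, prefix, and credential handling are placeholders.
+
+```python
+import os
+import boto3
+
+def put_folder_to_s3(local_dir: str, bucket: str, s3_prefix: str) -> None:
+    """Upload every file under local_dir to s3://bucket/s3_prefix/, preserving relative paths."""
+    s3 = boto3.client("s3")   # credentials resolved from the environment (e.g. an .env file or AWS profile)
+    for root, _dirs, files in os.walk(local_dir):
+        for name in files:
+            local_path = os.path.join(root, name)
+            rel_path = os.path.relpath(local_path, local_dir)
+            key = f"{s3_prefix}/{rel_path}".replace(os.sep, "/")
+            s3.upload_file(local_path, bucket, key)
+            print(f"uploaded {local_path} -> s3://{bucket}/{key}")
+
+# Hypothetical usage:
+# put_folder_to_s3("/outputs/my_run", "example-fim-bucket", "runs/my_run")
+```
+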

+
+## v4.0.9.0 - 2022-09-09 - [PR #672](https://github.com/NOAA-OWP/inundation-mapping/pull/672)
+
+When deriving level paths, this improvement allows stream order to override arbolate sum when selecting the proper upstream segment to continue the current branch.
+

+
+## v4.0.8.0 - 2022-08-26 - [PR #671](https://github.com/NOAA-OWP/inundation-mapping/pull/671)
+
+Trims ends of branches that are in waterbodies; also removes branches if they are entirely in a waterbody.
+
+### Changes
+
+- `src/gms/stream_branches.py`: adds `trim_branches_in_waterbodies()` and `remove_branches_in_waterbodies()` to trim and prune branches in waterbodies.
+

+
+## v4.0.7.2 - 2022-08-11 - [PR #654](https://github.com/NOAA-OWP/inundation-mapping/pull/654)
+
+`inundate_nation.py`: A change to switch the inundate nation function away from references to `inundate.py` and instead use `inundate_gms.py` and `mosaic_inundation.py`.
+
+### Changes
+
+- `inundate_gms`: Changed `mask_type = 'filter'`
+

+ +## v4.0.7.1 - 2022-08-22 - [PR #665](https://github.com/NOAA-OWP/inundation-mapping/pull/665) + +Hotfix for addressing missing input variable when running `gms_run_branch.sh` outside of `gms_pipeline.sh`. + +### Changes +- `gms_run_branch.sh`: defining path to WBD HUC input file directly in ogr2ogr call rather than using the $input_WBD_gdb defined in `gms_run_unit.sh` +- `src/src_adjust_spatial_obs.py`: removed an extra print statement +- `src/src_roughness_optimization.py`: removed a log file write that contained sensitive host name + +

+ +## v4.0.7.0 - 2022-08-17 - [PR #657](https://github.com/NOAA-OWP/inundation-mapping/pull/657) + +Introduces synthetic rating curve calibration workflow. The calibration computes new Manning's coefficients for the HAND SRCs using input data: USGS gage locations, USGS rating curve csv, and a benchmark FIM extent point database stored in PostgreSQL database. This addresses [#535]. + +### Additions + +- `src/src_adjust_spatial_obs.py`: new synthetic rating curve calibration routine that prepares all of the spatial (point data) benchmark data for ingest to the Manning's coefficient calculations performed in `src_roughness_optimization.py` +- `src/src_adjust_usgs_rating.py`: new synthetic rating curve calibration routine that prepares all of the USGS gage location and observed rating curve data for ingest to the Manning's coefficient calculations performed in `src_roughness_optimization.py` +- `src/src_roughness_optimization.py`: new SRC post-processing script that ingests observed data and HUC/branch FIM output data to compute optimized Manning's coefficient values and update the discharge values in the SRCs. Outputs a new hydroTable.csv. + +### Changes + +- `config/deny_gms_branch_zero.lst`: added `gw_catchments_reaches_filtered_addedAttributes_crosswalked_{}.gpkg` to list of files to keep (used in calibration workflow) +- `config/deny_gms_branches_min.lst`: added `gw_catchments_reaches_filtered_addedAttributes_crosswalked_{}.gpkg` to list of files to keep (used in calibration workflow) +- `config/deny_gms_unit_default.lst`: added `usgs_elev_table.csv` to list of files to keep (used in calibration workflow) +- `config/params_template.env`: added new variables for user to control calibration + - `src_adjust_usgs`: Toggle to run src adjustment routine (True=on; False=off) + - `nwm_recur_file`: input file location with nwm feature_id and recurrence flow values + - `src_adjust_spatial`: Toggle to run src adjustment routine (True=on; False=off) + - `fim_obs_pnt_data`: input file location with benchmark point data used to populate the postgresql database + - `CALB_DB_KEYS_FILE`: path to env file with sensitive paths for accessing postgres database +- `gms_run_branch.sh`: includes new steps in the workflow to connect to the calibration PostgreSQL database, run SRC calibration w/ USGS gage rating curves, run SRC calibration w/ benchmark point database +- `src/add_crosswalk.py`: added step to create placeholder variables to be replaced in post-processing (as needed). 
Created here to ensure consistent column variables in the final hydrotable.csv
+- `src/gms/run_by_unit.sh`: added new steps to the workflow to create the `usgs_subset_gages.gpkg` file for branch zero and then perform the crosswalk and create `usgs_elev_table.csv` for branch zero
+- `src/make_stages_and_catchlist.py`: Reconcile flows and catchments hydroids
+- `src/usgs_gage_aggregate.py`: changed streamorder data type from integer to string to better handle missing values in `usgs_gage_unit_setup.py`
+- `src/usgs_gage_unit_setup.py`: added new inputs and function to populate `usgs_elev_table.csv` for branch zero using all available gages within the huc (not filtering to a specific branch)
+- `src/utils/shared_functions.py`: added two new functions for calibration workflow
+  - `check_file_age`: check the age of a file (use for flagging potentially outdated input)
+  - `concat_huc_csv`: concatenate huc csv files to a single dataframe/csv
+- `src/utils/shared_variables.py`: defined new SRC calibration threshold variables
+  - `DOWNSTREAM_THRESHOLD`: distance in km to propagate new roughness values downstream
+  - `ROUGHNESS_MAX_THRESH`: max allowable adjusted roughness value (void values larger than this)
+  - `ROUGHNESS_MIN_THRESH`: min allowable adjusted roughness value (void values smaller than this)
+

+ +## v4.0.6.3 - 2022-08-04 - [PR #652](https://github.com/NOAA-OWP/inundation-mapping/pull/652) + +Updated `Dockerfile`, `Pipfile` and `Pipfile.lock` to add the new psycopg2 python package required for a WIP code fix for the new FIM4 calibration db. + +

+
+## v4.0.6.2 - 2022-08-16 - [PR #639](https://github.com/NOAA-OWP/inundation-mapping/pull/639)
+
+Adds a tool that converts USFIMR remote-sensed inundation shapefiles into a raster that can be used to compare to the FIM data. It has to be run separately for each shapefile. This addresses [#629].
+
+### Additions
+
+- `/tools/fimr_to_benchmark.py`: as described above.
+

+ +## v4.0.6.1 - 2022-08-12 - [PR #655](https://github.com/NOAA-OWP/inundation-mapping/pull/655) + +Prunes branches that fail with NO_FLOWLINES_EXIST (Exit code: 61) in `gms_run_branch.sh` after running `split_flows.py` + +### Additions +- Adds `remove_error_branches.py` (called from `gms_run_branch.sh`) +- Adds `gms_inputs_removed.csv` to log branches that have been removed across all HUCs + +### Removals +- Deletes branch folders that fail +- Deletes branch from `gms_inputs.csv` + +

+ +## v4.0.6.0 - 2022-08-10 - [PR #614](https://github.com/NOAA-OWP/inundation-mapping/pull/614) + +Addressing #560, this fix in run_by_branch trims the DEM derived streamline if it extends past the end of the branch streamline. It does this by finding the terminal point of the branch stream, snapping to the nearest point on the DEM derived stream, and cutting off the remaining downstream portion of the DEM derived stream. + +### Changes + +- `/src/split_flows.py`: Trims the DEM derived streamline if it flows past the terminus of the branch (or level path) streamline. +- `/src/gms/delineate_hydros_and_produce_HAND.sh`: Added branch streamlines as an input to `split_flows.py`. + +
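+
+For illustration only (not the code in `split_flows.py`): a minimal sketch of the trimming idea using Shapely 1.7 (the pinned version): project the branch terminus onto the DEM-derived line and keep only the portion upstream of that point. The function name and geometries are illustrative.
+
+```python
+from shapely.geometry import LineString, Point
+from shapely.ops import substring
+
+def trim_dem_stream_to_branch_end(dem_stream: LineString, branch_stream: LineString) -> LineString:
+    """Cut off the part of the DEM-derived stream that extends past the branch terminus."""
+    branch_end = Point(branch_stream.coords[-1])       # terminal point of the branch streamline
+    cut_distance = dem_stream.project(branch_end)      # distance along dem_stream nearest to that point
+    return substring(dem_stream, 0, cut_distance)      # keep only the upstream portion
+
+# Illustrative geometries: the DEM-derived line runs past the branch end at x=8
+dem_line = LineString([(0, 0), (10, 0)])
+branch_line = LineString([(0, 1), (8, 1)])
+print(trim_dem_stream_to_branch_end(dem_line, branch_line))   # LINESTRING (0 0, 8 0)
+```
+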

+ +## v4.0.5.4 - 2022-08-01 - [PR #642](https://github.com/NOAA-OWP/inundation-mapping/pull/642) + +Fixes bug that causes [Errno2] No such file or directory error when running synthesize_test_cases.py if testing_versions folder doesn't exist (for example, after downloading test_cases from ESIP S3). + +### Additions + +- `run_test_case.py`: Checks for testing_versions folder in test_cases and adds it if it doesn't exist. + +

+
+## v4.0.5.3 - 2022-07-27 - [PR #630](https://github.com/NOAA-OWP/inundation-mapping/issues/630)
+
+A file called gms_pipeline.sh already existed but was unusable. This has been updated and can now be used as a "one-command" execution of the fim4/gms run. While you still can run gms_run_unit.sh and gms_run_branch.sh as you did before, you no longer need to. Input arguments were simplified to allow for more defaults, and this simplification was added to `gms_run_unit.sh` and `gms_run_branch.sh` as well.
+
+A new feature was added that is used by `gms_pipeline.sh` which tests the percentage and number of errors after hucs are processed before continuing on to branch processing (see the sketch after this entry).
+
+New FIM4/gms usability is now just (at a minimum): `gms_pipeline.sh -n -u `
+
+`gms_run_unit.sh` and `gms_run_branch.sh` have also been changed to add the new -a flag and default to dropping stream orders 1 and 2.
+
+### Additions
+
+- `src`
+  - `check_unit_errors.py`: as described above.
+- `unit_tests`
+  - `check_unit_errors_unittests.py` and `check_unit_errors_params.json`: to match the new file.
+
+### Changes
+
+- `README.md`: Updated text for FIM4, gms_pipeline, S3 input updates, information about updating dependencies, misc link updates and misc text verbiage.
+- `gms_pipeline.sh`: as described above.
+- `gms_run_unit.sh`: as described above. Also small updates to clean up folders and files in case of an overwrite.
+- `gms_run_branch.sh`: as described above.
+- `src`
+  - `utils`
+    - `fim_enums.py`: FIM_system_exit_codes renamed to FIM_exit_codes.
+    - `shared_variables.py`: added configurable values for minimum number and percentage of unit errors.
+  - `bash_functions.env`: Update to make the cumulative time screen outputs in mins/secs instead of just seconds.
+  - `check_huc_inputs.py`: Now returns the number of HUCs being processed, needed by `gms_pipeline.sh` (Note: to get the value back to a bash file, it has to send it back via a "print" line and not a "return" value). Improved input validation.
+- `unit_tests`
+  - `README.md`: Misc text and link updates.
+
+### Removals
+
+- `config\params_template_calibrated.env`: No longer needed. Has been removed already from dev-fim3 and confirmed that it is not needed.
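+
+For illustration only (not the code in `check_unit_errors.py`): a minimal sketch of the idea of counting unit errors from a run and stopping before branch processing if both the count and the percentage exceed configurable thresholds. The threshold names and values below are illustrative, not the ones defined in `shared_variables.py`.
+
+```python
+import sys
+
+# Illustrative thresholds (the real, configurable values live in src/utils/shared_variables.py)
+MIN_UNIT_ERRORS_ALLOWED = 10
+MAX_UNIT_ERROR_PERCENT = 10.0
+
+def check_unit_errors(num_hucs_processed: int, num_unit_errors: int) -> None:
+    """Abort the pipeline if too many HUCs failed during unit processing."""
+    if num_hucs_processed == 0:
+        raise ValueError("No HUCs were processed.")
+    error_percent = 100.0 * num_unit_errors / num_hucs_processed
+    if num_unit_errors > MIN_UNIT_ERRORS_ALLOWED and error_percent > MAX_UNIT_ERROR_PERCENT:
+        print(f"{num_unit_errors} of {num_hucs_processed} HUCs failed "
+              f"({error_percent:.1f}%); stopping before branch processing.")
+        sys.exit(1)
+
+check_unit_errors(num_hucs_processed=200, num_unit_errors=5)   # passes, so the pipeline would continue
+```
+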

+ +## v4.0.5.2 - 2022-07-25 - [PR #622](https://github.com/NOAA-OWP/inundation-mapping/pull/622) + +Updates to unit tests including a minor update for outputs and loading in .json parameter files. +

+
+## v4.0.5.1 - 2022-06-27 - [PR #612](https://github.com/NOAA-OWP/inundation-mapping/pull/612)
+
+`Alpha Test Refactor` An upgrade was made a few weeks back to the dev-fim3 branch that improved performance, usability and readability of running alpha tests. Some cleanup in other files for readability, debugging verbosity and styling was done as well. A newer, cleaner system for printing lines when the verbose flag is enabled was added.
+
+### Changes
+
+- `gms_run_branch.sh`: Updated help instructions about using multiple HUCs as command arguments.
+- `gms_run_unit.sh`: Updated help instructions about using multiple HUCs as command arguments.
+- `src/utils`
+  - `shared_functions.py`:
+    - Added a new function called `vprint` which creates a simpler way (and better readability) for other python files to include a print line when the verbose flag is on (see the sketch after this entry).
+    - Added a new class named `FIM_Helpers` as a wrapper for the new `vprint` method.
+    - With the new `FIM_Helpers` class, a previously existing method named `append_id_to_file_name` was moved into this class, making it easier and quicker to use in other classes.
+
+- `tools`
+  - `composite_inundation.py`: Updated its usage of the `append_id_to_file_name` function to now call the `FIM_Helpers` method version of it.
+  - `gms_tools`
+    - `inundate_gms.py`: Updated for its adjusted usage of the `append_id_to_file_name` method, and also removed its own `def __vprint` function in favour of the `FIM_Helpers.vprint` method.
+    - `mosaic_inundation.py`:
+      - Added adjustments for use of `append_id_to_file_name` and adjustments for `fh.vprint`.
+      - Fixed a bug for the variable `ag_mosaic_output` which was not pre-declared and would fail as an undefined variable in certain conditions.
+  - `run_test_case.py`: Ported the `test_case` class from FIM 3 and tweaked it slightly to allow for GMS FIM. Also added more prints using the new fh.vprint method, plus a default print line for progress / traceability for all alpha tests regardless of whether the verbose flag is set.
+  - `synthesize_test_cases.py`: Ported the `test_case` class from FIM 3.
+- `unit_tests`
+  - `shared_functions_unittests.py`: Updated to match moving `append_id_to_file_name` into the `FIM_Helpers` class. Also removed all "header print lines" for each unit test method (for output readability).
+
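+
+For illustration only (not the exact implementation in `shared_functions.py`): a minimal sketch of the `vprint` idea, a helper that only prints when a verbose flag is on so callers don't need to wrap every print in an `if`. The signature is assumed.
+
+```python
+class FIM_Helpers:
+    """Sketch of a verbose-print wrapper; the real class carries additional methods."""
+
+    @staticmethod
+    def vprint(message: str, is_verbose: bool) -> None:
+        # Print only when the caller's verbose flag is enabled.
+        if is_verbose:
+            print(message)
+
+# Usage: the caller passes its own verbose flag through instead of guarding each print
+FIM_Helpers.vprint("loading branch polygons ...", is_verbose=True)
+FIM_Helpers.vprint("this will not appear", is_verbose=False)
+```
+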

+ +## v4.0.5.0 - 2022-06-16 - [PR #611](https://github.com/NOAA-OWP/inundation-mapping/pull/611) + +'Branch zero' is a new branch that runs the HUC's full stream network to make up for stream orders 1 & 2 being skipped by the GMS solution and is similar to the FR extent in FIM v3. This new branch is created during `run_by_unit.sh` and the processed DEM is used by the other GMS branches during `run_by_branch.sh` to improve efficiency. + +### Additions + +- `src/gms/delineate_hydros_and_produce_HAND.sh`: Runs all of the modules associated with delineating stream lines and catchments and building the HAND relative elevation model. This file is called once during `gms_run_unit` to produce the branch zero files and is also run for every GMS branch in `gms_run_branch`. +- `config/deny_gms_branch_zero.lst`: A list specifically for branch zero that helps with cleanup (removing unneeded files after processing). + +### Changes + +- `src/` + - `output_cleanup.py`: Fixed bug for viz flag. + - `gms/` + - `run_by_unit.sh`: Added creation of "branch zero" and DEM pre-processing, and now calls `delineate_hydros_and_produce_HAND.sh` to produce HAND outputs for the entire stream network. + - `run_by_branch.sh`: Removed DEM processing steps (now done in `run_by_unit.sh`), moved stream network delineation and HAND generation to `delineate_hydros_and_produce_HAND.sh`. + - `generate_branch_list.py`: Added argument and parameter to ensure that the branch zero entry was added to the branch list. +- `config/` + - `params_template.env`: Added `zero_branch_id` variable. +- `tools` + - `run_test_case.py`: Some styling / readability upgrades plus some enhanced outputs. Also changed the _verbose_ flag to _gms_verbose_ being passed into the Mosaic_inundation function. + - `synthesize_test_cases.py`: Changed arguments being passed into the _alpha_test_args_ from hardcoded flags to the verbose flag (effectively turning on verbose outputs when applicable). Note: the progress bar was not affected. + - `tools_shared_functions.py`: Some styling / readability upgrades. +- `gms_run_unit.sh`: Added export of extent variable, dropped the -s flag and added the -a flag so it now defaults to dropping stream orders 1 and 2. +- `gms_run_branch.sh`: Fixed bug when using overwrite flag saying branch errors folder already exists, dropped the -s flag and added the -a flag so it now defaults to dropping stream orders 1 and 2. + +### Removals + +- `tests/`: Redundant +- `tools/shared_variables`: Redundant + +

+ +## v4.0.4.3 - 2022-05-26 - [PR #605](https://github.com/NOAA-OWP/inundation-mapping/pull/605) + +We needed a tool that could composite / mosaic inundation maps for FIM3 FR and FIM4 / GMS with stream orders 3 and higher. A tool previously existed named composite_ms_fr_inundation.py and it was renamed to composite_inundation.py and upgraded to handle any combination of 2 of 3 items (FIM3 FR, FIM3 MS and/or FIM4 GMS). + +### Additions + +- `tools/composite_inundation.py`: Technically it is renamed from composite_ms_fr_inundation.py, and is based on that functionality, but has been heavily modified. It has a number of options, but primarily is designed to take two sets of output directories, inundate the files, then composite them into a single mosaicked raster per HUC. The primary usage is expected to be compositing FIM3 FR with FIM4 / GMS with stream orders 3 and higher. + +- `unit_tests/gms/inundate_gms_unittests.py and inundate_gms_params.json`: for running unit tests against `tools/gms_tools/inundate_gms.py`. +- `unit_tests/shared_functions_unittests.py and shared_functions_params.json`: A new function named `append_id_to_file_name_single_identifier` was added to `src/utils/shared_functions.py` and some unit tests for that function were created. + +### Removed + +- `tools/composite_ms_fr_inundation.py`: replaced with upgraded version named `composite_inundation.py`. + +### Changes + +- `tools/gms_tools/inundate_gms.py`: some style, readability cleanup plus moved a function up to `shared_functions.py`. +- `tools/gms_tools/mosaic_inundation.py`: some style, readability cleanup plus moved a function up to `shared_functions.py`. +- `tools/inundation.py`: some style, readability cleanup. +- `tools/synthesize_test_cases.py`: was updated primarily for sample usage notes. + +
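+A minimal sketch of the compositing idea (a per-pixel maximum of two aligned inundation rasters); this is an assumption-laden illustration, not `composite_inundation.py` itself, and it assumes both inputs already share the same grid, extent, and nodata value.
+
+```python
+import numpy as np
+import rasterio
+
+def composite_two_rasters(raster_a_path, raster_b_path, out_path, nodata=-9999):
+    # Read both single-band rasters (assumed to be on an identical grid).
+    with rasterio.open(raster_a_path) as a, rasterio.open(raster_b_path) as b:
+        arr_a, arr_b, profile = a.read(1), b.read(1), a.profile
+    # Keep the larger value where both are valid, otherwise the valid one.
+    composite = np.where(arr_a == nodata, arr_b,
+                         np.where(arr_b == nodata, arr_a, np.maximum(arr_a, arr_b)))
+    profile.update(nodata=nodata)
+    with rasterio.open(out_path, "w", **profile) as dst:
+        dst.write(composite.astype(profile["dtype"]), 1)
+```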

+ +## v4.0.4.2 - 2022-05-03 - [PR #594](https://github.com/NOAA-OWP/inundation-mapping/pull/594) + +This hotfix includes several revisions needed to fix/update the FIM4 area inundation evaluation scripts. These changes largely migrate revisions from the FIM3 evaluation code to the FIM4 evaluation code. + +### Changes + +- `tools/eval_plots.py`: Copied FIM3 code revisions to enable RAS2FIM evals and PND plots. Replaced deprecated parameter name for matplotlib grid() +- `tools/synthesize_test_cases.py`: Copied FIM3 code revisions to assign FR, MS, COMP resolution variable and addressed magnitude list variable for IFC eval +- `tools/tools_shared_functions.py`: Copied FIM3 code revisions to enable probability not detected (PND) metric calculation +- `tools/tools_shared_variables.py`: Updated magnitude dictionary variables for RAS2FIM evals and PND plots + +

+ +## v4.0.4.1 - 2022-05-02 - [PR #587](https://github.com/NOAA-OWP/inundation-mapping/pull/587) + +While testing GMS against evaluation and inundation data, we discovered some challenges running alpha testing at full scale. Part of it was related to the very large output volume for GMS which resulted in outputs being created on multiple servers and folders. Considering the GMS volume and processing, a tool was required to extract out the ~215 HUCs that we have evaluation data for. Next, we needed to isolate valid HUC output folders from the original 2,188 HUCs and their hundreds of thousands of branches. The first new tool allows us to point to the `test_case` data folder and create a list of all HUCs that we have validation for. + +Now that we have a list of relevant HUCs, we need to consolidate output folders from the previously processed full CONUS+ output data. The new `copy_test_case_folders.py` tool extracts relevant HUC (gms unit) folders, based on the list created above, into a consolidated folder. The two tools combined result in significantly reduced overall processing time for running alpha tests at scale. + +`gms_run_unit.sh` and `aggregate_branch_lists.py` were adjusted to turn a previously hardcoded file path and file name into run-time parameters. By adding the two new arguments, the file could be used against the new `copy_test_case_folders.py`. `copy_test_case_folders.py` and `gms_run_unit.sh` can now call `aggregate_branch_lists.py` to create a key input file called `gms_inputs.csv`, which is required for alpha testing. + +A few other small adjustments were made for readability and traceability as well as a few small fixes discovered when running at scale. + +### Additions + +- `tools/find_test_case_folders.py`: A new tool for creating a list of HUCs that we have test/evaluation data for. +- `tools/copy_test_case_folders.py`: A new tool that uses the list created above to scan through other fully processed output folders and extract only the HUCs (gms units) and their branches into a consolidated folder, ready for alpha test processing (or other needs). + +### Changes + +- `src/gms/aggregate_branch_lists.py`: Adjusted to allow two previously hardcoded values to now be incoming arguments. Now this file can be used by both `gms_run_unit.sh` and `copy_test_case_folders.py`. +- `tools/synthesize_test_cases.py`: Adjustments for readability and progress status. The embedded progress bars are not working and will be addressed later. +- `tools/run_test_case.py`: A print statement was added to help show processing progress. +- `gms_run_unit.sh`: This was adjusted to match the new input parameters for `aggregate_branch_lists.py` as well as additions for progress status. It now will show the entire progress period start datetime, end datetime and duration. +- `gms_run_branch.sh`: Also was upgraded to show the entire progress period start datetime, end datetime and duration. + +
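+A hedged sketch of the copy step described above (pull only the HUC output folders named in a list into a consolidated directory); the paths, list format, and folder naming convention are assumptions for illustration, not the tools' actual interfaces.
+
+```python
+import shutil
+from pathlib import Path
+
+def copy_matching_huc_folders(huc_list_file, source_output_dir, target_dir):
+    # huc_list_file: one HUC8 per line, e.g. built from the test_case folder names.
+    hucs = {line.strip() for line in open(huc_list_file) if line.strip()}
+    Path(target_dir).mkdir(parents=True, exist_ok=True)
+    for huc_folder in Path(source_output_dir).iterdir():
+        # Copy only HUC (gms unit) folders whose name appears in the list.
+        if huc_folder.is_dir() and huc_folder.name in hucs:
+            shutil.copytree(huc_folder, Path(target_dir) / huc_folder.name,
+                            dirs_exist_ok=True)
+```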

+ +## v4.0.4.0 - 2022-04-12 - [PR #557](https://github.com/NOAA-OWP/inundation-mapping/pull/557) + +During large scale testing of the new **filtering out stream orders 1 and 2** feature [PR #548](https://github.com/NOAA-OWP/inundation-mapping/pull/548), a bug was discovered with 14 HUCs that had no remaining streams after removing stream orders 1 and 2. This resulted in a number of unmanaged and unclear exceptions. An exception may still be raised in this fix for logging purposes, but it is now very clear what happened. Other types of events are logged with clear codes to identify what happened. + +Fixes were put in place for a couple of new logging behaviors. + +1. Recognize that for system exit codes, there are times when an event is neither a success (code 0) nor a failure (code 1). During processing where stream orders are dropped, some HUCs had no remaining reaches, others had mismatched reaches, and others had missing flowlines (reaches) relating to dissolved level paths (merging individual reaches as part of GMS). When these occur, we want to abort the HUC (unit) or branch processing, identify that they were aborted for specific reasons and continue. A new custom system exit code system was added using Python enums (see the sketch at the end of this entry). Logging was enhanced to recognize that some exit codes were not a 0 or a 1 and process them differently. + +2. Pathing and log management became an issue. It is not uncommon for tens or hundreds of thousands of branches to be processed. A new feature recognizes what happened with each branch or unit and makes the logs easy to find and recognize. Further, logs for failures (sys exit code of 1) are now copied into a unique folder as they occur to help with visibility of run time errors. Previously, errors were not extracted until the end of the entire run, which may be multiple days. + +3. A minor correction was made for when dissolved level paths were created and the new merged level path did not always have a valid stream order value. + +### File Additions + +- `src/` + - `utils/` + - `fim_enums.py`: + - A new class called `FIM_system_exit_codes` was added. This allows tracking and blocking of duplicate system exit codes when a custom system code is required. + + +### Changes + +- `fim_run.sh`: Added the gms `non-zero-exit-code` system to `fim_run` to help uncover and isolate errors during processing. Errors recorded in log files within the logs/unit folder are now copied into a new folder called `unit_errors`. + +- `gms_run_branch.sh`: + - Minor adjustments to how the `non-zero-exit-code` logs were created. Testing uncovered that previous versions were not always reliable. This is now stabilized and enhanced. + - In previous versions, only the `gms_unit.sh` was aware that **stream order filtering** was being done. Now all branch processing is also aware that filtering is in place. Processing in child files and classes can now make adjustments as/if required for stream order filtering. + - Small output adjustments were made to help with overall screen and log readability. + +- `gms_run_unit.sh`: + - Minor adjustments to how the `non-zero-exit-code` logs were created, similar to `gms_run_branch.sh`. + - Small text corrections, formatting and output corrections were added. + - A feature removing all log files at the start of the entire process run was added if the `overwrite` command line argument was supplied. + +- `src/` + - `filter_catchments_and_add_attributes.py`: + - Some minor formatting and readability adjustments were added.
+ - Additions were made to help this code be aware of, and respond accordingly to, stream order filtering when it has occurred. Events previously recorded as bugs coming from this class may now be recorded with the new custom exit code if applicable. + + - `run_by_unit.sh` (supporting fim_run.sh): + - A change was made to the sub-process call to the `filter_catchments_and_add_attributes.py` file, which is shared by gms, related to reach errors / events. + + - `split_flows.py`: + - Some minor formatting and readability adjustments were added. + - Additions were made to recognize the same types of errors described in other files related to stream order filtering issues. + - A correction was made to be more precise and more explicit when a gms branch error existed. This was done to ensure that we were not letting other exceptions be trapped that were NOT related to stream flow filtering. + + - `time_and_tee_run_by_unit.sh`: + - The new custom system exit code system was added. Note that the values of 61 (responding system code) are hardcoded instead of using the Python-based `Fim_system_exit_code` system. This is related to limited communication between Python and bash. + + - `gms/` + - `derive_level_paths.py`: + - Was upgraded to use the new fim_enums.Fim_system_exit_codes system. This occurs when no streams / flows remain after filtering. Without this upgrade, standard exceptions were being issued with minimal details for the error. + - Minor adjustments to formatting for readability were made. + + - `generate_branch_list.py` : Minor adjustments to formatting for readability were made. + + - `run_by_branch.sh`: + - Some minor formatting and readability adjustments were added. + - Additions to the subprocess call to `split_flows.py` were added so it was aware that branch filtering was being used. `split_flows.py` was one of the files that was throwing errors related to stream order filtering. A subprocess call adjustment to `filter_catchments_and_add_attributes.py` was also required for the same reason. + + - `run_by_unit.sh`: + - Some minor formatting and readability adjustments were added. + - An addition was made to help trap errors that might be triggered by `derive_level_paths.py` for `stream order filtering`. + + - `time_and_tee_run_by_branch.sh`: + - A system was added to recognize if a non-successful system exit code was sent back from `run_by_branch`. This includes true errors of code 1 and other new custom system exit codes. Upon detection of non-zero exit codes, log files are immediately copied into special folders for quicker and easier visibility. Previously, errors were not brought forth until the entire process was completed, which ranged from hours up to 18 days. Note: System exit codes of 60 and 61 were hardcoded instead of using the values from the new `FIM_system_exit_codes` due to limited communication between Python and bash. + + - `time_and_tee_run_by_unit.sh`: + - The same upgrade as described above in `time_and_tee_run_by_branch.sh` was applied here. + - Minor readability and output formatting changes were made. + + - `todo.md` + - An entry was removed from this list which talked about errors due to small level paths exactly as was fixed in this pull request set. + +- `unit_tests/` + - `gms/` + - `derive_level_paths_unittests.py` : Added a new unit test specifically testing this type of condition with a known HUC that triggered the branch errors previously described. + - `derive_level_paths_params.json`: + - Added a new node with a HUC number known to fail.
+ - Changed pathing for unit test data from `/data/outputs/gms_example_unit_tests` to `/data/outputs/fim_unit_test_data_do_not_remove`. The new folder is intended to be a more permanent folder for unit test data. + - Some additional tests were added validating the argument for dropping stream orders. + +### Unit Test File Additions: + +- `unit_tests/` + - `filter_catchments_and_add_attributes_unittests.py` and `filter_catchments_and_add_attributes_params.json`: + + - `split_flows_unittests.py` and `split_flows_params.json` + +
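+A hedged sketch of the enum-based custom exit code pattern referenced in item 1 above; the member names and values (60, 61) echo this entry, but the exact enum contents and call site are assumptions, not the repository's `FIM_system_exit_codes`.
+
+```python
+import sys
+from enum import Enum
+
+class FIM_system_exit_codes(Enum):
+    # Illustrative members: codes that are neither success (0) nor failure (1).
+    UNIT_NO_REMAINING_REACHES = 60
+    BRANCH_NO_FLOWLINES_AFTER_FILTERING = 61
+
+def abort_branch_if_empty(flowlines):
+    # Exit with a custom code so the bash wrappers can detect the condition,
+    # copy the log to the appropriate folder, and continue the overall run.
+    if len(flowlines) == 0:
+        sys.exit(FIM_system_exit_codes.BRANCH_NO_FLOWLINES_AFTER_FILTERING.value)
+```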

+ +## v4.0.3.1 - 2022-03-10 - [PR #561](https://github.com/NOAA-OWP/inundation-mapping/pull/561) + +Bug fixes to get the Alpha Test working in FIM 4. + +### Changes + +- `tools/synthesize_test_cases.py`: Fixed bugs that prevented multiple benchmark types in the same HUC from running `run_test_case.py`. +- `tools/run_test_case.py`: Fixed small bug for IFC benchmark. +- `tools/eval_plots.py`: Fixed Pandas query bugs. + +

+ +## v4.0.3.0 - 2022-03-03 - [PR #550](https://github.com/NOAA-OWP/inundation-mapping/pull/550) + +This PR ports the functionality of `usgs_gage_crosswalk.py` and `rating_curve_comparison.py` to FIM 4. + +### Additions + +- `src/`: + - `usgs_gage_aggregate.py`: Aggregates all instances of `usgs_elev_table.csv` to the HUC level. This makes it easier to view the gages in each HUC without having to hunt through branch folders and easier for the Sierra Test to run at the HUC level (see the sketch at the end of this entry). + - `usgs_gage_unit_setup.py`: Assigns a branch to each USGS gage within a unit. The output of this module is `usgs_subset_gages.gpkg` at the HUC level containing the `levpa_id` attribute. + +### Changes + +- `gms_run_branch.sh`: Added a line to aggregate all `usgs_elev_table.csv` into the HUC directory level using `src/usgs_gage_aggregate.py`. +- `src/`: + - `gms/` + - `run_by_branch.sh`: Added a block to run `src/usgs_gage_crosswalk.py`. + - `run_by_unit.sh`: Added a block to run `src/usgs_gage_unit_setup.py`. + - `usgs_gage_crosswalk.py`: Similar to its functionality in FIM 3, this module snaps USGS gages to the stream network, samples the underlying DEMs, and writes the attributes to `usgs_elev_table.csv`. This CSV is later aggregated to the HUC level and eventually used in `tools/rating_curve_comparison.py`. Addresses #539. +- `tools/rating_curve_comparison.py`: Updated Sierra Test to work with FIM 4 data structure. +- `unit_tests/`: + - `rating_curve_comparison_unittests.py` & `rating_curve_comparison_params.json`: Unit test code and parameters for the Sierra Test. + - `usgs_gage_crosswalk_unittests.py` & `usgs_gage_crosswalk_params.json`: Unit test code and parameters for `usgs_gage_crosswalk.py` +- `config/`: + - `deny_gms_branches_default.lst` & `deny_gms_branches_min.lst`: Add `usgs_elev_table.csv` to the lists as a comment so it doesn't get deleted during cleanup. + - `deny_gms_unit_default.lst`: Add `usgs_subset_gages.gpkg` to the list as a comment so it doesn't get deleted during cleanup. + +
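+A minimal sketch of the HUC-level aggregation idea behind `usgs_gage_aggregate.py`; the folder layout and example paths are assumptions for illustration, not the script's actual interface.
+
+```python
+from pathlib import Path
+import pandas as pd
+
+def aggregate_usgs_elev_tables(huc_dir):
+    # Gather every branch-level usgs_elev_table.csv under a HUC directory and
+    # concatenate them into one HUC-level table (empty frame if none exist).
+    frames = [pd.read_csv(p) for p in Path(huc_dir).rglob("usgs_elev_table.csv")]
+    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
+
+# Example (hypothetical run/HUC path):
+# df = aggregate_usgs_elev_tables("/outputs/my_run/12090301")
+```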

+ +## v4.0.2.0 - 2022-03-02 - [PR #548](https://github.com/NOAA-OWP/inundation-mapping/pull/548) + +Added a new optional system which allows an argument to be added to the `gms_run_unit.sh` command line to filter out stream orders 1 and 2 when calculating branches. + +### Changes + +- `gms_run_unit.sh`: Add the new optional `-s` command line argument. Inclusion of this argument means "drop stream orders 1 and 2". + +- `src/gms` + - `run_by_unit.sh`: Capture and forward the drop stream orders flag to `derive_level_paths.py` + + - `derive_level_paths.py`: Captures the drop stream order flag and works with `stream_branches.py` to include or exclude loading NWM streams with stream orders 1 and 2. + + - `stream_branches.py`: A correction was put in place to allow branches to be filtered by attribute so that specified values can be excluded. The `from_file` method had the functionality but was incomplete. This was corrected and now can accept the values from `derive_level_paths.py` to use the branch attribute of "order_" (gpkg field) and excluded values of [1,2] when optionally desired (see the sketch at the end of this entry). + +- `unit_tests/gms` + - `derive_level_paths_unittests.py` and `derive_level_paths_params.json`: Updated to test the new "drop stream orders 1 and 2" feature. Upgrades were also made to earlier existing incomplete test methods to test more output conditions. + +
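+A hedged sketch of the stream order exclusion idea; the `order_` attribute name and the excluded values [1, 2] come from this entry, but the loading function itself is illustrative and not the `stream_branches.py` / `from_file` implementation.
+
+```python
+import geopandas as gpd
+
+def load_streams_excluding_orders(gpkg_path, layer=None,
+                                  branch_attribute="order_",
+                                  values_excluded=(1, 2)):
+    # Read the NWM streams layer, then drop features whose stream order is in
+    # the excluded set, mirroring the optional "drop stream orders 1 and 2" flag.
+    streams = gpd.read_file(gpkg_path, layer=layer)
+    if values_excluded:
+        streams = streams[~streams[branch_attribute].isin(list(values_excluded))]
+    return streams
+```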

+ +## v4.0.1.0 - 2022-02-02 - [PR #525](https://github.com/NOAA-OWP/cahaba/pull/525) + +The addition of a very simple and evolving unit test system which has two unit tests against two py files. This will set a precedent and will grow over time and may be automated, possibly triggered during git check-in. The embedded README.md has more details of what we currently have, how to use it, how to add new unit tests, and expected future enhancements. + +### Additions + +- `/unit_tests/` folder which has the following: + + - `clip_vectors_to_wbd_params.json`: A set of default "happy path" values that are expected to pass validation for the clip_vectors_to_wbd.py -> clip_vectors_to_wbd (function); see the sketch at the end of this entry. + + - `clip_vectors_to_wbd_unittests.py`: A unit test file for src/clip_vectors_to_wbd.py. Incomplete but evolving. + + - `README.md`: Some information about how to create unit tests and how to use them. + + - `unit_tests_utils.py`: A Python file where methods that are common to all unit tests can be placed. + + - `gms/derive_level_paths_params.json`: A set of default "happy path" values that are expected to pass validation for the derive_level_paths.py -> Derive_level_paths (function). + + - `gms/derive_level_paths_unittests.py`: A unit test file for `src/derive_level_paths.py`. Incomplete but evolving. + +
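+A minimal sketch of the params-JSON pattern these unit tests use (default "happy path" arguments loaded from a JSON file and fed to the function under test); the JSON structure and test body here are illustrative assumptions, not the repository's unit test code.
+
+```python
+import json
+import unittest
+
+class TestClipVectorsToWbd(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        # Load the default "happy path" arguments for the function under test.
+        with open("clip_vectors_to_wbd_params.json") as f:
+            cls.params = json.load(f)  # structure of the JSON is assumed
+
+    def test_params_loaded(self):
+        # The real test would unpack these params into clip_vectors_to_wbd(...);
+        # here we only verify that a non-empty parameter set was loaded.
+        self.assertTrue(isinstance(self.params, dict) and len(self.params) > 0)
+
+if __name__ == "__main__":
+    unittest.main()
+```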

+ +## v4.0.0.0 - 2022-02-01 - [PR #524](https://github.com/NOAA-OWP/cahaba/pull/524) + +FIM4 builds upon FIM3 and allows for better representation of inundation through the reduction of artificial restriction of inundation at catchment boundaries. + +More details will be made available through a publication by Aristizabal et al., titled "Reducing Horton-Strahler Stream Order Can Enhance Flood Inundation Mapping Skill with Applications for the U.S. National Water Model," which will be included in the "Credits and References" section of the README.md. + +### Additions + +- `/src/gms`: A new directory containing scripts necessary to produce the FIM4 Height Above Nearest Drainage grids and synthetic rating curves needed for inundation mapping. +- `/tools/gms_tools`: A new directory containing scripts necessary to generate and evaluate inundation maps produced from FIM4 Height Above Nearest Drainage grids and synthetic rating curves. + +

+ ## v3.0.24.3 - 2021-11-29 - [PR #488](https://github.com/NOAA-OWP/cahaba/pull/488) Fixed projection issue in `synthesize_test_cases.py`. -## Changes +### Changes - `Pipfile`: Added `Pyproj` to `Pipfile` to specify a version that did not have the current projection issues. @@ -15,7 +1783,7 @@ Fixed projection issue in `synthesize_test_cases.py`. Adding a new check to keep `usgs_elev_table.csv`, `src_base.csv`, `small_segments.csv` for runs not using the `-viz` flag. We unintentionally deleted some .csv files in `vary_mannings_n_composite.py` but need to maintain some of these for non `-viz` runs (e.g. `usgs_elev_table.csv` is used for sierra test input). -## Changes +### Changes - `fim_run.sh`: passing `-v` flag to `vary_mannings_n_composite.py` to determine which csv files to delete. Setting `$viz` = 0 for non `-v` runs. - `src/vary_mannings_n_composite.py`: added `-v` input arg and if statement to check which .csv files to delete. @@ -28,11 +1796,11 @@ Adding a new check to keep `usgs_elev_table.csv`, `src_base.csv`, `small_segment Patch to clean up unnecessary files and create better names for intermediate raster files. -## Removals +### Removals - `tools/run_test_case_gms.py`: Unnecessary file. -## Changes +### Changes - `tools/composite_ms_fr_inundation.py`: Clean up documentation and intermediate file names. - `tools/run_test_case.py`: Remove unnecessary imports. @@ -43,11 +1811,11 @@ Patch to clean up unnecessary files and create better names for intermediate ras Adds `composite_ms_fr_inundation.py` to allow for the generation of an inundation map given a "flow file" CSV and full-resolution (FR) and mainstem (MS) relative elevation models, synthetic rating curves, and catchments rasters created by the `fim_run.sh` script. -## Additions +### Additions - `composite_ms_fr_inundation.py`: New module that is used to inundate both MS and FR FIM and composite the two inundation rasters. - `/tools/gms_tools/`: Three modules (`inundate_gms.py`, `mosaic_inundation.py`, `overlapping_inundation.py`) ported from the GMS branch used to composite inundation rasters. -## Changes +### Changes - `inundation.py`: Added 2 exception classes ported from the GMS branch.

@@ -55,7 +1823,7 @@ Adds `composite_ms_fr_inundation.py` to allow for the generation of an inundatio ## v3.0.23.3 - 2021-11-04 - [PR #481](https://github.com/NOAA-OWP/cahaba/pull/481) Includes additional hydraulic properties to the `hydroTable.csv`: `Number of Cells`, `SurfaceArea (m2)`, `BedArea (m2)`, `Volume (m3)`, `SLOPE`, `LENGTHKM`, `AREASQKM`, `Roughness`, `TopWidth (m)`, `WettedPerimeter (m)`. Also adds `demDerived_reaches_split_points.gpkg`, `flowdir_d8_burned_filled.tif`, and `dem_thalwegCond.tif` to `-v` whitelist. -## Changes +### Changes - `run_by_unit.sh`: Added `EXIT FLAG` tag and previous non-zero exit code tag to the print statement to allow log lookup. - `add_crosswalk.py`: Added extra attributes to the hydroTable.csv. Includes a default `barc_on` and `vmann_on` (=False) attribute that is overwritten (=True) if SRC post-processing modules are run. - `bathy_src_adjust_topwidth.py`: Overwrites the `barc_on` attribute where applicable and includes the BARC-modified Volume property. @@ -67,7 +1835,7 @@ Includes additional hydraulic properties to the `hydroTable.csv`: `Number of Cel ## v3.0.23.2 - 2021-11-04 - [PR #480](https://github.com/NOAA-OWP/cahaba/pull/480) Hotfix for `vary_manning_n_composite.py` to address null discharge values for non-CONUS hucs. -## Changes +### Changes - `vary_manning_n_composite.py`: Add numpy where clause to set final discharge value to the original value if `vmann=False`

@@ -75,7 +1843,7 @@ Hotfix for `vary_manning_n_composite.py` to address null discharge values for no ## v3.0.23.1 - 2021-11-03 - [PR #479](https://github.com/NOAA-OWP/cahaba/pull/479) Patches the API updater. The `params_calibrated.env` is replaced with `params_template.env` because the BARC and Multi-N modules supplant the calibrated values. -## Changes +### Changes - `api/node/updater/updater.py`: Changed `params_calibrated.env` to `params_template.env`

@@ -84,14 +1852,14 @@ Patches the API updater. The `params_calibrated.env` is replaced with `params_te Moved the synthetic rating curve (SRC) processes from the `\tools` directory to `\src` directory to support post-processing in `fim_run.sh`. These SRC post-processing modules will now run as part of the default `fim_run.sh` workflow. Reconfigured bathymetry adjusted rating curve (BARC) module to use the 1.5yr flow from NWM v2 recurrence flow data in combination with the Bieger et al. (2015) regression equations with bankfull discharge predictor variable input. -## Additions +### Additions - `src/bathy_src_adjust_topwidth.py` --> New version of bathymetry adjusted rating curve (BARC) module that is configured to use the Bieger et al. (2015) regression equation with input bankfull discharge as the predictor variable (previous version used the drainage area version of the regression equations). Also added log output capability, added reconfigured output content in `src_full_crosswalked_BARC.csv` and `hydroTable.csv`, and included modifications to allow BARC to run as a post-processing step in `fim_run.sh`. Reminder: BARC is only configured for MS extent. -## Removals +### Removals - `config/params_calibrated.env` --> deprecated the calibrated roughness values by stream order with the new introduction of variable/composite roughness module - `src/bathy_rc_adjust.py` --> deprecated the previous BARC version -## Changes +### Changes - `src/identify_src_bankfull.py` --> Moved this script from /tools to /src, added more doc strings, cleaned up output log, and reconfigured to allow execution from fim_run.sh post-processing. - `src/vary_mannings_n_composite.py` --> Moved this script from /tools to /src, added more doc strings, cleaned up output log, added/reconfigured output content in src_full_crosswalked_vmann.csv and hydroTable.csv, and reconfigured to allow execution from fim_run.sh post-processing. - `config/params_template.env` --> Added additional parameter/variables for input to `identify_src_bankfull.py`, `vary_mannings_n_composite.py`, and `bathy_src_adjust_topwidth.py`. @@ -110,7 +1878,7 @@ Moved the synthetic rating curve (SRC) processes from the `\tools` directory to Manually filtering segments from stream input layer to fix flow reversal of the MS River (HUC 08030100). -## Changes +### Changes - `clip_vectors_to_wbd.py`: Fixes bug where flow direction is reversed for HUC 08030100. The issue is resolved by filtering incoming stream segments that intersect with the elevation grid boundary.

@@ -119,11 +1887,11 @@ Manually filtering segments from stream input layer to fix flow reversal of the These "tool" enhancements 1) delineate in-channel vs. out-of-channel geometry to allow more targeted development of key physical drivers influencing the SRC calculations (e.g. bathymetry & Manning’s n) #418 and 2) applies a variable/composite Manning’s roughness (n) using user provided csv with in-channel vs. overbank roughness values #419 & #410. -## Additions +### Additions - `identify_src_bankfull.p`: new post-processing tool that ingests a flow csv (e.g. NWM 1.5yr recurr flow) to approximate the bankfull STG and then calculate the channel vs. overbank proportions using the volume and hydraulic radius variables - `vary_mannings_n_composite.py`: new post-processing tool that ingests a csv containing feature_id, channel roughness, and overbank roughness and then generates composite n values via the channel ratio variable -## Changes +### Changes - `eval_plots.py`: modified the plot legend text to display full label for development tests - `inundation.py`: added new optional argument (-n) and corresponding function to produce a csv containing the stage value (and SRC variables) calculated from the flow to stage interpolation. @@ -133,7 +1901,7 @@ These "tool" enhancements 1) delineate in-channel vs. out-of-channel geometry to This new workflow ingests FIM point observations from users and “corrects” the synthetic rating curves to produce the desired FIM extent at locations where feedback is available (locally calibrate FIM). -## Changes +### Changes - `add_crosswalk.py`: added `NextDownID` and `order_` attributes to the exported `hydroTable.csv`. This will potentially be used in future enhancements to extend SRC changes to upstream/downstream catchments. - `adjust_rc_with_feedback.py`: added a new workflow to perform the SRC modifications (revised discharge) using the existing HAND geometry variables combined with the user provided point location flow and stage data. - `inundation_wrapper_custom_flow.py`: updated code to allow for huc6 processing to generate custom inundation outputs. @@ -144,7 +1912,7 @@ This new workflow ingests FIM point observations from users and “corrects” t Patches an issue where only certain benchmark categories were being used in evaluation. -## Changes +### Changes - In `tools/tools_shared_variables.py`, created a variable `MAGNITUDE_DICT` to store benchmark category magnitudes. - `synthesize_test_cases.py` imports `MAGNITUDE_DICT` and uses it to assign magnitudes. @@ -154,7 +1922,7 @@ Patches an issue where only certain benchmark categories were being used in eval Renames the BARC modified variables that are exported to `src_full_crosswalked.csv` to replace the original variables. The default/original variables are renamed with `orig_` prefix. This change is needed to ensure downstream uses of the `src_full_crosswalked.csv` are able to reference the authoritative version of the channel geometry variables (i.e. BARC-adjust where available). -## Changes +### Changes - In `src_full_crosswalked.csv`, default/original variables are renamed with `orig_` prefix and `SA_div` is renamed to `SA_div_flag`.

@@ -163,7 +1931,7 @@ Renames the BARC modified variables that are exported to `src_full_crosswalked.c This fixes a bug in the `get_metadata()` function in `/tools/tools_shared_functions.py` that arose because of a WRDS update. Previously the `metadata_source` response was returned as independent variables, but now it is returned a list of strings. Another issue was observed where the `EVALUATED_SITES_CSV` variable was being misdefined (at least on the development VM) through the OS environmental variable setting. -## Changes +### Changes - In `tools_shared_functions.py`, changed parsing of WRDS `metadata_sources` to account for new list type. - In `generate_categorical_fim_flows.py`, changed the way the `EVALUATED_SITES_CSV` path is defined from OS environmental setting to a relative path that will work within Docker container. @@ -173,7 +1941,7 @@ This fixes a bug in the `get_metadata()` function in `/tools/tools_shared_functi This merge addresses an issues with the bathymetry adjusted rating curve (BARC) calculations exacerbating single-pixel inundation issues for the lower Mississippi River. This fix allows the user to specify a stream order value that will be ignored in BARC calculations (reverts to using the original/default rating curve). If/when the "thalweg notch" issue is addressed, this change may be unmade. -## Changes +### Changes - Added new env variable `ignore_streamorders` set to 10. - Added new BARC code to set the bathymetry adjusted cross-section area to 0 (reverts to using the default SRC values) based on the streamorder env variable. @@ -183,7 +1951,7 @@ This merge addresses an issues with the bathymetry adjusted rating curve (BARC) Patches the minimum stream length in the template parameters file. -## Changes +### Changes - Changes `max_split_distance_meters` in `params_template.env` to 1500.

@@ -192,7 +1960,7 @@ Patches the minimum stream length in the template parameters file. This adds a script, `adjust_rc_with_feedback.py`, that will be expanded in future issues. The primary function that performs the HAND value and hydroid extraction is ingest_points_layer() but this may change as the overall synthetic rating curve automatic update machanism evolves. -## Additions +### Additions - Added `adjust_rc_with_feedback.py` with `ingest_points_layer()`, a function to extract HAND and hydroid values for use in an automatic synthetic rating curve updating mechanism.

@@ -201,7 +1969,7 @@ This adds a script, `adjust_rc_with_feedback.py`, that will be expanded in futu General repository cleanup, made memory-profiling an optional flag, API's release feature now saves outputs. -## Changes +### Changes - Remove `Dockerfile.prod`, rename `Dockerfile.dev` to just `Dockerfile`, and remove `.dockerignore`. - Clean up `Dockerfile` and remove any unused* packages or variables. - Remove any unused* Python packages from the `Pipfile`. @@ -218,7 +1986,7 @@ General repository cleanup, made memory-profiling an optional flag, API's releas This merge modifies `clip_vectors_to_wbd.py` to check for relevant input data. -## Changes +### Changes - `clip_vectors_to_wbd.py` now checks that there are NWM stream segments within the buffered HUC boundary. - `included_huc8_ms.lst` has several additional HUC8s. @@ -228,7 +1996,7 @@ This merge modifies `clip_vectors_to_wbd.py` to check for relevant input data. This merge improves documentation in various scripts. -## Changes +### Changes This PR better documents the following: - `inundate_nation.py` @@ -242,7 +2010,7 @@ This PR better documents the following: This merge adds two new scripts into `/tools/` for use in QAQC. -## Additions +### Additions - `inundate_nation.py` to produce inundation maps for the entire country for use in QAQC. - `check_deep_flooding.py` to check for depths of inundation greater than a user-supplied threshold at specific areas defined by a user-supplied shapefile. @@ -258,11 +2026,11 @@ Updating `README.md`. Updating logging and fixing bug in vector preprocessing. -## Additions +### Additions - `fim_completion_check.py` adds message to docker log to log any HUCs that were requested but did not finish `run_by_unit.sh`. - Adds `input_data_edits_changelog.txt` to the inputs folder to track any manual or version/location specific changes that were made to data used in FIM 3. -## Changes +### Changes - Provides unique exit codes to relevant domain checkpoints within `run_by_unit.sh`. - Bug fixes in `reduce_nhd_stream_density.py`, `mprof plot` call. - Improved error handling in `add_crosswalk.py`. @@ -273,7 +2041,7 @@ Updating logging and fixing bug in vector preprocessing. Hot fix to `synthesize_test_cases`. -## Changes +### Changes - Fixed if/elif/else statement in `synthesize_test_cases.py` that resulted in only IFC data being evaluated.

@@ -282,7 +2050,7 @@ Hot fix to `synthesize_test_cases`. Updates to evaluation scripts to allow for Alpha testing at Iowa Flood Center (IFC) sites. Also, `BAD_SITES` variable updates to omit sites not suitable for evaluation from metric calculations. -## Changes +### Changes - The `BAD_SITES` list in `tools_shared_variables.py` was updated and reasons for site omission are documented. - Refactored `run_test_case.py`, `synthesize_test_cases.py`, `tools_shared_variables.py`, and `eval_plots.py` to allow for IFC comparisons. @@ -292,13 +2060,13 @@ Updates to evaluation scripts to allow for Alpha testing at Iowa Flood Center (I Adding a thalweg profile tool to identify significant drops in thalweg elevation. Also setting lateral thalweg adjustment threshold in hydroconditioning. -## Additions +### Additions - `thalweg_drop_check.py` checks the elevation along the thalweg for each stream path downstream of MS headwaters within a HUC. -## Removals +### Removals - Removing `dissolveLinks` arg from `clip_vectors_to_wbd.py`. -## Changes +### Changes - Cleaned up code in `split_flows.py` to make it more readable. - Refactored `reduce_nhd_stream_density.py` and `adjust_headwater_streams.py` to limit MS headwater points in `agg_nhd_headwaters_adj.gpkg`. - Fixed a bug in `adjust_thalweg_lateral.py` lateral elevation replacement threshold; changed threshold to 3 meters. @@ -310,39 +2078,42 @@ Adding a thalweg profile tool to identify significant drops in thalweg elevation Feature to evaluate performance of alternative CatFIM techniques. -## Additions +### Additions - Added `eval_catfim_alt.py` to evaluate performance of alternative CatFIM techniques.

+ ## v3.0.18.0 - 2021-06-09 - [PR #404](https://github.com/NOAA-OWP/cahaba/pull/404) To help analyze the memory consumption of the Fim Run process, the python module `memory-profiler` has been added to give insights into where peak memory usage is with in the codebase. In addition, the Dockerfile was previously broken due to the Taudem dependency removing the version that was previously being used by FIM. To fix this, and allow new docker images to be built, the Taudem version has been updated to the newest version on the Github repo and thus needs to be thoroughly tested to determine if this new version has affected the overall FIM outputs. -## Additions +### Additions - Added `memory-profiler` to `Pipfile` and `Pipfile.lock`. - Added `mprof` (memory-profiler cli utility) call to the `time_and_tee_run_by_unit.sh` to create overall memory usage graph location in the `/logs/{HUC}_memory.png` of the outputs directory. - Added `@profile` decorator to all functions within scripts used in the `run_by_unit.sh` script to allow for memory usage tracking, which is then recorded in the `/logs/{HUC}.log` file of the outputs directory. -## Changes +### Changes - Changed the Taudem version in `Dockerfile.dev` to `98137bb6541a0d0077a9c95becfed4e56d0aa0ac`. - Changed all calls of python scripts in `run_by_unit.s` to be called with the `-m memory-profiler` argument to allow scripts to also track memory usage.

+ ## v3.0.17.1 - 2021-06-04 - [PR #395](https://github.com/NOAA-OWP/cahaba/pull/395) Bug fix to the `generate_nws_lid.py` script -## Changes +### Changes - Fixes incorrectly assigned attribute field "is_headwater" for some sites in the `nws_lid.gpkg` layer. - Updated `agg_nhd_headwaters_adj.gpkg`, `agg_nhd_streams_adj.gpkg`, `nwm_flows.gpkg`, and `nwm_catchments.gpkg` input layers using latest NWS LIDs.

+ ## v3.0.17.0 - 2021-06-04 - [PR #393](https://github.com/NOAA-OWP/cahaba/pull/393) BARC updates to cap the bathy calculated xsec area in `bathy_rc_adjust.py` and allow user to choose input bankfull geometry. -## Changes +### Changes - Added new env variable to control which input file is used for the bankfull geometry input to bathy estimation workflow. - Modified the bathymetry cross section area calculation to cap the additional area value so that it cannot exceed the bankfull cross section area value for each stream segment (bankfull value obtained from regression equation dataset). @@ -351,21 +2122,23 @@ BARC updates to cap the bathy calculated xsec area in `bathy_rc_adjust.py` and a - Evaluate the FIM Bathymetry Adjusted Rating Curve (BARC) tool performance using the estimated bankfull geometry dataset derived for the NWM route link dataset.

+ ## v3.0.16.3 - 2021-05-21 - [PR #388](https://github.com/NOAA-OWP/cahaba/pull/388) Enhancement and bug fixes to `synthesize_test_cases.py`. -## Changes +### Changes - Addresses a bug where AHPS sites without benchmark data were receiving a CSI of 0 in the master metrics CSV produced by `synthesize_test_cases.py`. - Includes a feature enhancement to `synthesize_test_cases.py` that allows for the inclusion of user-specified testing versions in the master metrics CSV. - Removes some of the print statements used by `synthesize_test_cases.py`.

+ ## v3.0.16.2 - 2021-05-18 - [PR #384](https://github.com/NOAA-OWP/cahaba/pull/384) Modifications and fixes to `run_test_case.py`, `eval_plots.py`, and AHPS preprocessing scripts. -## Changes +### Changes - Comment out return statement causing `run_test_case.py` to skip over sites/hucs when calculating contingency rasters. - Move bad sites list and query statement used to filter out bad sites to the `tools_shared_variables.py`. - Add print statements in `eval_plots.py` detailing the bad sites used and the query used to filter out bad sites. @@ -374,129 +2147,142 @@ Modifications and fixes to `run_test_case.py`, `eval_plots.py`, and AHPS preproc - Update workarounds for some sites in ahps preprocessing scripts.

+ ## v3.0.16.1 - 2021-05-11 - [PR #380](https://github.com/NOAA-OWP/cahaba/pull/380) The current version of Eventlet used in the Connector module of the FIM API is outdated and vulnerable. This update bumps the version to the patched version. -## Changes +### Changes - Updated `api/node/connector/requirements.txt` to have the Eventlet version as 0.31.0

+ ## v3.0.16.0 - 2021-05-07 - [PR #378](https://github.com/NOAA-OWP/cahaba/pull/378) New "Release" feature added to the FIM API. This feature will allow for automated FIM, CatFIM, and relevant metrics to be generated when a new FIM Version is released. See [#373](https://github.com/NOAA-OWP/cahaba/issues/373) for more detailed steps that take place in this feature. -## Additions +### Additions - Added new window to the UI in `api/frontend/gui/templates/index.html`. - Added new job type to `api/node/connector/connector.py` to allow these release jobs to run. - Added additional logic in `api/node/updater/updater.py` to run the new eval and CatFIM scripts used in the release feature. -## Changes +### Changes - Updated `api/frontend/output_handler/output_handler.py` to allow for copying more broad ranges of file paths instead of only the `/data/outputs` directory.

+ ## v3.0.15.10 - 2021-05-06 - [PR #375](https://github.com/NOAA-OWP/cahaba/pull/375) Remove Great Lakes coastlines from WBD buffer. -## Changes +### Changes - `gl_water_polygons.gpkg` layer is used to mask out Great Lakes boundaries and remove NHDPlus HR coastline segments.

+ ## v3.0.15.9 - 2021-05-03 - [PR #372](https://github.com/NOAA-OWP/cahaba/pull/372) Generate `nws_lid.gpkg`. -## Additions +### Additions - Generate `nws_lid.gpkg` with attributes indicating if site is a headwater `nws_lid` as well as if it is co-located with another `nws_lid` which is referenced to the same `nwm_feature_id` segment.

+ ## v3.0.15.8 - 2021-04-29 - [PR #371](https://github.com/NOAA-OWP/cahaba/pull/371) Refactor NHDPlus HR preprocessing workflow. Resolves issue #238 -## Changes +### Changes - Consolidate NHD streams, NWM catchments, and headwaters MS and FR layers with `mainstem` column. - HUC8 intersections are included in the input headwaters layer. - `clip_vectors_to_wbd.py` removes incoming stream segment from the selected layers.

+ ## v3.0.15.7 - 2021-04-28 - [PR #367](https://github.com/NOAA-OWP/cahaba/pull/367) Refactor synthesize_test_case.py to handle exceptions during multiprocessing. Resolves issue #351 -## Changes +### Changes - refactored `inundation.py` and `run_test_case.py` to handle exceptions without using `sys.exit()`.

+ ## v3.0.15.6 - 2021-04-23 - [PR #365](https://github.com/NOAA-OWP/cahaba/pull/365) Implement CatFIM threshold flows to Sierra test and add AHPS benchmark preprocessing scripts. -## Additions +### Additions - Produce CatFIM flows file when running `rating_curve_get_usgs_gages.py`. - Several scripts to preprocess AHPS benchmark data. Requires numerous file dependencies not available through Cahaba. -## Changes +### Changes - Modify `rating_curve_comparison.py` to ingest CatFIM threshold flows in calculations. - Modify `eval_plots.py` to save all site specific bar plots in same parent directory instead of in subdirectories. - Add variables to `env.template` for AHPS benchmark preprocessing.

+ ## v3.0.15.5 - 2021-04-20 - [PR #363](https://github.com/NOAA-OWP/cahaba/pull/363) Prevent eval_plots.py from erroring out when spatial argument enabled if certain datasets not analyzed. -## Changes +### Changes - Add check to make sure analyzed dataset is available prior to creating spatial dataset.

+ ## v3.0.15.4 - 2021-04-20 - [PR #356](https://github.com/NOAA-OWP/cahaba/pull/356) Closing all multiprocessing Pool objects in repo.

+ ## v3.0.15.3 - 2021-04-19 - [PR #358](https://github.com/NOAA-OWP/cahaba/pull/358) Preprocess NHDPlus HR rasters for consistent projections, nodata values, and convert from cm to meters. -## Additions +### Additions - `preprocess_rasters.py` reprojects raster, converts to meters, and updates nodata value to -9999. - Cleaned up log messages from `bathy_rc_adjust.py` and `usgs_gage_crosswalk.py`. - Outputs paths updated in `generate_categorical_fim_mapping.py` and `generate_categorical_fim.py`. - `update_raster_profile` cleans up raster crs, blocksize, nodata values, and converts elevation grids from cm to meters. - `reproject_dem.py` imports gdal to reproject elevation rasters because an error was occurring when using rasterio. -## Changes +### Changes - `burn_in_levees.py` replaces the `gdal_calc.py` command to resolve inconsistent outputs with burned in levee values.

+ ## v3.0.15.2 - 2021-04-16 - [PR #359](https://github.com/NOAA-OWP/cahaba/pull/359) Hotfix to preserve desired files when production flag used in `fim_run.sh`. -## Changes +### Changes - Fixed production whitelisted files.

+ ## v3.0.15.1 - 2021-04-13 - [PR #355](https://github.com/NOAA-OWP/cahaba/pull/355) Sierra test considered all USGS gage locations to be mainstems even though many actually occurred with tributaries. This resulted in unrealistic comparisons as incorrect gages were assigned to mainstems segments. This feature branch identifies gages that are on mainstems via attribute field. -## Changes +### Changes - Modifies `usgs_gage_crosswalk.py` to filter out gages from the `usgs_gages.gpkg` layer such that for a "MS" run, only consider gages that contain rating curve information (via `curve` attribute) and are also mainstems gages (via `mainstems` attribute). - Modifies `usgs_gage_crosswalk.py` to filter out gages from the `usgs_gages.gpkg` layer such that for a "FR" run, only consider gages that contain rating curve information (via `curve` attribute) and are not mainstems gages (via `mainstems` attribute). - Modifies how mainstems segments are determined by using the `nwm_flows_ms.gpkg` as a lookup to determine if the NWM segment specified by WRDS for a gage site is a mainstems gage. -## Additions +### Additions - Adds a `mainstem` attribute field to `usgs_gages.gpkg` that indicates whether a gage is located on a mainstems river. - Adds `NWM_FLOWS_MS` variable to the `.env` and `.env.template` files. - Adds the `extent` argument specified by user when running `fim_run.sh` to `usgs_gage_crosswalk.py`.

+ ## v3.0.15.0 - 2021-04-08 - [PR #340](https://github.com/NOAA-OWP/cahaba/pull/340) Implementing a prototype technique to estimate the missing bathymetric component in the HAND-derived synthetic rating curves. The new Bathymetric Adjusted Rating Curve (BARC) function is built within the `fim_run.sh` workflow and will ingest bankfull geometry estimates provided by the user to modify the cross section area used in the synthetic rating curve generation. @@ -514,6 +2300,7 @@ Implementing a prototype technique to estimate the missing bathymetric component - Flags issues with the thalweg-notch artifact.

+ ## v3.0.14.0 - 2021-04-05 - [PR #338](https://github.com/NOAA-OWP/cahaba/pull/338) Create tool to retrieve rating curves from USGS sites and convert to elevation (NAVD88). Intended to be used as part of the Sierra Test. @@ -531,6 +2318,7 @@ Create tool to retrieve rating curves from USGS sites and convert to elevation ( 3) `usgs_gages.gpkg`: A geospatial layer (in FIM projection) of all active USGS gages that meet a predefined criteria. Additionally, the `curve` attribute indicates whether a rating curve is found in the `usgs_rating_curves.csv`. This spatial file is only generated if the `all` option is passed with the `-l` argument.

+ ## v3.0.13.0 - 2021-04-01 - [PR #332](https://github.com/NOAA-OWP/cahaba/pull/332) Created tool to compare synthetic rating curve with benchmark rating curve (Sierra Test). @@ -545,6 +2333,7 @@ Created tool to compare synthetic rating curve with benchmark rating curve (Sier - `rating_curve_comparison.py`: post-processing script to plot and calculate metrics between synthetic rating curves and USGS rating curve data.

+ ## v3.0.12.1 - 2021-03-31 - [PR #336](https://github.com/NOAA-OWP/cahaba/pull/336) Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. @@ -559,6 +2348,7 @@ Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. - Creates `fim_performance_polys.shp`: this layer consists of all evaluated huc8s (with metrics). Spatial data retrieved from WBD layer.

+ ## v3.0.12.0 - 2021-03-26 - [PR #327](https://github.com/NOAA-OWP/cahaba/pull/237) Add more detail/information to plotting capabilities. @@ -572,6 +2362,7 @@ Add more detail/information to plotting capabilities. - Create a csv containing the data used to create the scatterplots.

+ ## v3.0.11.0 - 2021-03-22 - [PR #319](https://github.com/NOAA-OWP/cahaba/pull/298) Improvements to CatFIM service source data generation. @@ -586,6 +2377,7 @@ Improvements to CatFIM service source data generation. - Create new `nws_lid_sites` shapefile located in same directory as the `catfim_library` shapefile.

+ ## v3.0.10.1 - 2021-03-24 - [PR #320](https://github.com/NOAA-OWP/cahaba/pull/320) Patch to synthesize_test_cases.py. @@ -594,6 +2386,7 @@ Patch to synthesize_test_cases.py. - Bug fix to `synthesize_test_cases.py` to allow comparison between `testing` version and `official` versions.

+ ## v3.0.10.0 - 2021-03-12 - [PR #298](https://github.com/NOAA-OWP/cahaba/pull/298) Preprocessing of flow files for Categorical FIM. @@ -609,6 +2402,7 @@ Preprocessing of flow files for Categorical FIM. - Stability fixes to `generate_categorical_fim.py`.

+ ## v3.0.9.0 - 2021-03-12 - [PR #297](https://github.com/NOAA-OWP/cahaba/pull/297) Enhancements to FIM API. @@ -624,6 +2418,7 @@ Enhancements to FIM API. - Both FR and MS configs can be selected for a single job.

+ ## v3.0.8.2 - 2021-03-11 - [PR #296](https://github.com/NOAA-OWP/cahaba/pull/296) Enhancements to post-processing for Viz-related use-cases. @@ -634,6 +2429,7 @@ Enhancements to post-processing for Viz-related use-cases. - Aggregate grid blocksize is changed from 256 to 1024 for faster postprocessing.

+ ## v3.0.8.1 - 2021-03-10 - [PR #302](https://github.com/NOAA-OWP/cahaba/pull/302) Patched import issue in `tools_shared_functions.py`. @@ -642,6 +2438,7 @@ Patched import issue in `tools_shared_functions.py`. - Changed `utils.` to `tools_` in `tools_shared_functions.py` after recent structural change to `tools` directory.

+ ## v3.0.8.0 - 2021-03-09 - [PR #279](https://github.com/NOAA-OWP/cahaba/pull/279) Refactored NWS Flood Categorical HAND FIM (CatFIM) pipeline to open source. @@ -653,6 +2450,7 @@ Refactored NWS Flood Categorical HAND FIM (CatFIM) pipeline to open source. - Removed `util` folders under `tools` directory.

+ ## v3.0.7.1 - 2021-03-02 - [PR #290](https://github.com/NOAA-OWP/cahaba/pull/290) Renamed benchmark layers in `test_cases` and updated variable names in evaluation scripts. @@ -662,6 +2460,7 @@ Renamed benchmark layers in `test_cases` and updated variable names in evaluatio - Updated `run_test_case_calibration.py` with new benchmark layer names.

+ ## v3.0.7.0 - 2021-03-01 - [PR #288](https://github.com/NOAA-OWP/cahaba/pull/288) Restructured the repository. This has no impact on hydrological work done in the codebase and is simply moving files and renaming directories. @@ -672,6 +2471,7 @@ Restructured the repository. This has no impact on hydrological work done in the - Changed any instance of `lib` or `libDir` to `src` or `srcDir`.

+ ## v3.0.6.0 - 2021-02-25 - [PR #276](https://github.com/NOAA-OWP/cahaba/pull/276) Enhancement that creates metric plots and summary statistics using metrics compiled by `synthesize_test_cases.py`. @@ -685,6 +2485,7 @@ Enhancement that creates metric plots and summary statistics using metrics compi - CSV of analyzed data and analyzed sites

+ ## v3.0.5.3 - 2021-02-23 - [PR #275](https://github.com/NOAA-OWP/cahaba/pull/275) Bug fixes to new evaluation code. @@ -697,6 +2498,7 @@ Bug fixes to new evaluation code. - Updated README.md

+ ## v3.0.5.2 - 2021-02-23 - [PR #272](https://github.com/NOAA-OWP/cahaba/pull/272) Adds HAND synthetic rating curve (SRC) datum elevation values to `hydroTable.csv` output. @@ -716,6 +2518,7 @@ Fixed `TEST_CASES_DIR` path in `tests/utils/shared_variables.py`. - Removed `"_new"` from `TEST_CASES_DIR` variable.

+ ## v3.0.5.0 - 2021-02-22 - [PR #267](https://github.com/NOAA-OWP/cahaba/pull/267) Enhancements to allow for evaluation at AHPS sites, the generation of a query-optimized metrics CSV, and the generation of categorical FIM. This merge requires that the `/test_cases` directory be updated for all machines performing evaluation. @@ -751,6 +2554,7 @@ Rating curves for short stream segments are replaced with rating curves from ups - Variable names and general workflow are cleaned up.

+ ## v3.0.4.3 - 2021-02-12 - [PR #254](https://github.com/NOAA-OWP/cahaba/pull/254) Modified `rem.py` with a new function to output HAND reference elev. @@ -763,6 +2567,7 @@ Modified `rem.py` with a new function to output HAND reference elev. - Overwrites the `demDerived_reaches_split.gpk` layer by adding additional attribute `Min_Thal_Elev_meters` to view the elevation value for each hydroid.

+ ## v3.0.4.2 - 2021-02-12 - [PR #255](https://github.com/NOAA-OWP/cahaba/pull/255) Addresses issue when running on HUC6 scale. @@ -776,6 +2581,7 @@ Addresses issue when running on HUC6 scale. - Fixed known issue where sometimes an incoming stream is not included in the final selection will affect aggregate outputs.

+ ## v3.0.4.1 - 2021-02-12 - [PR #261](https://github.com/NOAA-OWP/cahaba/pull/261) Updated MS Crosswalk method to address gaps in FIM. @@ -787,6 +2593,7 @@ Updated MS Crosswalk method to address gaps in FIM. - `add_crosswalk.py` now performs a secondary MS crosswalk selection by nearest NWM MS catchment.

+ ## v3.0.4.0 - 2021-02-10 - [PR #256](https://github.com/NOAA-OWP/cahaba/pull/256) New python script "wrappers" for using `inundation.py`. @@ -798,6 +2605,7 @@ New python script "wrappers" for using `inundation.py`. - Created new `tools` parent directory to store `inundation_wrapper_nwm_flows.py` and `inundation_wrapper_custom_flow.py`.

+ ## v3.0.3.1 - 2021-02-04 - [PR #253](https://github.com/NOAA-OWP/cahaba/pull/253) Bug fixes to correct mismatched variable name and file path. @@ -808,6 +2616,7 @@ Bug fixes to correct mismatched variable name and file path. - `acquire_and_preprocess_inputs.py` now creates `huc_lists` folder and updates file path.

+ ## v3.0.3.0 - 2021-02-04 - [PR #227](https://github.com/NOAA-OWP/cahaba/pull/227) Post-process to aggregate FIM outputs to HUC6 scale. @@ -827,6 +2636,7 @@ Post-process to aggregate FIM outputs to HUC6 scale. - Cleanup of `clip_vectors_to_wbd.py`.

+ ## v3.0.2.0 - 2021-01-25 - [PR #218](https://github.com/NOAA-OWP/cahaba/pull/218) Addition of an API service to schedule, run and manage `fim_run` jobs through a user-friendly web interface. @@ -836,6 +2646,7 @@ Addition of an API service to schedule, run and manage `fim_run` jobs through a - `api` folder that contains all the codebase for the new service.

+ ## v3.0.1.0 - 2021-01-21 - [PR #206](https://github.com/NOAA-OWP/cahaba/pull/206) Preprocess MS and FR stream networks @@ -852,6 +2663,7 @@ Preprocess MS and FR stream networks - Cleaned up variable names and types.

+ ## v3.0.0.4 - 2021-01-20 - [PR #230](https://github.com/NOAA-OWP/cahaba/pull/230) Changed the directory where the `included_huc*.lst` files are being read from. @@ -861,6 +2673,7 @@ Changed the directory where the `included_huc*.lst` files are being read from. - Changed the directory where the `included_huc*.lst` files are being read from.

+ ## v3.0.0.3 - 2021-01-14 - [PR #210](https://github.com/NOAA-OWP/cahaba/pull/210) Hotfix for handling nodata value in rasterized levee lines. @@ -871,6 +2684,7 @@ Hotfix for handling nodata value in rasterized levee lines. - Initialize the `nld_rasterized_elev.tif` using a value of `-9999` instead of `$ndv`.
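To make the nature of this hotfix concrete: initializing the rasterized levee output with an explicit constant avoids inheriting whatever `$ndv` happened to be. The command below is only a generic GDAL illustration of that pattern, not the command used in the PR; the input layer, burn attribute, and resolution are hypothetical.

```bash
# Generic illustration (not the PR's actual command): rasterize levee lines while
# initializing every output cell to -9999 and registering -9999 as the nodata value.
# Input file, attribute name, and resolution below are hypothetical.
gdal_rasterize -a levee_elev -init -9999 -a_nodata -9999 \
    -tr 10 10 -ot Float32 \
    nld_levee_lines.gpkg nld_rasterized_elev.tif
```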

+ ## v3.0.0.2 - 2021-01-06 - [PR #200](https://github.com/NOAA-OWP/cahaba/pull/200) Patch to address AHPSs mapping errors. @@ -883,6 +2697,7 @@ Patch to address AHPSs mapping errors. - Updated [readme](https://github.com/NOAA-OWP/cahaba/commit/9bffb885f32dfcd95978c7ccd2639f9df56ff829)

+ ## v3.0.0.1 - 2020-12-31 - [PR #184](https://github.com/NOAA-OWP/cahaba/pull/184) Modifications to build and run Docker image more reliably. Cleanup on some pre-processing scripts. @@ -896,6 +2711,7 @@ Modifications to build and run Docker image more reliably. Cleanup on some pre-processing scripts. - `aggregate_vector_inputs.py` doesn't work yet. Need to externally download required data to run fim_run.sh

+ ## v3.0.0.0 - 2020-12-22 - [PR #181](https://github.com/NOAA-OWP/cahaba/pull/181) The software released here builds on the flood inundation mapping capabilities demonstrated as part of the National Flood Interoperability Experiment, the Office of Water Prediction's Innovators Program and the National Water Center Summer Institute. The flood inundation mapping software implements the Height Above Nearest Drainage (HAND) algorithm and incorporates community feedback and lessons learned over several years. The software has been designed to meet the requirements set by stakeholders interested in flood prediction and has been developed in partnership with several entities across the water enterprise. diff --git a/fim_pipeline.sh b/fim_pipeline.sh new file mode 100755 index 000000000..154f899fe --- /dev/null +++ b/fim_pipeline.sh @@ -0,0 +1,76 @@ +#!/bin/bash -e + +: ' +fim_pipeline.sh -u <hucs> -n <run name> + +For more details: + +- There are a wide number of options and defaulted values; for details run ```fim_pipeline.sh -h``` +- Mandatory arguments: + - `-u` can be a single HUC, a series passed in quotes (space delimited), or a line-delimited file + i. To run the entire domain of available data, use the ```/data/inputs/included_huc8.lst``` file or a HUC list file of your choice. + - `-n` is a name for your run (only alphanumeric) +- Outputs can be found under ```/outputs/``` + +Processing of HUCs in FIM4 comes in three pieces. You can run `fim_pipeline.sh`, which automatically runs all three major sections, or you can run each of the sections independently if you like. The three sections are: +- `fim_pre_processing.sh` : This section must be run first as it creates the basic output folder for the run. It also creates a number of key files and folders for the next two sections. +- `fim_process_unit_wb.sh` : This script processes one and exactly one HUC8 plus all of its related branches. While it can only process one, you can run this script multiple times, each with a different HUC (or overwriting a HUC). When you run `fim_pipeline.sh`, it automatically iterates when more than one HUC number has been supplied either by command line arguments or via a HUC list. For each HUC provided, `fim_pipeline.sh` will call `fim_process_unit_wb.sh`. Using the `fim_process_unit_wb.sh` script allows for a run / rerun of a HUC, or running other HUCs at different times / days or even in different Docker containers. +- `fim_post_processing.sh` : This section takes all of the HUCs that have been processed, aggregates key information from each HUC directory, and looks for errors across all HUC folders. It also processes the group in sub-steps such as USGS gauge processing, rating curve adjustments and more. Naturally, running or re-running this script can only be done after running `fim_pre_processing.sh` and at least one run of `fim_process_unit_wb.sh`. + +Running `fim_pipeline.sh` is quicker than running all three steps independently. +' + +set -e + +# TODO +# update Dockerfile to add this as an env value, and delete line below +projectDir=/foss_fim + +# See fim_pre_processing.sh for details of how to use this script. fim_pre_processing.sh +# is a proxy for collecting and validating input. + +echo +echo "======================= Start of fim_pipeline.sh =========================" +echo "---- Started: `date -u`" + +## LOAD AND VALIDATE INCOMING ARGUMENTS +source $srcDir/bash_functions.env +. 
$projectDir/fim_pre_processing.sh "$@" + + +logFile=$outputRunDataDir/logs/unit/pipeline_summary_unit.log +process_wb_file=$projectDir/fim_process_unit_wb.sh + +pipeline_start_time=`date +%s` + +# PROCESS THE UNITS (And branches) +# Why an if and else? watch the number of colons +if [ -f "$hucList" ]; then + if [ "$jobHucLimit" = "1" ]; then + parallel --verbose --lb -j $jobHucLimit --colsep ',' --joblog $logFile -- $process_wb_file $runName :::: $hucList + else + parallel --eta -j $jobHucLimit --colsep ',' --joblog $logFile -- $process_wb_file $runName :::: $hucList + fi +else + if [ "$jobHucLimit" = "1" ]; then + parallel --verbose --lb -j $jobHucLimit --colsep ',' --joblog $logFile -- $process_wb_file $runName ::: $hucList + else + parallel --eta -j $jobHucLimit --colsep ',' --joblog $logFile -- $process_wb_file $runName ::: $hucList + fi +fi + +echo +echo "---- Unit (HUC) processing is complete" +date -u + +## POST PROCESSING + +# TODO: multiply the two job limits together for the limit here ?? +. $projectDir/fim_post_processing.sh -n $runName -j $jobHucLimit + +echo +echo "======================== End of fim_pipeline.sh ==========================" +date -u +Calc_Duration $pipeline_start_time +echo + diff --git a/fim_post_processing.sh b/fim_post_processing.sh new file mode 100755 index 000000000..54ad2a5b1 --- /dev/null +++ b/fim_post_processing.sh @@ -0,0 +1,201 @@ +#!/bin/bash -e + +: +usage () +{ + echo 'Post processing for creating FIM hydrofabric.' + echo 'Usage : fim_post_processing.sh [REQ: -n ]' + echo ' [OPT: -h -j ]' + echo '' + echo 'REQUIRED:' + echo ' -n/--runName : A name to tag the output directories and log files.' + echo '' + echo 'OPTIONS:' + echo ' -h/--help : help file' + echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time. 1 outputs' + echo ' stdout and stderr to terminal and logs. With >1 outputs progress and logs the rest' + echo ' Note: Not the same variable name as fim_pipeline or fim_pre_processing' + echo ' and can be the multiplication of jobHucLimit and jobBranchLimit' + echo + exit +} + +while [ "$1" != "" ]; do +case $1 +in + -n|--runName) + shift + runName=$1 + ;; + -j|--jobLimit) + shift + jobLimit=$1 + ;; + -h|--help) + shift + usage + ;; + *) ;; + esac + shift +done + +# TODO +# update Dockerfile to add this as an env value, and delete line below +projectDir=/foss_fim + + +# print usage if arguments empty +if [ "$runName" = "" ] +then + echo "ERROR: Missing -n run time name argument" + usage +fi + +outputRunDataDir=$outputDataDir/$runName + +## Check for run data directory ## +if [ ! -d "$outputRunDataDir" ]; then + echo "Depends on output from units and branches. Please provide an output folder name that has hucs/branches run." 
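As a point of orientation for the three-piece workflow documented in the `fim_pipeline.sh` header above: the `parallel` calls fan each HUC out to `fim_process_unit_wb.sh`, and GNU parallel's `::::` reads HUCs from a list file while `:::` takes them straight from the command line, hence the "watch the number of colons" comment. The same stages can also be run by hand; the sketch below is only an illustration using arguments that appear in these scripts, with a made-up run name and HUC numbers.

```bash
# Hedged sketch of running the three stages manually (run name and HUC numbers are examples only).
/foss_fim/fim_pre_processing.sh -u "05030104 05030106" -n example_run   # create /outputs/example_run and runtime files
/foss_fim/fim_process_unit_wb.sh example_run 05030104                   # process a single HUC (repeat per HUC, rerunnable)
/foss_fim/fim_process_unit_wb.sh example_run 05030106                   # could run later, or in a different container
/foss_fim/fim_post_processing.sh -n example_run -j 1                    # aggregate, check errors, and calibrate across HUCs
```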
+ exit 1 +fi + +if [ "$jobLimit" = "" ]; then jobLimit=1; fi + +# Clean out the other post processing files before starting +rm -rdf $outputRunDataDir/logs/src_optimization +rm -f $outputRunDataDir/logs/log_bankfull_indentify.log +rm -f $outputRunDataDir/logs/subdiv_src_.log + +# load up enviromental information +args_file=$outputRunDataDir/runtime_args.env +fim_inputs=$outputRunDataDir/fim_inputs.csv + +source $args_file +source $outputRunDataDir/params.env +source $srcDir/bash_functions.env +source $srcDir/bash_variables.env + + +echo +echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echo "---- Start of fim_post_processing" +echo "---- Started: `date -u`" +T_total_start +post_proc_start_time=`date +%s` + + +## AGGREGATE BRANCH LISTS INTO ONE ## +echo -e $startDiv"Start branch aggregation" +python3 $srcDir/aggregate_branch_lists.py -d $outputRunDataDir -f "branch_ids.csv" -o $fim_inputs + +## GET NON ZERO EXIT CODES FOR UNITS ## +## No longer applicable + +## GET NON ZERO EXIT CODES FOR BRANCHES ## +echo -e $startDiv"Start non-zero exit code checking" +find $outputRunDataDir/logs/branch -name "*_branch_*.log" -type f | xargs grep -E "Exit status: ([1-9][0-9]{0,2})" > "$outputRunDataDir/branch_errors/non_zero_exit_codes.log" & + +## RUN AGGREGATE BRANCH ELEV TABLES ## +echo "Processing usgs gage aggregation" +python3 $srcDir/usgs_gage_aggregate.py -fim $outputRunDataDir -i $fim_inputs + +## RUN SYNTHETIC RATING CURVE BANKFULL ESTIMATION ROUTINE ## +if [ "$src_bankfull_toggle" = "True" ]; then + echo -e $startDiv"Estimating bankfull stage in SRCs" + # Run SRC bankfull estimation routine routine + Tstart + python3 $srcDir/identify_src_bankfull.py -fim_dir $outputRunDataDir -flows $bankfull_flows_file -j $jobLimit + Tcount +fi + +## RUN SYNTHETIC RATING SUBDIVISION ROUTINE ## +if [ "$src_subdiv_toggle" = "True" ]; then + echo -e $startDiv"Performing SRC channel/overbank subdivision routine" + # Run SRC Subdivision & Variable Roughness routine + Tstart + python3 $srcDir/subdiv_chan_obank_src.py -fim_dir $outputRunDataDir -mann $vmann_input_file -j $jobLimit + Tcount +fi + +## CONNECT TO CALIBRATION POSTGRESQL DATABASE (OPTIONAL) ## +if [ "$src_adjust_spatial" = "True" ] && [ "$skipcal" = "0" ]; then + if [ ! -f $CALB_DB_KEYS_FILE ]; then + echo "ERROR! - the src_adjust_spatial parameter in the params_template.env (or equiv) is set to "True" (see parameter file), but the provided calibration database access keys file does not exist: $CALB_DB_KEYS_FILE" + exit 1 + else + source $CALB_DB_KEYS_FILE + + : ' + This makes the local variables from the calb_db_keys files + into global variables that can be used in other files, including python. + + Why not just leave the word export in front of each of the keys in the + calb_db_keys.env? Becuase that file is used against docker-compose + when we start up that part of the sytem and it does not like the word + export. 
+ ' + + # Pick up the docker parent host machine name and override the one coming from the config file (aws only) + if [ "$isAWS" = "1" ]; then + CALIBRATION_DB_HOST=$(curl http://169.254.169.254/latest/meta-data/local-ipv4 -s) + fi + + export CALIBRATION_DB_HOST=$CALIBRATION_DB_HOST + export CALIBRATION_DB_NAME=$CALIBRATION_DB_NAME + export CALIBRATION_DB_USER_NAME=$CALIBRATION_DB_USER_NAME + export CALIBRATION_DB_PASS=$CALIBRATION_DB_PASS + export DEFAULT_FIM_PROJECTION_CRS=$DEFAULT_FIM_PROJECTION_CRS + + Tstart + echo "Populate PostgrSQL database with benchmark FIM extent points and HUC attributes (the calibration database)" + echo "Loading HUC Data" + echo + + ogr2ogr -overwrite -nln hucs -t_srs $DEFAULT_FIM_PROJECTION_CRS -f PostgreSQL PG:"host=$CALIBRATION_DB_HOST dbname=$CALIBRATION_DB_NAME user=$CALIBRATION_DB_USER_NAME password=$CALIBRATION_DB_PASS" $inputDataDir/wbd/WBD_National.gpkg WBDHU8 + + echo "Loading Point Data" + echo + ogr2ogr -overwrite -t_srs $DEFAULT_FIM_PROJECTION_CRS -f PostgreSQL PG:"host=$CALIBRATION_DB_HOST dbname=$CALIBRATION_DB_NAME user=$CALIBRATION_DB_USER_NAME password=$CALIBRATION_DB_PASS" $fim_obs_pnt_data usgs_nws_benchmark_points -nln points + + Tcount + fi +else + echo "Skipping Populate PostgrSQL database" +fi + +## RUN SYNTHETIC RATING CURVE CALIBRATION W/ USGS GAGE RATING CURVES ## +if [ "$src_adjust_usgs" = "True" ] && [ "$src_subdiv_toggle" = "True" ]; then + Tstart + echo + echo -e $startDiv"Performing SRC adjustments using USGS rating curve database" + # Run SRC Optimization routine using USGS rating curve data (WSE and flow @ NWM recur flow thresholds) + python3 $srcDir/src_adjust_usgs_rating.py -run_dir $outputRunDataDir -usgs_rc $inputDataDir/usgs_gages/usgs_rating_curves.csv -nwm_recur $nwm_recur_file -j $jobLimit + Tcount + date -u +fi + +## RUN SYNTHETIC RATING CURVE CALIBRATION W/ BENCHMARK POINT DATABASE (POSTGRESQL) ## +if [ "$src_adjust_spatial" = "True" ] && [ "$src_subdiv_toggle" = "True" ] && [ "$skipcal" = "0" ]; then + Tstart + echo + echo -e $startDiv"Performing SRC adjustments using benchmark point database" + python3 $srcDir/src_adjust_spatial_obs.py -fim_dir $outputRunDataDir -j $jobLimit + Tcount + date -u +fi + +echo +echo -e $startDiv"Combining crosswalk tables" +# aggregate outputs +Tstart +python3 /foss_fim/tools/combine_crosswalk_tables.py -d $outputRunDataDir -o $outputRunDataDir/crosswalk_table.csv +Tcount +date -u + +echo +echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echo "---- Start of fim_post_processing complete" +echo "---- Ended: `date -u`" +Calc_Duration $post_proc_start_time +echo diff --git a/fim_pre_processing.sh b/fim_pre_processing.sh new file mode 100755 index 000000000..b11b6084d --- /dev/null +++ b/fim_pre_processing.sh @@ -0,0 +1,247 @@ +#!/bin/bash -e +: +usage () +{ + echo + echo 'Produce FIM hydrofabric datasets for unit and branch scale.' + echo 'Usage : [REQ: -u -n ]' + echo ' [OPT: -h -c -o' + echo ' -ud ' + echo ' -bd ' + echo ' -zd ' + echo ' -jh ' + echo ' -jb ' + echo ' -skipcal ' + echo ' -isaws ]' + echo '' + echo 'REQUIRED:' + echo ' -u/--hucList : HUC8s to run or multiple passed in quotes (space delimited) file.' + echo ' A line delimited file, with a .lst extension, is also acceptable.' + echo ' HUCs must present in inputs directory.' + echo ' -n/--runName : A name to tag the output directories and log files.' 
+ echo + echo 'OPTIONS:' + echo ' -h/--help : Help file' + echo ' -c/--config : Configuration file with bash environment variables to export' + echo ' Default (if arg not added) : /foss_fim/config/params_template.env' + echo ' -ud/--unitDenylist' + echo ' A file with a line delimited list of files in UNIT (HUC) directories to be removed' + echo ' upon completion (see config/deny_unit.lst for a starting point)' + echo ' Default (if arg not added) : /foss_fim/config/deny_unit.lst' + echo ' -- Note: if you want to keep all output files (aka.. no files removed),' + echo ' use the word NONE as this value for this parameter.' + echo ' -bd/--branchDenylist' + echo ' A file with a line delimited list of files in BRANCHES directories to be removed' + echo ' upon completion of branch processing.' + echo ' (see config/deny_branches.lst for a starting point)' + echo ' Default: /foss_fim/config/deny_branches.lst' + echo ' -- Note: if you want to keep all output files (aka.. no files removed),' + echo ' use the word NONE as this value for this parameter.' + echo ' -zd/--branchZeroDenylist' + echo ' A file with a line delimited list of files in BRANCH ZERO directories to' + echo ' be removed upon completion of branch zero processing.' + echo ' (see config/deny_branch_zero.lst for a starting point)' + echo ' Default: /foss_fim/config/deny_branch_zero.lst' + echo ' -- Note: if you want to keep all output files (aka.. no files removed),' + echo ' use the word NONE as this value for this parameter.' + echo ' -jh/--jobLimit : Max number of concurrent HUC jobs to run. Default 1 job at time.' + echo ' -jb/--jobBranchLimit : Max number of concurrent Branch jobs to run. Default 1 job at time.' + echo ' NOTE: Make sure that the multiplication of jh and jb subtract 2 (jh x jb -2) does not' + echo ' exceed the total number of cores available.' 
+ echo ' -o : Overwrite outputs if already exist' + echo ' -skipcal : If this param is included, post gres calibration system will be skipped' + echo ' -isaws : If this param is included, the code will use AWS objects where possible' + echo ' : Note: This feature is not yet implemented' + echo + exit +} + +set -e + +while [ "$1" != "" ]; do +case $1 +in + -u|--hucList) + shift + hucList=$1 + ;; + -c|--configFile ) + shift + envFile=$1 + ;; + -n|--runName) + shift + runName=$1 + ;; + -jh|--jobHucLimit) + shift + jobHucLimit=$1 + ;; + -jb|--jobBranchLimit) + shift + jobBranchLimit=$1 + ;; + -h|--help) + shift + usage + ;; + -ud|--unitDenylist) + shift + deny_unit_list=$1 + ;; + -bd|--branchDenylist) + shift + deny_branches_list=$1 + ;; + -zd|--branchZeroDenylist) + shift + deny_branch_zero_list=$1 + ;; + -o) + overwrite=1 + ;; + -skipcal) + skipcal=1 + ;; + -isaws) + isAWS=1 + ;; + *) ;; + esac + shift +done + +# print usage if arguments empty +if [ "$hucList" = "" ] +then + echo "ERROR: Missing -u Huclist argument" + usage +fi +if [ "$runName" = "" ] +then + echo "ERROR: Missing -n run time name argument" + usage +fi + +# TODO +# update Dockerfile to add this as an env value, and delete line below +projectDir=/foss_fim + +# outputDataDir, srcDir and others come from the Dockerfile +outputRunDataDir=$outputDataDir/$runName + +# default values +if [ "$envFile" = "" ]; then envFile=/$projectDir/config/params_template.env; fi +if [ "$jobHucLimit" = "" ]; then jobHucLimit=1; fi +if [ "$jobBranchLimit" = "" ]; then jobBranchLimit=1; fi +if [ -z "$overwrite" ]; then overwrite=0; fi +if [ -z "$skipcal" ]; then skipcal=0; fi +if [ -z "$isAWS" ]; then isAWS=0; fi + +# validate and set defaults for the deny lists +if [ "$deny_unit_list" = "" ] +then + deny_unit_list=$projectDir/config/deny_unit.lst +elif [ "${deny_unit_list^^}" != "NONE" ] && [ ! -f "$deny_unit_list" ] +then + # NONE is not case sensitive + echo "Error: The -ud does not exist and is not the word NONE" + usage +fi + +# validate and set defaults for the deny lists +if [ "$deny_branches_list" = "" ] +then + deny_branches_list=$projectDir/config/deny_branches.lst +elif [ "${deny_branches_list^^}" != "NONE" ] && [ ! -f "$deny_branches_list" ] +then + # NONE is not case sensitive + echo "Error: The -bd does not exist and is not the word NONE" + usage +fi + +# We do a 1st cleanup of branch zero using branchZeroDenylist (which might be none). +# Later we do a 2nd cleanup of the branch zero that make the final output of branch zero +# to match what all other branch folders have for remaining files. But.. if we override +# branchZeroDenylist, we don't want it to be cleaned a second time. +has_deny_branch_zero_override=0 +if [ "$deny_branch_zero_list" = "" ] +then + deny_branch_zero_list=$projectDir/config/deny_branch_zero.lst +elif [ "${deny_branch_zero_list^^}" != "NONE" ] # NONE is not case sensitive +then + if [ ! -f "$deny_branch_zero_list" ] + then + echo "Error: The -zd does not exist and is not the word NONE" + usage + else + # only if the deny branch zero has been overwritten and file exists + has_deny_branch_zero_override=1 + fi +else + has_deny_branch_zero_override=1 # it is the value of NONE and is overridden +fi + +# Safety feature to avoid accidentaly overwrites +if [ -d $outputRunDataDir ] && [ $overwrite -eq 0 ]; then + echo + echo "ERROR: Output dir $outputRunDataDir exists. Use overwrite -o to run." 
+ echo + usage +fi + +## SOURCE ENV FILE AND FUNCTIONS ## +source $srcDir/bash_functions.env + +# these export are for fim_pipeline only. +export runName=$runName +export jobHucLimit=$jobHucLimit + +num_hucs=$(python3 $srcDir/check_huc_inputs.py -u $hucList) +echo +echo "--- Number of HUCs to process is $num_hucs" + +# make dirs +if [ ! -d $outputRunDataDir ]; then + mkdir -p $outputRunDataDir +else + # remove these directories and files on a new or overwrite run + rm -rdf $outputRunDataDir/logs + rm -rdf $outputRunDataDir/branch_errors + rm -rdf $outputRunDataDir/unit_errors + rm -rdf $outputRunDataDir/eval + rm -f $outputRunDataDir/crosswalk_table.csv + rm -f $outputRunDataDir/fim_inputs* + rm -f $outputRunDataDir/*.env +fi + +#logFile=$outputRunDataDir/logs/unit/summary_unit.log +mkdir -p $outputRunDataDir/logs/unit +mkdir -p $outputRunDataDir/logs/branch +mkdir -p $outputRunDataDir/unit_errors +mkdir -p $outputRunDataDir/branch_errors + +# copy over config file and rename it (note.. yes, the envFile file can still be +# loaded from command line and have its own values, it simply gets renamed and saved) +cp $envFile $outputRunDataDir/params.env + +# create an new .env file on the fly that contains all runtime values +# that any unit can load it independently (in seperate AWS objects, AWS fargates) +# or via pipeline. There is likely a more elegent way to do this. + +args_file=$outputRunDataDir/runtime_args.env + +# the jobHucLimit is not from the args files, only jobBranchLimit +echo "export runName=$runName" >> $args_file +echo "export jobHucLimit=$jobHucLimit" >> $args_file +echo "export jobBranchLimit=$jobBranchLimit" >> $args_file +echo "export deny_unit_list=$deny_unit_list" >> $args_file +echo "export deny_branches_list=$deny_branches_list" >> $args_file +echo "export deny_branch_zero_list=$deny_branch_zero_list" >> $args_file +echo "export has_deny_branch_zero_override=$has_deny_branch_zero_override" >> $args_file +echo "export isAWS=$isAWS" >> $args_file +echo "export skipcal=$skipcal" >> $args_file + +echo "--- Pre-processing is complete" + +echo diff --git a/fim_process_unit_wb.sh b/fim_process_unit_wb.sh new file mode 100755 index 000000000..842835a73 --- /dev/null +++ b/fim_process_unit_wb.sh @@ -0,0 +1,129 @@ +#!/bin/bash -e + +# Why is this file here and it appears to be using duplicate export variables? +# For AWS, we need to make a direct call to this files with two params, hucNumber first, +# then the runName same as the -n flag in fim_pipeline and fim_pre_processing.sh + +# This file will also catch any and all errors from src/run_unit_wb.sh file, even script aborts from that file + +# You really can not call directly to src/run_unit_wb.sh as that file relys on export values +# from this file. +# run_unit_wb will futher process branches with its own iterator (parallelization). + +# Sample Usage: /foss_fim/fim_process_unit_wb.sh rob_test_wb_1 05030104 + +## START MESSAGE ## + +echo + +usage () +{ + echo + echo 'Produce FIM hydrofabric datasets for a single unit and branch scale.' + echo 'NOTE: fim_pre_processing must have been already run and this tool' + echo ' will not include post processing. Only single independent single' + echo ' huc and its branches.' + echo 'Usage : There are no arg keys (aka.. no dashes)' + echo ' you need the run name first, then the huc.' 
+ echo ' Arguments:' + echo ' 1) run name' + echo ' 2) HUC number' + echo ' Example:' + echo ' /foss_fim/fim_process_unit_wb.sh rob_test_1 05030104' + echo + exit +} + +export runName=$1 +export hucNumber=$2 + +# print usage if arguments empty +if [ "$runName" = "" ] +then + echo "ERROR: Missing run time name argument (1st argument)" + usage +fi + +if [ "$hucNumber" = "" ] +then + echo "ERROR: Missing hucNumber argument (2nd argument)" + usage +fi + +re='^[0-9]+$' +if ! [[ $hucNumber =~ $re ]] ; then + echo "Error: hucNumber is not a number" >&2; exit 1 + usage +fi + +echo "==========================================================================" +echo "---- Start of huc processing for $hucNumber" + +# outputDataDir, srcDir and others come from the Dockerfile + +export outputRunDataDir=$outputDataDir/$runName +export outputHucDataDir=$outputRunDataDir/$hucNumber +export outputBranchDataDir=$outputHucDataDir/branches +export current_branch_id=0 + +## huc data +if [ -d "$outputHucDataDir" ]; then + rm -rf $outputHucDataDir +fi + +# make outputs directory +mkdir -p $outputHucDataDir +mkdir -p $outputBranchDataDir + +# Clean out previous unit logs and branch logs starting with this huc +rm -f $outputRunDataDir/logs/unit/"$hucNumber"_unit.log +rm -f $outputRunDataDir/logs/branch/"$hucNumber"_summary_branch.log +rm -f $outputRunDataDir/logs/branch/"$hucNumber"*.log +rm -f $outputRunDataDir/unit_errors/"$hucNumber"*.log +rm -f $outputRunDataDir/branch_errors/"$hucNumber"*.log +hucLogFileName=$outputRunDataDir/logs/unit/"$hucNumber"_unit.log + +# Process the actual huc +/usr/bin/time -v $srcDir/run_unit_wb.sh 2>&1 | tee $hucLogFileName + +#exit ${PIPESTATUS[0]} (and yes.. there can be more than one) +# and yes.. we can not use the $? as we are messing with exit codes +return_codes=( "${PIPESTATUS[@]}" ) + +#echo "huc return codes are:" +#echo $return_codes + +# we do this way instead of working directly with stderr and stdout +# as they were messing with output logs which we always want. +err_exists=0 +for code in "${return_codes[@]}" +do + # Make an extra copy of the unit log into a new folder. + + # Note: It was tricky to load in the fim_enum into bash, so we will just + # go with the code for now + if [ $code -eq 0 ]; then + echo + # do nothing + elif [ $code -eq 60 ]; then + echo + echo "***** Unit has no valid branches *****" + err_exists=1 + elif [ $code -eq 61 ]; then + echo + echo "***** Unit has no remaining valid flowlines *****" + err_exists=1 + else + echo + echo "***** An error has occured *****" + err_exists=1 + fi +done + +if [ "$err_exists" = "1" ]; then + # copy the error log over to the unit_errors folder to better isolate it + cp $hucLogFileName $outputRunDataDir/unit_errors +fi +echo "==========================================================================" +# we always return a success at this point (so we don't stop the loops / iterator) +exit 0 diff --git a/fim_run.sh b/fim_run.sh deleted file mode 100755 index 6d4d58a7a..000000000 --- a/fim_run.sh +++ /dev/null @@ -1,181 +0,0 @@ -#!/bin/bash -e -: -usage () -{ - echo 'Produce FIM datasets' - echo 'Usage : fim_run.sh [REQ: -u -c -n ] [OPT: -h -j ]' - echo '' - echo 'REQUIRED:' - echo ' -u/--hucList : HUC 4,6,or 8 to run or multiple passed in quotes. Line delimited file' - echo ' also accepted. HUCs must present in inputs directory.' 
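A brief note on the `PIPESTATUS` handling in `fim_process_unit_wb.sh` above: because `run_unit_wb.sh` is piped through `tee` so the log is always written, `$?` would only report `tee`'s exit status, while `PIPESTATUS` preserves the status of every member of the pipeline and must be captured immediately after it. A standalone illustration, not taken from the script:

```bash
# Standalone illustration of why the unit script captures PIPESTATUS instead of $?.
false | tee /tmp/pipestatus_demo.log
echo "last exit status: $?"                    # prints 0 -- that is tee's status, not false's
false | tee /tmp/pipestatus_demo.log
return_codes=( "${PIPESTATUS[@]}" )            # must be captured right after the pipeline
echo "pipeline statuses: ${return_codes[*]}"   # prints "1 0" -- false failed, tee succeeded
```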
- echo ' -e/--extent : full resolution or mainstem method; options are MS or FR' - echo ' -c/--config : configuration file with bash environment variables to export' - echo ' -n/--runName : a name to tag the output directories and log files as. could be a version tag.' - echo '' - echo 'OPTIONS:' - echo ' -h/--help : help file' - echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time. 1 outputs' - echo ' stdout and stderr to terminal and logs. With >1 outputs progress and logs the rest' - echo ' -o/--overwrite : overwrite outputs if already exist' - echo ' -p/--production : only save final inundation outputs' - echo ' -w/--whitelist : list of files to save in a production run in addition to final inundation outputs' - echo ' ex: file1.tif,file2.json,file3.csv' - echo ' -v/--viz : compute post-processing on outputs to be used in viz' - echo ' -m/--mem : enable memory profiling' - exit -} - -if [ "$#" -lt 7 ] -then - usage -fi - -while [ "$1" != "" ]; do -case $1 -in - -u|--hucList) - shift - hucList="$1" - ;; - -c|--configFile ) - shift - envFile=$1 - ;; - -e|--extent) - shift - extent=$1 - ;; - -n|--runName) - shift - runName=$1 - ;; - -j|--jobLimit) - shift - jobLimit=$1 - ;; - -h|--help) - shift - usage - ;; - -o|--overwrite) - overwrite=1 - ;; - -p|--production) - production=1 - ;; - -w|--whitelist) - shift - whitelist="$1" - ;; - -v|--viz) - viz=1 - ;; - -m|--mem) - mem=1 - ;; - *) ;; - esac - shift -done - -# print usage if arguments empty -if [ "$hucList" = "" ] -then - usage -fi -if [ "$extent" = "" ] -then - usage -fi -if [ "$envFile" = "" ] -then - usage -fi -if [ "$runName" = "" ] -then - usage -fi - -## SOURCE ENV FILE AND FUNCTIONS ## -source $envFile -source $srcDir/bash_functions.env - -# default values -if [ "$jobLimit" = "" ] ; then - jobLimit=$default_max_jobs -fi -if [ "$viz" = "" ] ; then - viz=0 -fi - -## Define Outputs Data Dir & Log File## -export outputRunDataDir=$outputDataDir/$runName -export extent=$extent -export production=$production -export whitelist=$whitelist -export viz=$viz -export mem=$mem -logFile=$outputRunDataDir/logs/summary.log - -## Define inputs -export input_WBD_gdb=$inputDataDir/wbd/WBD_National.gpkg -export input_nwm_lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg -export input_nwm_catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg -export input_nwm_flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg -export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_streams_adj.gpkg -export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj.gpkg -export input_GL_boundaries=$inputDataDir/landsea/gl_water_polygons.gpkg -## Input handling ## -$srcDir/check_huc_inputs.py -u "$hucList" - -## Make output and data directories ## -if [ -d "$outputRunDataDir" ] && [ "$overwrite" -eq 1 ]; then - rm -rf "$outputRunDataDir" -elif [ -d "$outputRunDataDir" ] && [ -z "$overwrite" ] ; then - echo "$runName data directories already exist. 
Use -o/--overwrite to continue" - exit 1 -fi -mkdir -p $outputRunDataDir/logs - -## RUN ## -if [ -f "$hucList" ]; then - if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh :::: $hucList - else - parallel --eta -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh :::: $hucList - fi -else - if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh ::: $hucList - else - parallel --eta -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh ::: $hucList - fi -fi - -# identify missing HUCs -# time python3 /foss_fim/tools/fim_completion_check.py -i $hucList -o $outputRunDataDir -if [ "$extent" = "MS" ] && [ "$bathy_src_toggle" = "True" ]; then - # Run BARC routine - echo -e $startDiv"Performing Bathy Adjusted Rating Curve routine"$stopDiv - time python3 /foss_fim/src/bathy_src_adjust_topwidth.py -fim_dir $outputRunDataDir -bfull_geom $bankfull_input_table -j $jobLimit -plots $src_plot_option -else - echo -e $startDiv"SKIPPING Bathy Adjusted Rating Curve routine"$stopDiv -fi - -echo -e $startDiv"Estimating bankfull stage in SRCs"$stopDiv -if [ "$src_bankfull_toggle" = "True" ]; then - # Run BARC routine - time python3 /foss_fim/src/identify_src_bankfull.py -fim_dir $outputRunDataDir -flows $bankfull_flows_file -j $jobLimit -plots $src_bankfull_plot_option -fi - -echo -e $startDiv"Applying variable roughness in SRCs"$stopDiv -if [ "$src_vrough_toggle" = "True" ]; then - # Run BARC routine - time python3 /foss_fim/src/vary_mannings_n_composite.py -fim_dir $outputRunDataDir -mann $vmann_input_file -bc $bankfull_attribute -suff $vrough_suffix -j $jobLimit -plots $src_vrough_plot_option -viz_clean $viz -fi - -echo "$viz" -if [[ "$viz" -eq 1 ]]; then - # aggregate outputs - time python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir -j 6 -fi diff --git a/src/acquire_and_preprocess_inputs.py b/src/acquire_and_preprocess_inputs.py deleted file mode 100755 index 417cc4d93..000000000 --- a/src/acquire_and_preprocess_inputs.py +++ /dev/null @@ -1,384 +0,0 @@ -#!/usr/bin/env python3 - -import os -import argparse -import csv -import sys -sys.path.append('/foss_fim/src') -import shutil -from multiprocessing import Pool -import geopandas as gpd -from urllib.error import HTTPError -from tqdm import tqdm - -from utils.shared_variables import (NHD_URL_PARENT, - NHD_URL_PREFIX, - NHD_RASTER_URL_SUFFIX, - NHD_VECTOR_URL_SUFFIX, - NHD_VECTOR_EXTRACTION_PREFIX, - NHD_VECTOR_EXTRACTION_SUFFIX, - PREP_PROJECTION, - WBD_NATIONAL_URL, - FIM_ID - ) - -from utils.shared_functions import (pull_file, - run_system_command, - delete_file, - getDriver) - -NHDPLUS_VECTORS_DIRNAME = 'nhdplus_vectors' -NHDPLUS_RASTERS_DIRNAME = 'nhdplus_rasters' -NWM_HYDROFABRIC_DIRNAME = 'nwm_hydrofabric' -NWM_FILE_TO_SUBSET_WITH = 'nwm_flows.gpkg' - - -def subset_wbd_to_nwm_domain(wbd,nwm_file_to_use): - - intersecting_indices = [not (gpd.read_file(nwm_file_to_use,mask=b).empty) for b in wbd.geometry] - - return(wbd[intersecting_indices]) - - -def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_use,overwrite_wbd,num_workers): - """ - This helper function pulls and unzips Watershed Boundary Dataset (WBD) data. It uses the WBD URL defined by WBD_NATIONAL_URL. - This function also subsets the WBD layers (HU4, HU6, HU8) to CONUS and converts to geopkacage layers. 
- - Args: - path_to_saved_data_parent_dir (str): The system path to where the WBD will be downloaded, unzipped, and preprocessed. - - """ - - # Construct path to wbd_directory and create if not existent. - wbd_directory = os.path.join(path_to_saved_data_parent_dir, 'wbd') - if not os.path.exists(wbd_directory): - os.mkdir(wbd_directory) - - wbd_gdb_path = os.path.join(wbd_directory, 'WBD_National_GDB.gdb') - pulled_wbd_zipped_path = os.path.join(wbd_directory, 'WBD_National_GDB.zip') - - multilayer_wbd_geopackage = os.path.join(wbd_directory, 'WBD_National.gpkg') - - nwm_huc_list_file_template = os.path.join(wbd_directory,'nwm_wbd{}.csv') - - nwm_file_to_use = os.path.join(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_use) - if not os.path.isfile(nwm_file_to_use): - raise IOError("NWM File to Subset Too Not Available: {}".format(nwm_file_to_use)) - - if not os.path.exists(multilayer_wbd_geopackage) or overwrite_wbd: - # Download WBD and unzip if it's not already done. - if not os.path.exists(wbd_gdb_path): - if not os.path.exists(pulled_wbd_zipped_path): - pull_file(WBD_NATIONAL_URL, pulled_wbd_zipped_path) - os.system("7za x {pulled_wbd_zipped_path} -o{wbd_directory}".format(pulled_wbd_zipped_path=pulled_wbd_zipped_path, wbd_directory=wbd_directory)) - - procs_list, wbd_gpkg_list = [], [] - multilayer_wbd_geopackage = os.path.join(wbd_directory, 'WBD_National.gpkg') - # Add fimid to HU8, project, and convert to geopackage. - if os.path.isfile(multilayer_wbd_geopackage): - os.remove(multilayer_wbd_geopackage) - print("Making National WBD GPKG...") - print("\tWBDHU8") - wbd_hu8 = gpd.read_file(wbd_gdb_path, layer='WBDHU8') - wbd_hu8 = wbd_hu8.rename(columns={'huc8':'HUC8'}) # rename column to caps - wbd_hu8 = wbd_hu8.sort_values('HUC8') - fimids = [str(item).zfill(4) for item in list(range(1000, 1000 + len(wbd_hu8)))] - wbd_hu8[FIM_ID] = fimids - wbd_hu8 = wbd_hu8.to_crs(PREP_PROJECTION) # Project. - wbd_hu8 = subset_wbd_to_nwm_domain(wbd_hu8,nwm_file_to_use) - wbd_hu8.geometry = wbd_hu8.buffer(0) - wbd_hu8.to_file(multilayer_wbd_geopackage,layer='WBDHU8',driver=getDriver(multilayer_wbd_geopackage),index=False) # Save. - wbd_hu8.HUC8.to_csv(nwm_huc_list_file_template.format('8'),index=False,header=False) - #wbd_gpkg_list.append(os.path.join(wbd_directory, 'WBDHU8.gpkg')) # Append to wbd_gpkg_list for subsetting later. - del wbd_hu8 - - # Prepare procs_list for multiprocessed geopackaging. - for wbd_layer_num in ['4', '6']: - wbd_layer = 'WBDHU' + wbd_layer_num - print("\t{}".format(wbd_layer)) - wbd = gpd.read_file(wbd_gdb_path,layer=wbd_layer) - wbd = wbd.to_crs(PREP_PROJECTION) - wbd = wbd.rename(columns={'huc'+wbd_layer_num : 'HUC' + wbd_layer_num}) - wbd = subset_wbd_to_nwm_domain(wbd,nwm_file_to_use) - wbd.geometry = wbd.buffer(0) - wbd.to_file(multilayer_wbd_geopackage,layer=wbd_layer,driver=getDriver(multilayer_wbd_geopackage),index=False) - wbd['HUC{}'.format(wbd_layer_num)].to_csv(nwm_huc_list_file_template.format(wbd_layer_num),index=False,header=False) - #output_gpkg = os.path.join(wbd_directory, wbd_layer + '.gpkg') - #wbd_gpkg_list.append(output_gpkg) - #procs_list.append(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {wbd_gdb_path} {wbd_layer}'.format(output_gpkg=output_gpkg, wbd_gdb_path=wbd_gdb_path, wbd_layer=wbd_layer, projection=PREP_PROJECTION)]) - - # with Pool(processes=num_workers) as pool: - # pool.map(run_system_command, procs_list) - - # Subset WBD layers to CONUS and add to single geopackage. 
- #print("Subsetting WBD layers to CONUS...") - #multilayer_wbd_geopackage = os.path.join(wbd_directory, 'WBD_National.gpkg') - #for gpkg in wbd_gpkg_list: - # subset_wbd_gpkg(gpkg, multilayer_wbd_geopackage) - - # Clean up temporary files. - #for temp_layer in ['WBDHU4', 'WBDHU6', 'WBDHU8']: - # delete_file(os.path.join(wbd_directory, temp_layer + '.gpkg')) - #pulled_wbd_zipped_path = os.path.join(wbd_directory, 'WBD_National_GDB.zip') - #delete_file(pulled_wbd_zipped_path) - #delete_file(os.path.join(wbd_directory, 'WBD_National_GDB.jpg')) - - return(wbd_directory) - - -def pull_and_prepare_nwm_hydrofabric(path_to_saved_data_parent_dir, path_to_preinputs_dir,num_workers): - """ - This helper function pulls and unzips NWM hydrofabric data. It uses the NWM hydrofabric URL defined by NWM_HYDROFABRIC_URL. - - Args: - path_to_saved_data_parent_dir (str): The system path to where a 'nwm' subdirectory will be created and where NWM hydrofabric - will be downloaded, unzipped, and preprocessed. - - """ - - # -- Acquire and preprocess NWM data -- # - nwm_hydrofabric_directory = os.path.join(path_to_saved_data_parent_dir, 'nwm_hydrofabric') - if not os.path.exists(nwm_hydrofabric_directory): - os.mkdir(nwm_hydrofabric_directory) - - nwm_hydrofabric_gdb = os.path.join(path_to_preinputs_dir, 'nwm_v21.gdb') - - # Project and convert to geopackage. - print("Projecting and converting NWM layers to geopackage...") - procs_list = [] - for nwm_layer in ['nwm_flows', 'nwm_lakes', 'nwm_catchments']: # I had to project the catchments and waterbodies because these 3 layers had varying CRSs. - print("Operating on " + nwm_layer) - output_gpkg = os.path.join(nwm_hydrofabric_directory, nwm_layer + '_proj.gpkg') - procs_list.append(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {nwm_hydrofabric_gdb} {nwm_layer}'.format(projection=PREP_PROJECTION, output_gpkg=output_gpkg, nwm_hydrofabric_gdb=nwm_hydrofabric_gdb, nwm_layer=nwm_layer)]) - - with Pool(processes=num_workers) as pool: - pool.map(run_system_command, procs_list) - - -def pull_and_prepare_nhd_data(args): - """ - This helper function is designed to be multiprocessed. It pulls and unzips NHD raster and vector data. - Args: - args (list): A list of arguments in this format: [nhd_raster_download_url, nhd_raster_extraction_path, nhd_vector_download_url, nhd_vector_extraction_path] - """ - # Parse urls and extraction paths from procs_list. - nhd_raster_download_url = args[0] - nhd_raster_extraction_path = args[1] - nhd_vector_download_url = args[2] - nhd_vector_extraction_path = args[3] - overwrite_nhd = args[4] - - nhd_gdb = nhd_vector_extraction_path.replace('.zip', '.gdb') # Update extraction path from .zip to .gdb. - - # Download raster and vector, if not already in user's directory (exist check performed by pull_file()). 
- nhd_raster_extraction_parent = os.path.dirname(nhd_raster_extraction_path) - huc = os.path.basename(nhd_raster_extraction_path).split('_')[2] - - nhd_raster_parent_dir = os.path.join(nhd_raster_extraction_parent, 'HRNHDPlusRasters' + huc) - - if not os.path.exists(nhd_raster_parent_dir): - os.mkdir(nhd_raster_parent_dir) - - elev_cm_tif = os.path.join(nhd_raster_parent_dir, 'elev_cm.tif') - if not os.path.exists(elev_cm_tif) or overwrite_nhd: - pull_file(nhd_raster_download_url, nhd_raster_extraction_path) - os.system("7za e {nhd_raster_extraction_path} -o{nhd_raster_parent_dir} elev_cm.tif -r ".format(nhd_raster_extraction_path=nhd_raster_extraction_path, nhd_raster_parent_dir=nhd_raster_parent_dir)) - - file_list = os.listdir(nhd_raster_parent_dir) - for f in file_list: - full_path = os.path.join(nhd_raster_parent_dir, f) - if 'elev_cm' not in f: - if os.path.isdir(full_path): - shutil.rmtree(full_path) - elif os.path.isfile(full_path): - os.remove(full_path) - os.remove(nhd_raster_extraction_path) - - nhd_vector_extraction_parent = os.path.dirname(nhd_vector_extraction_path) - - if not os.path.exists(nhd_vector_extraction_parent): - os.mkdir(nhd_vector_extraction_parent) - - if not os.path.exists(nhd_gdb) or overwrite_nhd: # Only pull if not already pulled and processed. - # Download and fully unzip downloaded GDB. - pull_file(nhd_vector_download_url, nhd_vector_extraction_path) - huc = os.path.split(nhd_vector_extraction_parent)[1] # Parse HUC. - os.system("7za x {nhd_vector_extraction_path} -o{nhd_vector_extraction_parent}".format(nhd_vector_extraction_path=nhd_vector_extraction_path, nhd_vector_extraction_parent=nhd_vector_extraction_parent)) - # extract input stream network - nhd = gpd.read_file(nhd_gdb,layer='NHDPlusBurnLineEvent') - nhd = nhd.to_crs(PREP_PROJECTION) - nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDPlusBurnLineEvent' + huc + '.gpkg'),driver='GPKG') - # extract flowlines for FType attributes - nhd = gpd.read_file(nhd_gdb,layer='NHDFlowline') - nhd = nhd.to_crs(PREP_PROJECTION) - nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDFlowline' + huc + '.gpkg'),driver='GPKG') - # extract attributes - nhd = gpd.read_file(nhd_gdb,layer='NHDPlusFlowLineVAA') - nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDPlusFlowLineVAA' + huc + '.gpkg'),driver='GPKG') - # -- Project and convert NHDPlusBurnLineEvent and NHDPlusFlowLineVAA vectors to geopackage -- # - #for nhd_layer in ['NHDPlusBurnLineEvent', 'NHDPlusFlowlineVAA']: - # run_system_command(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {nhd_gdb} {nhd_layer}'.format(projection=PREP_PROJECTION, output_gpkg=output_gpkg, nhd_gdb=nhd_gdb, nhd_layer=nhd_layer)]) # Use list because function is configured for multiprocessing. - # Delete unnecessary files. - delete_file(nhd_vector_extraction_path.replace('.zip', '.jpg')) - delete_file(nhd_vector_extraction_path) # Delete the zipped GDB. - - -def build_huc_list_files(path_to_saved_data_parent_dir, wbd_directory): - """ - This function builds a list of available HUC4s, HUC6s, and HUC8s and saves the lists to .lst files. - - Args: - path_to_saved_data_parent_dir (str): The path to the parent directory where the .lst files will be saved. - wbd_directory (str): The path to the directory storing the WBD geopackages which are used to determine which HUCs are available for processing. - - """ - - print("Building included HUC lists...") - # Identify all saved NHDPlus Vectors. 
- nhd_plus_raster_dir = os.path.join(path_to_saved_data_parent_dir, NHDPLUS_RASTERS_DIRNAME) - nhd_plus_vector_dir = os.path.join(path_to_saved_data_parent_dir, NHDPLUS_VECTORS_DIRNAME) - - huc4_list = [i[-4:] for i in os.listdir(nhd_plus_raster_dir)] - huc6_list, huc8_list = [], [] - - # Read WBD into dataframe. - full_huc_gpkg = os.path.join(wbd_directory, 'WBD_National.gpkg') - huc_gpkg = 'WBDHU8' # The WBDHU4 are handled by the nhd_plus_raster_dir name. - - # Open geopackage. - wbd = gpd.read_file(full_huc_gpkg, layer=huc_gpkg) - - # Loop through entries and compare against the huc4_list to get available HUCs within the geopackage domain. - for index, row in tqdm(wbd.iterrows(),total=len(wbd)): - huc = row["HUC" + huc_gpkg[-1]] - huc_mask = wbd.loc[wbd[str("HUC" + huc_gpkg[-1])]==huc].geometry - burnline = os.path.join(nhd_plus_vector_dir, huc[0:4], 'NHDPlusBurnLineEvent' + huc[0:4] + '.gpkg') - if os.path.exists(burnline): - nhd_test = len(gpd.read_file(burnline, mask = huc_mask)) # this is slow, iterates through 2000+ HUC8s - # Append huc to huc8 list. - if (str(huc[:4]) in huc4_list) & (nhd_test>0): - huc8_list.append(huc) - - huc6_list = [w[:6] for w in huc8_list] - huc6_list = set(huc6_list) - - # Write huc lists to appropriate .lst files. - huc_lists_dir = os.path.join(path_to_saved_data_parent_dir, 'huc_lists') - if not os.path.exists(huc_lists_dir): - os.mkdir(huc_lists_dir) - included_huc4_file = os.path.join(huc_lists_dir, 'included_huc4.lst') - included_huc6_file = os.path.join(huc_lists_dir, 'included_huc6.lst') - included_huc8_file = os.path.join(huc_lists_dir, 'included_huc8.lst') - - # Overly verbose file writing loops. Doing this in a pinch. - with open(included_huc4_file, 'w') as f: - for item in huc4_list: - f.write("%s\n" % item) - - with open(included_huc6_file, 'w') as f: - for item in huc6_list: - f.write("%s\n" % item) - - with open(included_huc8_file, 'w') as f: - for item in huc8_list: - f.write("%s\n" % item) - - -def manage_preprocessing(hucs_of_interest, num_workers=1,overwrite_nhd=False, overwrite_wbd=False): - """ - This functions manages the downloading and preprocessing of gridded and vector data for FIM production. - - Args: - hucs_of_interest (str): Path to a user-supplied config file of hydrologic unit codes to be pulled and post-processed. - - """ - - #get input data dir - path_to_saved_data_parent_dir = os.environ['inputDataDir'] - - nhd_procs_list = [] # Initialize procs_list for multiprocessing. - - # Create the parent directory if nonexistent. - if not os.path.exists(path_to_saved_data_parent_dir): - os.mkdir(path_to_saved_data_parent_dir) - - # Create NHDPlus raster parent directory if nonexistent. - nhd_raster_dir = os.path.join(path_to_saved_data_parent_dir, NHDPLUS_RASTERS_DIRNAME) - if not os.path.exists(nhd_raster_dir): - os.mkdir(nhd_raster_dir) - - # Create the vector data parent directory if nonexistent. - vector_data_dir = os.path.join(path_to_saved_data_parent_dir, NHDPLUS_VECTORS_DIRNAME) - if not os.path.exists(vector_data_dir): - os.mkdir(vector_data_dir) - - # Parse HUCs from hucs_of_interest. - if isinstance(hucs_of_interest,list): - if len(hucs_of_interest) == 1: - try: - with open(hucs_of_interest[0]) as csv_file: # Does not have to be CSV format. - huc_list = [i[0] for i in csv.reader(csv_file)] - except FileNotFoundError: - huc_list = hucs_of_interest - else: - huc_list = hucs_of_interest - elif isinstance(hucs_of_interest,str): - try: - with open(hucs_of_interest) as csv_file: # Does not have to be CSV format. 
- huc_list = [i[0] for i in csv.reader(csv_file)] - except FileNotFoundError: - huc_list = list(hucs_of_interest) - - - # Construct paths to data to download and append to procs_list for multiprocessed pull, project, and converstion to geopackage. - for huc in huc_list: - huc = str(huc) # Ensure huc is string. - - # Construct URL and extraction path for NHDPlus raster. - nhd_raster_download_url = os.path.join(NHD_URL_PARENT, NHD_URL_PREFIX + huc + NHD_RASTER_URL_SUFFIX) - nhd_raster_extraction_path = os.path.join(nhd_raster_dir, NHD_URL_PREFIX + huc + NHD_RASTER_URL_SUFFIX) - - # Construct URL and extraction path for NHDPlus vector. Organize into huc-level subdirectories. - nhd_vector_download_url = os.path.join(NHD_URL_PARENT, NHD_URL_PREFIX + huc + NHD_VECTOR_URL_SUFFIX) - nhd_vector_download_parent = os.path.join(vector_data_dir, huc) - if not os.path.exists(nhd_vector_download_parent): - os.mkdir(nhd_vector_download_parent) - nhd_vector_extraction_path = os.path.join(nhd_vector_download_parent, NHD_VECTOR_EXTRACTION_PREFIX + huc + NHD_VECTOR_EXTRACTION_SUFFIX) - - # Append extraction instructions to nhd_procs_list. - nhd_procs_list.append([nhd_raster_download_url, nhd_raster_extraction_path, nhd_vector_download_url, nhd_vector_extraction_path, overwrite_nhd]) - - # Pull and prepare NHD data. - # with Pool(processes=num_workers) as pool: - # pool.map(pull_and_prepare_nhd_data, nhd_procs_list) - - for huc in nhd_procs_list: - try: - pull_and_prepare_nhd_data(huc) - except HTTPError: - print("404 error for HUC4 {}".format(huc)) - - # Pull and prepare NWM data. - #pull_and_prepare_nwm_hydrofabric(path_to_saved_data_parent_dir, path_to_preinputs_dir,num_workers) # Commented out for now. - - # Pull and prepare WBD data. - wbd_directory = pull_and_prepare_wbd(path_to_saved_data_parent_dir,NWM_HYDROFABRIC_DIRNAME,NWM_FILE_TO_SUBSET_WITH,overwrite_wbd,num_workers) - - # Create HUC list files. - build_huc_list_files(path_to_saved_data_parent_dir, wbd_directory) - - -if __name__ == '__main__': - - # Parse arguments. - parser = argparse.ArgumentParser(description='Acquires and preprocesses WBD and NHD data for use in fim_run.sh.') - parser.add_argument('-u','--hucs-of-interest',help='HUC4, series of HUC4s, or path to a line-delimited file of HUC4s to acquire.',required=True,nargs='+') - #parser.add_argument('-j','--num-workers',help='Number of workers to process with',required=False,default=1,type=int) - parser.add_argument('-n', '--overwrite-nhd', help='Optional flag to overwrite NHDPlus Data',required=False,action='store_true') - parser.add_argument('-w', '--overwrite-wbd', help='Optional flag to overwrite WBD Data',required=False,action='store_true') - - # Extract to dictionary and assign to variables. 
- args = vars(parser.parse_args()) - - manage_preprocessing(**args) diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index 9b38418b2..15dfe2efc 100755 --- a/src/add_crosswalk.py +++ b/src/add_crosswalk.py @@ -8,14 +8,34 @@ import json import argparse import sys +from utils.shared_functions import getDriver # sys.path.append('/foss_fim/src') # sys.path.append('/foss_fim/config') from utils.shared_functions import getDriver, mem_profile from utils.shared_variables import FIM_ID +from memory_profiler import profile +# Feb 17, 2023 +# We want to explore using FR methodology as branch zero @mem_profile -def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode=False): +def add_crosswalk(input_catchments_fileName, + input_flows_fileName, + input_srcbase_fileName, + output_catchments_fileName, + output_flows_fileName, + output_src_fileName, + output_src_json_fileName, + output_crosswalk_fileName, + output_hydro_table_fileName, + input_huc_fileName, + input_nwmflows_fileName, + input_nwmcatras_fileName, + mannings_n, + input_nwmcat_fileName, + extent, + small_segments_filename, + calibration_mode=False): input_catchments = gpd.read_file(input_catchments_fileName) input_flows = gpd.read_file(input_flows_fileName) @@ -51,10 +71,14 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_flows = output_flows.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id') output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') - elif extent == 'MS': + elif (extent == 'MS') | (extent == 'GMS'): ## crosswalk using stream segment midpoint method input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc) - input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem==1] + + # only reduce nwm catchments to mainstems if running mainstems + if extent == 'MS': + input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem==1] + input_nwmcat = input_nwmcat.rename(columns={'ID':'feature_id'}) if input_nwmcat.feature_id.dtype != 'int': input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) input_nwmcat=input_nwmcat.set_index('feature_id') @@ -103,19 +127,14 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) output_flows = input_flows.merge(crosswalk,on='HydroID') - output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') - # read in manning's n values - if calibration_mode == False: - with open(mannings_n, "r") as read_file: - mannings_dict = json.load(read_file) - else: - mannings_dict = {} - for cnt,value in enumerate(mannings_n.split(",")[2:]): - streamorder = cnt+1 - mannings_dict[str(streamorder)] = value + # added for GMS. 
Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh + if 'areasqkm' not in output_catchments.columns: + output_catchments['areasqkm'] = output_catchments.geometry.area/(1000**2) - output_flows['ManningN'] = output_flows['order_'].astype(str).map(mannings_dict) + output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') + + output_flows['ManningN'] = mannings_n if output_flows.NextDownID.dtype != 'int': output_flows.NextDownID = output_flows.NextDownID.astype(int) @@ -213,18 +232,42 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f if extent == 'FR': output_src = output_src.merge(input_majorities[['HydroID','feature_id']],on='HydroID') - elif extent == 'MS': + elif (extent == 'MS') | (extent == 'GMS'): output_src = output_src.merge(crosswalk[['HydroID','feature_id']],on='HydroID') output_crosswalk = output_src[['HydroID','feature_id']] output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True) + ## bathy estimation integration in synthetic rating curve calculations + #if (bathy_src_calc == True and extent == 'MS'): + # output_src = bathy_rc_lookup(output_src,input_bathy_fileName,output_bathy_fileName,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName) + #else: + # print('Note: NOT using bathy estimation approach to modify the SRC...') + # make hydroTable output_hydro_table = output_src.loc[:,['HydroID','feature_id','NextDownID','order_','Number of Cells','SurfaceArea (m2)','BedArea (m2)','TopWidth (m)','LENGTHKM','AREASQKM','WettedPerimeter (m)','HydraulicRadius (m)','WetArea (m2)','Volume (m3)','SLOPE','ManningN','Stage','Discharge (m3s-1)']] output_hydro_table.rename(columns={'Stage' : 'stage','Discharge (m3s-1)':'discharge_cms'},inplace=True) - output_hydro_table['barc_on'] = False # set barc_on attribute to Fasle (default) --> will be overwritten if BARC module runs - output_hydro_table['vmann_on'] = False # set vmann_on attribute to Fasle (default) --> will be overwritten if variable roughness module runs - + ## Set placeholder variables to be replaced in post-processing (as needed). 
Create here to ensure consistent column vars + ## These variables represent the original unmodified values + output_hydro_table['default_discharge_cms'] = output_src['Discharge (m3s-1)'] + output_hydro_table['default_Volume (m3)'] = output_src['Volume (m3)'] + output_hydro_table['default_WetArea (m2)'] = output_src['WetArea (m2)'] + output_hydro_table['default_HydraulicRadius (m)'] = output_src['HydraulicRadius (m)'] + output_hydro_table['default_ManningN'] = output_src['ManningN'] + ## Placeholder vars for subdivision routine + output_hydro_table['subdiv_applied'] = False + output_hydro_table['overbank_n'] = pd.NA + output_hydro_table['channel_n'] = pd.NA + output_hydro_table['subdiv_discharge_cms'] = pd.NA + ## Placeholder vars for the calibration routine + output_hydro_table['calb_applied'] = pd.NA + output_hydro_table['last_updated'] = pd.NA + output_hydro_table['submitter'] = pd.NA + output_hydro_table['obs_source'] = pd.NA + output_hydro_table['precalb_discharge_cms'] = pd.NA + output_hydro_table['calb_coef_usgs'] = pd.NA + output_hydro_table['calb_coef_spatial'] = pd.NA + output_hydro_table['calb_coef_final'] = pd.NA if output_hydro_table.HydroID.dtype != 'str': output_hydro_table.HydroID = output_hydro_table.HydroID.astype(str) output_hydro_table[FIM_ID] = output_hydro_table.loc[:,'HydroID'].apply(lambda x : str(x)[0:4]) @@ -273,7 +316,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Crosswalk for MS/FR networks; calculate synthetic rating curves; update short rating curves') + parser = argparse.ArgumentParser(description='Crosswalk for MS/FR/GMS networks; calculate synthetic rating curves; update short rating curves') parser.add_argument('-d','--input-catchments-fileName', help='DEM derived catchments', required=True) parser.add_argument('-a','--input-flows-fileName', help='DEM derived streams', required=True) parser.add_argument('-s','--input-srcbase-fileName', help='Base synthetic rating curve table', required=True) @@ -288,28 +331,11 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f parser.add_argument('-y','--input-nwmcatras-fileName',help='NWM catchment raster',required=False) parser.add_argument('-m','--mannings-n',help='Mannings n. Accepts single parameter set or list of parameter set in calibration mode. 
Currently input as csv.',required=True) parser.add_argument('-z','--input-nwmcat-fileName',help='NWM catchment polygon',required=True) - parser.add_argument('-p','--extent',help='MS or FR extent',required=True) + parser.add_argument('-p','--extent',help='GMS only for now', default='GMS', required=False) parser.add_argument('-k','--small-segments-filename',help='output list of short segments',required=True) parser.add_argument('-c','--calibration-mode',help='Mannings calibration flag',required=False,action='store_true') args = vars(parser.parse_args()) - input_catchments_fileName = args['input_catchments_fileName'] - input_flows_fileName = args['input_flows_fileName'] - input_srcbase_fileName = args['input_srcbase_fileName'] - output_catchments_fileName = args['output_catchments_fileName'] - output_flows_fileName = args['output_flows_fileName'] - output_src_fileName = args['output_src_fileName'] - output_src_json_fileName = args['output_src_json_fileName'] - output_crosswalk_fileName = args['output_crosswalk_fileName'] - output_hydro_table_fileName = args['output_hydro_table_fileName'] - input_huc_fileName = args['input_huc_fileName'] - input_nwmflows_fileName = args['input_nwmflows_fileName'] - input_nwmcatras_fileName = args['input_nwmcatras_fileName'] - mannings_n = args['mannings_n'] - input_nwmcat_fileName = args['input_nwmcat_fileName'] - extent = args['extent'] - small_segments_filename = args['small_segments_filename'] - calibration_mode = args['calibration_mode'] - - add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode) + add_crosswalk(**args) + diff --git a/src/adjust_headwater_streams.py b/src/adjust_headwater_streams.py index 8350311c5..300e542b0 100644 --- a/src/adjust_headwater_streams.py +++ b/src/adjust_headwater_streams.py @@ -13,6 +13,12 @@ import warnings warnings.simplefilter("ignore") +################################## +## +## Likely Deprecated: File not in use. Noticed Jan 16, 2023 +## Might want to be kept for possible re-use at a later time? +## +################################## def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): @@ -162,6 +168,14 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): if __name__ == '__main__': +################################## +## +## Likely Deprecated: File not in use. Noticed Jan 16, 2023 +## Might want to be kept for possible re-use at a later time? 
+## +################################## + + parser = argparse.ArgumentParser(description='adjust headwater stream geometery based on headwater start points') parser.add_argument('-f','--huc',help='huc number',required=True) parser.add_argument('-l','--nhd-streams',help='NHDPlus HR geodataframe',required=True) @@ -186,3 +200,4 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): if adj_headwater_points_fileName is not None: adj_headwaters_gdf.to_file(args['adj_headwater_points_fileName'],driver=getDriver(args['adj_headwater_points_fileName'])) + diff --git a/src/adjust_thalweg_lateral.py b/src/adjust_thalweg_lateral.py index c9930f480..f838a299b 100755 --- a/src/adjust_thalweg_lateral.py +++ b/src/adjust_thalweg_lateral.py @@ -32,32 +32,6 @@ def make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window return(zone_min_dict) - # Open files. - elevation_raster_object = rasterio.open(elevation_raster) - allocation_zone_raster_object = rasterio.open(allocation_raster) - cost_distance_raster_object = rasterio.open(cost_distance_raster) - - meta = elevation_raster_object.meta.copy() - meta['tiled'], meta['compress'] = True, 'lzw' - - # -- Create zone_min_dict -- # - zone_min_dict = typed.Dict.empty(types.int32,types.float32) # Initialize an empty dictionary to store the catchment minimums - # Update catchment_min_dict with pixel sheds minimum. - for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using elevation_raster_object as template - elevation_window = elevation_raster_object.read(1,window=window).ravel() # Define elevation_window - zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define zone_window - cost_window = cost_distance_raster_object.read(1, window=window).ravel() # Define cost_window - - # Call numba-optimized function to update catchment_min_dict with pixel sheds minimum. - zone_min_dict = make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window, int(cost_distance_tolerance), meta['nodata']) - - # ------------------------------------------------------------------------------------------------------------------------ # - - elevation_raster_object.close() - allocation_zone_raster_object.close() - cost_distance_raster_object.close() - - # ------------------------------------------- Assign zonal min to thalweg ------------------------------------------------ # @njit def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_window): @@ -82,30 +56,46 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w return(dem_window_to_return) - # Specify raster object metadata. - elevation_raster_object = rasterio.open(elevation_raster) - allocation_zone_raster_object = rasterio.open(allocation_raster) - thalweg_object = rasterio.open(stream_raster) + # Open files. + with rasterio.open(elevation_raster) as elevation_raster_object, rasterio.open(allocation_raster) as allocation_zone_raster_object: + + with rasterio.open(cost_distance_raster) as cost_distance_raster_object: + + meta = elevation_raster_object.meta.copy() + meta['tiled'], meta['compress'] = True, 'lzw' + ndv = meta['nodata'] + + # -- Create zone_min_dict -- # + zone_min_dict = typed.Dict.empty(types.int32,types.float32) # Initialize an empty dictionary to store the catchment minimums + # Update catchment_min_dict with pixel sheds minimum. 
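+            # Note: block_windows(1) yields (block index, Window) pairs for band 1, so the
+            # three rasters are read and processed window by window instead of loading the
+            # full arrays into memory.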
+ for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using elevation_raster_object as template + elevation_window = elevation_raster_object.read(1,window=window).ravel() # Define elevation_window + zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define zone_window + cost_window = cost_distance_raster_object.read(1, window=window).ravel() # Define cost_window + + # Call numba-optimized function to update catchment_min_dict with pixel sheds minimum. + zone_min_dict = make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window, int(cost_distance_tolerance), ndv) + + # ------------------------------------------------------------------------------------------------------------------------ # + - dem_lateral_thalweg_adj_object = rasterio.open(dem_lateral_thalweg_adj, 'w', **meta) + # Specify raster object metadata. + with rasterio.open(stream_raster) as thalweg_object, rasterio.open(dem_lateral_thalweg_adj, 'w', **meta) as dem_lateral_thalweg_adj_object: - for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template - dem_window = elevation_raster_object.read(1,window=window) # Define dem_window - window_shape = dem_window.shape - dem_window = dem_window.ravel() + for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template + dem_window = elevation_raster_object.read(1,window=window) # Define dem_window + window_shape = dem_window.shape + dem_window = dem_window.ravel() - zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define catchments_window - thalweg_window = thalweg_object.read(1,window=window).ravel() # Define thalweg_window + zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define catchments_window + thalweg_window = thalweg_object.read(1,window=window).ravel() # Define thalweg_window - # Call numba-optimized function to reassign thalweg cell values to catchment minimum value. - minimized_dem_window = minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_window) - minimized_dem_window = minimized_dem_window.reshape(window_shape).astype(np.float32) + # Call numba-optimized function to reassign thalweg cell values to catchment minimum value. + minimized_dem_window = minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_window) + minimized_dem_window = minimized_dem_window.reshape(window_shape).astype(np.float32) - dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1) + dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1) - elevation_raster_object.close() - allocation_zone_raster_object.close() - cost_distance_raster_object.close() if __name__ == '__main__': diff --git a/src/aggregate_branch_lists.py b/src/aggregate_branch_lists.py new file mode 100755 index 000000000..9cff6bb13 --- /dev/null +++ b/src/aggregate_branch_lists.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 + +import argparse +import pandas as pd +import sys + +from os.path import join +from glob import glob + +def aggregate_branch_lists(output_dir, file_name, output_file_name): + + file_names = glob(join(output_dir, '*', file_name)) + + if (len(file_names) == 0): + print ("Error: No Branches available to aggregate. 
Program terminated.", flush=True) + sys.exit(1) + + df_combined = pd.concat([pd.read_csv(f, header=None, dtype ='str') for f in file_names ], ignore_index=True) + + df_combined.to_csv(output_file_name, index=False, header=False) + +if __name__ == '__main__': + + # This tool takes in a single file name and searchs all directories + # recusively for the same file name and merges them. + + parser = argparse.ArgumentParser(description='Aggregate') + parser.add_argument('-d','--output_dir', help='output run data directory', required=True) + parser.add_argument('-f','--file_name', help='file name to match', required=True) + parser.add_argument('-o','--output_file_name', help='output file name', required=True) + + args = vars(parser.parse_args()) + + aggregate_branch_lists(**args) diff --git a/src/aggregate_fim_outputs.sh b/src/aggregate_fim_outputs.sh deleted file mode 100755 index 7e632d586..000000000 --- a/src/aggregate_fim_outputs.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -e - -#outputRunDataDir=$1 - -echo "Aggregating FIM required outputs" - -# make aggregate fim outputs dir -outputRunDataDir=$1 -fimAggregateOutputsDir=$outputRunDataDir/aggregate_fim_outputs -mkdir $fimAggregateOutputsDir - -# cd to make vrt paths relative -cd $fimAggregateOutputsDir - -# build rem vrt -gdalbuildvrt -q rem.vrt ../*/rem_zeroed_masked.tif - -# build catchments vrt -gdalbuildvrt -q catchments.vrt ../*/gw_catchments_reaches_filtered_addedAttributes.tif - -# aggregate hydro-table -i=0 #inialize counter variable -for f in $(find $outputRunDataDir -type f -name hydroTable.csv); do - if [ "$i" -gt 0 ];then # aggregate remaining files without header - tail -n+2 $f >> $fimAggregateOutputsDir/hydroTable.csv - else # copy first file over with header - cat $f > $fimAggregateOutputsDir/hydroTable.csv - fi - ((i=i+1)) #counter variable -done - -# cd back -cd $OLDPWD diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py index aa47342b8..32b25739e 100755 --- a/src/aggregate_vector_inputs.py +++ b/src/aggregate_vector_inputs.py @@ -14,18 +14,7 @@ from shapely.wkb import dumps, loads import pygeos -nhdplus_vectors_dir = os.environ.get('nhdplus_vectors_dir') -wbd_filename = os.environ.get('wbd_filename') -nwm_streams_orig_filename = os.environ.get('nwm_streams_orig_filename') -nwm_streams_all_filename = os.environ.get('nwm_streams_all_filename') -nwm_headwaters_filename = os.environ.get('nwm_headwaters_filename') -nwm_catchments_orig_filename = os.environ.get('nwm_catchments_orig_filename') -nwm_catchments_all_filename = os.environ.get('nwm_catchments_all_filename') -ahps_filename = os.environ.get('ahps_filename') -nwm_huc4_intersections_filename = os.environ.get('nwm_huc4_intersections_filename') -nhd_huc8_intersections_filename = os.environ.get('nhd_huc8_intersections_filename') -agg_nhd_headwaters_adj_fileName = os.environ['agg_nhd_headwaters_adj_fileName'] -agg_nhd_streams_adj_fileName = os.environ['agg_nhd_streams_adj_fileName'] + def identify_nwm_ms_streams(nwm_streams_filename,ahps_filename,nwm_streams_all_filename): @@ -314,37 +303,6 @@ def subset_stream_networks(args, huc): print(f"finished stream subset for HUC {huc}",flush=True) -def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list): - - for huc in huc_list: - - # aggregated final filenames - nhd_agg_adj_huc_subset = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') - nhd_agg_adj_headwaters_subset = 
os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') - - if os.path.isfile(nhd_agg_adj_huc_subset): - adj_nhd_streams_all = gpd.read_file(nhd_agg_adj_huc_subset) - - # Write out FR adjusted - if os.path.isfile(agg_nhd_streams_adj_fileName): - adj_nhd_streams_all.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False, mode='a') - else: - adj_nhd_streams_all.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False) - - del adj_nhd_streams_all - - if os.path.isfile(nhd_agg_adj_headwaters_subset): - adj_nhd_headwater_points_all = gpd.read_file(nhd_agg_adj_headwaters_subset) - - # Write out FR adjusted - if os.path.isfile(agg_nhd_headwaters_adj_fileName): - adj_nhd_headwater_points_all.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False, mode='a') - else: - adj_nhd_headwater_points_all.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False) - - del adj_nhd_headwater_points_all - - def clean_up_intermediate_files(nhdplus_vectors_dir): for huc in os.listdir(nhdplus_vectors_dir): @@ -393,9 +351,5 @@ def clean_up_intermediate_files(nhdplus_vectors_dir): del wbd4,wbd8 - # Aggregate subset nhd networks for entire nwm domain - print ('Aggregating subset NHD networks for entire NWM domain') - aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,missing_subsets) - # Remove intermediate files # clean_up_intermediate_files(nhdplus_vectors_dir) diff --git a/src/agreedem.py b/src/agreedem.py index 7d15b7f80..88e293fcd 100755 --- a/src/agreedem.py +++ b/src/agreedem.py @@ -3,14 +3,14 @@ import numpy as np import os import argparse -from r_grow_distance import r_grow_distance +import whitebox from utils.shared_functions import mem_profile @mem_profile -def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buffer_dist, smooth_drop, sharp_drop, delete_intermediate_data): +def agreedem(rivers_raster, dem, output_raster, workspace, buffer_dist, smooth_drop, sharp_drop, delete_intermediate_data): ''' - Produces a hydroconditioned raster using the AGREE DEM methodology as described by Ferdi Hellweger (https://www.caee.utexas.edu/prof/maidment/gishydro/ferdi/research/agree/agree.html). The GRASS gis tool r.grow.distance is used to calculate intermediate allocation and proximity rasters. + Produces a hydroconditioned raster using the AGREE DEM methodology as described by Ferdi Hellweger (https://www.caee.utexas.edu/prof/maidment/gishydro/ferdi/research/agree/agree.html). Whiteboxtools is used to calculate intermediate allocation and proximity rasters. Parameters ---------- @@ -22,8 +22,6 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff Path to output raster. For example, dem_burned.tif workspace : STR Path to workspace to save all intermediate files. - grass_workspace : STR - Path to the temporary workspace for grass inputs. This temporary workspace is deleted once grass datasets are produced and exported to tif files. buffer_dist : FLOAT AGREE stream buffer distance (in meters) on either side of stream. smooth_drop : FLOAT @@ -38,6 +36,10 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff None. ''' + # Set wbt envs + wbt = whitebox.WhiteboxTools() + wbt.set_verbose_mode(False) + #------------------------------------------------------------------ # 1. 
From Hellweger documentation: Compute the vector grid # (vectgrid). The cells in the vector grid corresponding to the @@ -45,182 +47,192 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff # data. # Import dem layer and river layer and get dem profile. - elev = rasterio.open(dem) - dem_profile = elev.profile - - rivers = rasterio.open(rivers_raster) - - # Define smogrid profile and output file - smo_profile = dem_profile.copy() - smo_profile.update(nodata = 0) - smo_profile.update(dtype = 'float32') - smo_output = os.path.join(workspace, 'agree_smogrid.tif') - - # Windowed reading/calculating/writing - with rasterio.Env(): - with rasterio.open(smo_output, 'w', **smo_profile) as raster: - for ji, window in elev.block_windows(1): - # read elevation data and mask information - elev_data_window = elev.read(1, window = window) - elev_mask_window = elev.read_masks(1, window = window).astype('bool') - # Import boolean river raster and apply same NODATA mask as dem - # layer. In case rivers extend beyond valid data regions of DEM. - river_raw_data_window = rivers.read(1, window = window) - river_data_window = np.where(elev_mask_window == True, river_raw_data_window, 0) - - #--------------------------------------------------------------- - # 2. From Hellweger documentation: Compute the smooth drop/raise - # grid (smogrid). The cells in the smooth drop/raise grid - # corresponding to the vector lines have an elevation equal to that - # of the original DEM (oelevgrid) plus a certain distance - # (smoothdist). All other cells have no data. - - # Assign smooth distance and calculate the smogrid. - smooth_dist = -1 * smooth_drop # in meters. - smogrid_window = river_data_window*(elev_data_window + smooth_dist) - - # Write out raster - raster.write(smogrid_window.astype('float32'), indexes = 1, window = window) - - elev.close() - rivers.close() - raster.close() - #------------------------------------------------------------------ - # 3. From Hellweger documentation: Compute the vector distance grids - # (vectdist and vectallo). The cells in the vector distance grid - # (vectdist) store the distance to the closest vector cell. The - # cells in vector allocation grid (vectallo) store the elevation of - # the closest vector cell. - - # Compute allocation and proximity grid using GRASS gis - # r.grow.distance tool. Output distance grid in meters. Set datatype - # for output allocation and proximity grids to float32. - vectdist_grid, vectallo_grid = r_grow_distance(smo_output, grass_workspace, 'Float32', 'Float32') - - #------------------------------------------------------------------ - # 4. From Hellweger documentation: Compute the buffer grid - # (bufgrid2). The cells in the buffer grid outside the buffer - # distance (buffer) store the original elevation. The cells in the - # buffer grid inside the buffer distance have no data. - - # Open distance, allocation, elevation grids. - vectdist = rasterio.open(vectdist_grid) - vectallo = rasterio.open(vectallo_grid) - elev = rasterio.open(dem) - - # Define bufgrid profile and output file. 
- buf_output = os.path.join(workspace, 'agree_bufgrid.tif') - buf_profile = dem_profile.copy() - buf_profile.update(dtype = 'float32') - - # Windowed reading/calculating/writing - with rasterio.Env(): - with rasterio.open(buf_output, 'w', **buf_profile) as raster: - for ji, window in elev.block_windows(1): - # read distance, allocation, and elevation datasets - vectdist_data_window = vectdist.read(1, window = window) - vectallo_data_window = vectallo.read(1, window = window) - elev_data_window = elev.read(1, window = window) - - # Define buffer distance and calculate adjustment to compute the - # bufgrid. - # half_res adjustment equal to half distance of one cell - half_res = elev.res[0]/2 - final_buffer = buffer_dist - half_res # assume all units in meters. - - # Calculate bufgrid. Assign NODATA to areas where vectdist_data <= - # buffered value. - bufgrid_window = np.where(vectdist_data_window > final_buffer, elev_data_window, dem_profile['nodata']) - - # Write out raster. - raster.write(bufgrid_window.astype('float32'), indexes = 1, window = window) - - vectdist.close() - vectallo.close() - elev.close() - #------------------------------------------------------------------ - # 5. From Hellweger documentation: Compute the buffer distance grids - # (bufdist and bufallo). The cells in the buffer distance grid - # (bufdist) store the distance to the closest valued buffer grid - # cell (bufgrid2). The cells in buffer allocation grid (bufallo) - # store the elevation of the closest valued buffer cell. - - # Compute allocation and proximity grid using GRASS gis - # r.grow.distance. Output distance grid in meters. Set datatype for - # output allocation and proximity grids to float32. - bufdist_grid, bufallo_grid = r_grow_distance(buf_output, grass_workspace, 'Float32', 'Float32') - - # Open distance, allocation, elevation grids. - bufdist = rasterio.open(bufdist_grid) - bufallo = rasterio.open(bufallo_grid) - vectdist = rasterio.open(vectdist_grid) - vectallo = rasterio.open(vectallo_grid) - rivers = rasterio.open(rivers_raster) - elev = rasterio.open(dem) - - # Define profile output file. - agree_output = output_raster - agree_profile = dem_profile.copy() - agree_profile.update(dtype = 'float32') - - # Windowed reading/calculating/writing - with rasterio.Env(): - with rasterio.open(agree_output, 'w', **agree_profile) as raster: - for ji, window in elev.block_windows(1): - # Read elevation data and mask, distance and allocation grids, and river data. - elev_data_window = elev.read(1, window = window) - elev_mask_window = elev.read_masks(1, window = window).astype('bool') - bufdist_data_window = bufdist.read(1, window = window) - bufallo_data_window = bufallo.read(1, window = window) - vectdist_data_window = vectdist.read(1, window = window) - vectallo_data_window = vectallo.read(1, window = window) - river_raw_data_window = rivers.read(1, window = window) - - - river_data_window = np.where(elev_mask_window == True, river_raw_data_window, -20.0) - #------------------------------------------------------------------ - # 6. From Hellweger documentation: Compute the smooth modified - # elevation grid (smoelev). The cells in the smooth modified - # elevation grid store the results of the smooth surface - # reconditioning process. Note that for cells outside the buffer the - # equation below assigns the original elevation. - - # Calculate smoelev. 
- smoelev_window = vectallo_data_window + ((bufallo_data_window - vectallo_data_window)/(bufdist_data_window + vectdist_data_window)) * vectdist_data_window - - #------------------------------------------------------------------ - # 7. From Hellweger documentation: Compute the sharp drop/raise grid - # (shagrid). The cells in the sharp drop/raise grid corresponding to - # the vector lines have an elevation equal to that of the smooth - # modified elevation grid (smoelev) plus a certain distance - # (sharpdist). All other cells have no data. - - # Define sharp drop distance and calculate the sharp drop grid where - # only river cells are dropped by the sharp_dist amount. - sharp_dist = -1 * sharp_drop # in meters. - shagrid_window = (smoelev_window + sharp_dist) * river_data_window - - #------------------------------------------------------------------ - # 8. From Hellweger documentation: Compute the modified elevation - # grid (elevgrid). The cells in the modified elevation grid store - # the results of the surface reconditioning process. Note that for - # cells outside the buffer the the equation below assigns the - # original elevation. - - # Merge sharp drop grid with smoelev grid. Then apply the same - # NODATA mask as original elevation grid. - elevgrid_window = np.where(river_data_window == 0, smoelev_window, shagrid_window) - agree_dem_window = np.where(elev_mask_window == True, elevgrid_window, dem_profile['nodata']) - - # Write out to raster - raster.write(agree_dem_window.astype('float32'), indexes = 1, window = window) - - bufdist.close() - bufallo.close() - vectdist.close() - vectallo.close() - rivers.close() - elev.close() + with rasterio.open(dem) as elev, rasterio.open(rivers_raster) as rivers: + dem_profile = elev.profile + + # Define smogrid profile and output file + smo_profile = dem_profile.copy() + smo_profile.update(nodata = 0) + smo_profile.update(dtype = 'float32') + smo_output = os.path.join(workspace, 'agree_smogrid.tif') + vectdist_grid = os.path.join(workspace,'agree_smogrid_dist.tif') + vectallo_grid = os.path.join(workspace,'agree_smogrid_allo.tif') + + # Windowed reading/calculating/writing + with rasterio.Env(): + with rasterio.open(smo_output, 'w', **smo_profile) as raster: + for ji, window in elev.block_windows(1): + # read elevation data and mask information + elev_data_window = elev.read(1, window = window) + elev_mask_window = elev.read_masks(1, window = window).astype('bool') + # Import boolean river raster and apply same NODATA mask as dem + # layer. In case rivers extend beyond valid data regions of DEM. + river_raw_data_window = rivers.read(1, window = window) + river_data_window = np.where(elev_mask_window == True, river_raw_data_window, 0) + + #--------------------------------------------------------------- + # 2. From Hellweger documentation: Compute the smooth drop/raise + # grid (smogrid). The cells in the smooth drop/raise grid + # corresponding to the vector lines have an elevation equal to that + # of the original DEM (oelevgrid) plus a certain distance + # (smoothdist). All other cells have no data. + + # Assign smooth distance and calculate the smogrid. + smooth_dist = -1 * smooth_drop # in meters. + smogrid_window = river_data_window*(elev_data_window + smooth_dist) + + # Write out raster + raster.write(smogrid_window.astype('float32'), indexes = 1, window = window) + + #------------------------------------------------------------------ + # 3. From Hellweger documentation: Compute the vector distance grids + # (vectdist and vectallo). 
The cells in the vector distance grid + # (vectdist) store the distance to the closest vector cell. The + # cells in vector allocation grid (vectallo) store the elevation of + # the closest vector cell. + + # Compute allocation and proximity grid using WhiteboxTools + smo_output_zerod = os.path.join(workspace, 'agree_smogrid_zerod.tif') + wbt.euclidean_distance(rivers_raster,vectdist_grid) + wbt.convert_nodata_to_zero(smo_output,smo_output_zerod) + wbt.euclidean_allocation(smo_output_zerod,vectallo_grid) + + #------------------------------------------------------------------ + # 4. From Hellweger documentation: Compute the buffer grid + # (bufgrid2). The cells in the buffer grid outside the buffer + # distance (buffer) store the original elevation. The cells in the + # buffer grid inside the buffer distance have no data. + + # Open distance, allocation, elevation grids. + with rasterio.open(vectdist_grid) as vectdist: + + # Define bufgrid profile and output file. + buf_output = os.path.join(workspace, 'agree_bufgrid.tif') + bufdist_grid = os.path.join(workspace,'agree_bufgrid_dist.tif') + bufallo_grid = os.path.join(workspace,'agree_bufgrid_allo.tif') + buf_profile = dem_profile.copy() + buf_profile.update(dtype = 'float32') + + # Windowed reading/calculating/writing + with rasterio.Env(): + with rasterio.open(buf_output, 'w', **buf_profile) as raster: + for ji, window in elev.block_windows(1): + # read distance, allocation, and elevation datasets + vectdist_data_window = vectdist.read(1, window = window) + elev_data_window = elev.read(1, window = window) + + # Define buffer distance and calculate adjustment to compute the + # bufgrid. + # half_res adjustment equal to half distance of one cell + half_res = elev.res[0]/2 + final_buffer = buffer_dist - half_res # assume all units in meters. + + # Calculate bufgrid. Assign NODATA to areas where vectdist_data <= + # buffered value. + bufgrid_window = np.where(vectdist_data_window > final_buffer, elev_data_window, dem_profile['nodata']) + + # Write out raster. + raster.write(bufgrid_window.astype('float32'), indexes = 1, window = window) + + + #------------------------------------------------------------------ + # 5. From Hellweger documentation: Compute the buffer distance grids + # (bufdist and bufallo). The cells in the buffer distance grid + # (bufdist) store the distance to the closest valued buffer grid + # cell (bufgrid2). The cells in buffer allocation grid (bufallo) + # store the elevation of the closest valued buffer cell. + + # # Transform the buffer grid (bufgrid2) to binary raster + bin_buf_output = os.path.join(workspace, 'agree_binary_bufgrid.tif') + with rasterio.open(buf_output) as agree_bufgrid: + agree_bufgrid_profile = agree_bufgrid.profile + bin_buf_output_profile = agree_bufgrid_profile.copy() + bin_buf_output_profile.update(dtype = 'float32') + + with rasterio.Env(): + with rasterio.open(bin_buf_output, 'w', **bin_buf_output_profile) as raster: + for ji, window in agree_bufgrid.block_windows(1): + # read distance, allocation, and elevation datasets + agree_bufgrid_data_window = agree_bufgrid.read(1, window = window) + + # Calculate bufgrid. Assign NODATA to areas where vectdist_data <= + agree_bufgrid_data_window = np.where(agree_bufgrid_data_window>-10000, 1, 0) + + # Write out raster. 
+ raster.write(agree_bufgrid_data_window.astype('float32'), indexes = 1, window = window) + + # Compute allocation and proximity grid using WhiteboxTools + buf_output_zerod = os.path.join(workspace, 'agree_bufgrid_zerod.tif') + wbt.euclidean_distance(bin_buf_output,bufdist_grid) + wbt.convert_nodata_to_zero(buf_output,buf_output_zerod) + wbt.euclidean_allocation(buf_output_zerod,bufallo_grid) + + # Open distance, allocation, elevation grids. + with rasterio.open(bufdist_grid) as bufdist, rasterio.open(bufallo_grid) as bufallo, rasterio.open(vectallo_grid) as vectallo: + + # Define profile output file. + agree_output = output_raster + agree_profile = dem_profile.copy() + agree_profile.update(dtype = 'float32') + + # Windowed reading/calculating/writing + with rasterio.Env(): + with rasterio.open(agree_output, 'w', **agree_profile) as raster: + for ji, window in elev.block_windows(1): + # Read elevation data and mask, distance and allocation grids, and river data. + elev_data_window = elev.read(1, window = window) + elev_mask_window = elev.read_masks(1, window = window).astype('bool') + bufdist_data_window = bufdist.read(1, window = window) + bufallo_data_window = bufallo.read(1, window = window) + vectdist_data_window = vectdist.read(1, window = window) + vectallo_data_window = vectallo.read(1, window = window) + river_raw_data_window = rivers.read(1, window = window) + + bufallo_data_window = np.where(bufallo_data_window == -32768., elev_data_window, bufallo_data_window) + + vectallo_data_window = np.where(vectallo_data_window == -32768., elev_data_window-10, vectallo_data_window) + + river_raw_data_window = river_raw_data_window.astype(np.float32) + + river_data_window = np.where(elev_mask_window == True, river_raw_data_window, -20.0) + #------------------------------------------------------------------ + # 6. From Hellweger documentation: Compute the smooth modified + # elevation grid (smoelev). The cells in the smooth modified + # elevation grid store the results of the smooth surface + # reconditioning process. Note that for cells outside the buffer the + # equation below assigns the original elevation. + + # Calculate smoelev. + smoelev_window = vectallo_data_window + ((bufallo_data_window - vectallo_data_window)/(bufdist_data_window + vectdist_data_window)) * vectdist_data_window + + #------------------------------------------------------------------ + # 7. From Hellweger documentation: Compute the sharp drop/raise grid + # (shagrid). The cells in the sharp drop/raise grid corresponding to + # the vector lines have an elevation equal to that of the smooth + # modified elevation grid (smoelev) plus a certain distance + # (sharpdist). All other cells have no data. + + # Define sharp drop distance and calculate the sharp drop grid where + # only river cells are dropped by the sharp_dist amount. + sharp_dist = -1 * sharp_drop # in meters. + shagrid_window = (smoelev_window + sharp_dist) * river_data_window + + #------------------------------------------------------------------ + # 8. From Hellweger documentation: Compute the modified elevation + # grid (elevgrid). The cells in the modified elevation grid store + # the results of the surface reconditioning process. Note that for + # cells outside the buffer the the equation below assigns the + # original elevation. + + # Merge sharp drop grid with smoelev grid. Then apply the same + # NODATA mask as original elevation grid. 
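+                        # Note: river_data_window is the boolean stream raster (non-zero on channel
+                        # cells, 0 elsewhere; -20 where the DEM is masked), so the np.where below keeps
+                        # the smooth-reconditioned surface off-channel and the sharp-dropped surface on
+                        # channel cells; DEM NODATA cells are overwritten with nodata in the next step.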
+ elevgrid_window = np.where(river_data_window == 0, smoelev_window, shagrid_window) + agree_dem_window = np.where(elev_mask_window == True, elevgrid_window, dem_profile['nodata']) + + # Write out to raster + raster.write(agree_dem_window.astype('float32'), indexes = 1, window = window) + + # If the '-t' flag is called, intermediate data is removed. if delete_intermediate_data: os.remove(smo_output) @@ -229,6 +241,9 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff os.remove(vectallo_grid) os.remove(bufdist_grid) os.remove(bufallo_grid) + os.remove(bin_buf_output) + os.remove(buf_output_zerod) + os.remove(smo_output_zerod) if __name__ == '__main__': @@ -238,7 +253,6 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff parser.add_argument('-r', '--rivers', help = 'flows grid boolean layer', required = True) parser.add_argument('-d', '--dem_m', help = 'DEM raster in meters', required = True) parser.add_argument('-w', '--workspace', help = 'Workspace', required = True) - parser.add_argument('-g', '--grass_workspace', help = 'Temporary GRASS workspace', required = True) parser.add_argument('-o', '--output', help = 'Path to output raster', required = True) parser.add_argument('-b', '--buffer', help = 'Buffer distance (m) on either side of channel', required = True) parser.add_argument('-sm', '--smooth', help = 'Smooth drop (m)', required = True) @@ -252,7 +266,6 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff rivers_raster = args['rivers'] dem = args['dem_m'] workspace = args['workspace'] - grass_workspace = args['grass_workspace'] output_raster = args['output'] buffer_dist = float(args['buffer']) smooth_drop = float(args['smooth']) @@ -260,4 +273,4 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff delete_intermediate_data = args['del'] #Run agreedem - agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buffer_dist, smooth_drop, sharp_drop, delete_intermediate_data) + agreedem(rivers_raster, dem, output_raster, workspace, buffer_dist, smooth_drop, sharp_drop, delete_intermediate_data) diff --git a/src/associate_levelpaths_with_levees.py b/src/associate_levelpaths_with_levees.py new file mode 100644 index 000000000..0dbf7327c --- /dev/null +++ b/src/associate_levelpaths_with_levees.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 + +import os +import argparse +import numpy as np +import pandas as pd +import geopandas as gpd + +def associate_levelpaths_with_levees(levees_filename:str, levee_id_attribute:str, leveed_areas_filename:str, levelpaths_filename:str, branch_id_attribute:str, levee_buffer:float, out_filename:str): + """ + Finds the level path associated with each levee. Ignores level paths that cross a levee exactly once. + + Parameters + ---------- + levees_filename: str + Path to levees file. + levee_id_attribute: str + Name of levee ID attribute. + leveed_areas_filename: str + Path to levee-protected areas file. + levelpaths_filename: str + Path to level paths file. + branch_id_attribute: str + Name of branch ID attribute. + levee_buffer: float + Distance to buffer from levee. + out_filename: str + Path to write output CSV file. 
+ """ + + if os.path.exists(levees_filename) and os.path.exists(leveed_areas_filename) and os.path.exists(levelpaths_filename): + # Read in geodataframes + levees = gpd.read_file(levees_filename) + leveed_areas = gpd.read_file(leveed_areas_filename) + levelpaths = gpd.read_file(levelpaths_filename) + + levees[levee_id_attribute] = levees[levee_id_attribute].astype(int) + leveed_areas[levee_id_attribute] = leveed_areas[levee_id_attribute].astype(int) + levelpaths[branch_id_attribute] = levelpaths[branch_id_attribute].astype(int) + + # Buffer each side of levee line + levees_buffered_left = levees.copy() + levees_buffered_right = levees.copy() + levees_buffered_left.geometry = levees.buffer(levee_buffer, single_sided=True) + levees_buffered_right.geometry = levees.buffer(-levee_buffer, single_sided=True) + + # Intersect leveed areas with single-sided levee buffers + leveed_left = gpd.overlay(levees_buffered_left, leveed_areas, how='intersection') + leveed_right = gpd.overlay(levees_buffered_right, leveed_areas, how='intersection') + + # Find leveed areas not intersected by either buffer + leveed_intersected = [] + if not leveed_left.empty: + [leveed_intersected.append(x) for x in leveed_left[f'{levee_id_attribute}_1'].values] + + # Associate levees and leveed areas + matches_left = np.where(leveed_left[f'{levee_id_attribute}_1']==leveed_left[f'{levee_id_attribute}_2'])[0] + + leveed_left = leveed_left.loc[matches_left] + + # Get area of associated leveed areas + leveed_left['leveed_area'] = leveed_left.area + + leveed_left = leveed_left[[f'{levee_id_attribute}_1', 'leveed_area', 'geometry']] + + if not leveed_right.empty: + [leveed_intersected.append(x) for x in leveed_right[f'{levee_id_attribute}_1'].values] + + # Associate levees and leveed areas + matches_right = np.where(leveed_right[f'{levee_id_attribute}_1']==leveed_right[f'{levee_id_attribute}_2'])[0] + + leveed_right = leveed_right.loc[matches_right] + + # Get area of associated leveed areas + leveed_right['leveed_area'] = leveed_right.area + + leveed_right = leveed_right[[f'{levee_id_attribute}_1', 'leveed_area', 'geometry']] + + if len(leveed_intersected) > 0: + levees_not_found = leveed_areas[~leveed_areas[levee_id_attribute].isin(leveed_intersected)] + + # Merge left and right levee protected areas + if leveed_left.empty and leveed_right.empty: + return + + elif not leveed_left.empty and not leveed_right.empty: + leveed = leveed_left.merge(leveed_right, on=f'{levee_id_attribute}_1', how='outer', suffixes=['_left', '_right']) + + # Set unmatched areas to zero + leveed.loc[np.isnan(leveed['leveed_area_left']), 'leveed_area_left'] = 0. + leveed.loc[np.isnan(leveed['leveed_area_right']), 'leveed_area_right'] = 0. + + elif leveed_left.empty: + leveed = leveed_right.rename(columns={'leveed_area': 'leveed_area_right'}) + leveed['leveed_area_left'] = 0. + + elif leveed_right.empty: + leveed = leveed_left.rename(columns={'leveed_area': 'leveed_area_left'}) + leveed['leveed_area_right'] = 0. 
+ + # Determine which side the levee is protecting (opposite of levee protected area) + leveed['levee_side'] = np.where(leveed['leveed_area_left'] < leveed['leveed_area_right'], 'left', 'right') + + # Split into sides + left_ids = leveed.loc[leveed['levee_side']=='left', f'{levee_id_attribute}_1'] + right_ids = leveed.loc[leveed['levee_side']=='right', f'{levee_id_attribute}_1'] + + # Associate level paths with levee buffers + levee_levelpaths_left = gpd.sjoin(levees_buffered_left, levelpaths) + levee_levelpaths_right = gpd.sjoin(levees_buffered_right, levelpaths) + + levee_levelpaths_left = levee_levelpaths_left[[levee_id_attribute, branch_id_attribute]] + levee_levelpaths_right = levee_levelpaths_right[[levee_id_attribute, branch_id_attribute]] + + # Select streams on the correct side of levee + levee_levelpaths_left = levee_levelpaths_left[levee_levelpaths_left[levee_id_attribute].isin(left_ids)] + levee_levelpaths_right = levee_levelpaths_right[levee_levelpaths_right[levee_id_attribute].isin(right_ids)] + + # Join left and right + out_df = pd.concat([levee_levelpaths_right[[levee_id_attribute, branch_id_attribute]], levee_levelpaths_left[[levee_id_attribute, branch_id_attribute]]]).drop_duplicates().reset_index(drop=True) + + # Add level paths to levees not found + if len(levees_not_found) > 0: + levees_not_found.geometry = levees_not_found.buffer(2*levee_buffer) + levees_not_found = gpd.sjoin(levees_not_found, levelpaths) + + # Add to out_df + out_df = pd.concat([out_df[[levee_id_attribute, branch_id_attribute]], levees_not_found[[levee_id_attribute, branch_id_attribute]]]).drop_duplicates().reset_index(drop=True) + + # Remove levelpaths that cross the levee exactly once + for j, row in out_df.iterrows(): + # Intersect levees and levelpaths + row_intersections = gpd.overlay(levees[levees[levee_id_attribute] == row[levee_id_attribute]], levelpaths[levelpaths[branch_id_attribute] == row[branch_id_attribute]], how='intersection', keep_geom_type=False) + + # Convert MultiPoint to Point + row_intersections = row_intersections.explode() + + # Select Point geometry type + row_intersections = row_intersections[row_intersections.geom_type =='Point'] + + if len(row_intersections) == 1: + out_df = out_df.drop(j) + + out_df.to_csv(out_filename, columns=[levee_id_attribute, branch_id_attribute], index=False) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Associate level paths with levees') + parser.add_argument('-nld','--levees-filename', help='NLD levees filename', required=True, type=str) + parser.add_argument('-l','--levee-id-attribute', help='Levee ID attribute name', required=True, type=str) + parser.add_argument('-out','--out-filename', help='out CSV filename', required=True, type=str) + parser.add_argument('-s', '--levelpaths-filename', help='Level path layer filename', required=True, type=str) + parser.add_argument('-b','--branch-id-attribute', help='Level path ID attribute name', required=True, type=str) + parser.add_argument('-lpa', '--leveed-areas-filename', help='NLD levee-protected areas filename', required=True, type=str) + parser.add_argument('-w', '--levee-buffer', help='Buffer width (in meters)', required=True, type=float) + + args = vars(parser.parse_args()) + + associate_levelpaths_with_levees(**args) diff --git a/src/bash_functions.env b/src/bash_functions.env index 54eb9fca8..11ed8ea46 100644 --- a/src/bash_functions.env +++ b/src/bash_functions.env @@ -12,8 +12,23 @@ Tstart () { Tcount () { t2=`date +%s` - echo "Time = `expr $t2 \- $t1`sec" - 
echo "Cumulative_Time = `expr $t2 \- $t0`sec" + #echo "Time = `expr $t2 \- $t1`sec" + #echo "Cumulative_Time = `expr $t2 \- $t0`sec" + + local total_sec=$(( $t2 - $t1)) + local dur_min=$((total_sec / 60)) + local dur_remainder_sec=$((total_sec % 60)) + echo "Cumulative Time = $dur_min min(s) and $dur_remainder_sec sec" +} + +Calc_Duration() { + start_time=$1 + end_time=`date +%s` + + total_sec=$(( $end_time - $start_time )) + dur_min=$((total_sec / 60)) + dur_remainder_sec=$((total_sec % 60)) + echo "Duration = $dur_min min(s) and $dur_remainder_sec sec" } export -f T_total_start diff --git a/src/bash_variables.env b/src/bash_variables.env new file mode 100644 index 000000000..8d48ce733 --- /dev/null +++ b/src/bash_variables.env @@ -0,0 +1,18 @@ +## Define inputs +# NOTE: $inputDataDir is defined in Dockerfile +export DEFAULT_FIM_PROJECTION_CRS=EPSG:5070 +export input_DEM=$inputDataDir/3dep_dems/10m_5070/fim_seamless_3dep_dem_10m_5070.vrt +export input_DEM_domain=$inputDataDir/3dep_dems/10m_5070/HUC6_dem_domain.gpkg +export input_GL_boundaries=$inputDataDir/landsea/gl_water_polygons.gpkg +export input_NLD=$inputDataDir/nld_vectors/System_Routes_NLDFS_5070_230314.gpkg +export input_levees_preprocessed=$inputDataDir/nld_vectors/3d_nld_preprocessed_230314.gpkg +export input_nld_levee_protected_areas=$inputDataDir/nld_vectors/Leveed_Areas_NLDFS_5070_230314.gpkg +export input_nwm_catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg +export input_nwm_flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg +export input_nwm_headwaters=$inputDataDir/nwm_hydrofabric/nwm_headwaters.gpkg +export input_nwm_lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg +export input_WBD_gdb=$inputDataDir/wbd/WBD_National.gpkg + +# Styling +export startDiv="\n-----------------------------------------------------------------\n" +export stopDiv="\n-----------------------------------------------------------------\n" diff --git a/src/bathy_rc_adjust.py b/src/bathy_rc_adjust.py new file mode 100755 index 000000000..ebda61044 --- /dev/null +++ b/src/bathy_rc_adjust.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 + +from os import environ +import geopandas as gpd +import pandas as pd +import numpy as np + +sa_ratio_flag = float(environ['surf_area_thalweg_ratio_flag']) #10x +thal_stg_limit = float(environ['thalweg_stg_search_max_limit']) #3m +bankful_xs_ratio_flag = float(environ['bankful_xs_area_ratio_flag']) #10x +bathy_xsarea_flag = float(environ['bathy_xs_area_chg_flag']) #1x +thal_hyd_radius_flag = float(environ['thalweg_hyd_radius_flag']) #10x + +def bathy_rc_lookup(input_src_base,input_bathy_fileName,output_bathy_fileName,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName,): + ## Convert input_src_base featureid to integer + if input_src_base.feature_id.dtype != 'int': input_src_base.feature_id = input_src_base.feature_id.astype(int) + + ## Read in the bankfull channel geometry text file + input_bathy = pd.read_csv(input_bathy_fileName, dtype= {'COMID': int}) + + ## Merge input_bathy and modified_src_base df using feature_id/COMID attributes + input_bathy = input_bathy.rename(columns={'COMID':'feature_id','BANKFULL_WIDTH':'BANKFULL_WIDTH (m)','BANKFULL_XSEC_AREA':'BANKFULL_XSEC_AREA (m2)'}) + modified_src_base = input_src_base.merge(input_bathy.loc[:,['feature_id','BANKFULL_WIDTH (m)','BANKFULL_XSEC_AREA (m2)']],how='left',on='feature_id') + + ## Check that the merge process returned matching feature_id entries + if modified_src_base['BANKFULL_WIDTH (m)'].count() == 0: + 
print('No matching feature_id found between input bathy data and src_base --> No bathy calculations added to SRC!') + return(input_src_base) + else: + ## Use SurfaceArea variable to identify thalweg-restricted stage values for each hydroid + ## Calculate the interrow SurfaceArea ratio n/(n-1) + modified_src_base['SA_div'] = modified_src_base['SurfaceArea (m2)'].div(modified_src_base['SurfaceArea (m2)'].shift(1)) + ## Mask SA_div when Stage = 0 or when the SA_div value (n / n-1) is > threshold value (i.e. 10x) + modified_src_base['SA_div'].mask((modified_src_base['Stage']==0) | (modified_src_base['SA_div'] this is used to mask the discharge after Manning's equation + modified_src_base = modified_src_base.merge(find_thalweg_notch.loc[:,['HydroID','Thalweg_burn_elev']],how='left',on='HydroID') + + ## Calculate bankfull vs top width difference for each feature_id + modified_src_base['Top Width Diff (m)'] = (modified_src_base['TopWidth (m)'] - modified_src_base['BANKFULL_WIDTH (m)']).abs() + ## Calculate XS Area field (Channel Volume / Stream Length) + modified_src_base['XS Area (m2)'] = modified_src_base['Volume (m3)'] / (modified_src_base['LENGTHKM'] * 1000) + + ## Groupby HydroID and find min of Top Width Diff (m) + output_bathy = modified_src_base[['feature_id','HydroID','order_','Stage','SurfaceArea (m2)','Thalweg_burn_elev','BANKFULL_WIDTH (m)','TopWidth (m)','XS Area (m2)','BANKFULL_XSEC_AREA (m2)','Top Width Diff (m)']] + ## filter out stage = 0 rows in SRC (assuming geom at stage 0 is not a valid channel geom) + output_bathy = output_bathy[output_bathy['Stage'] > 0] + ## filter SRC rows identified as Thalweg burned + output_bathy['Top Width Diff (m)'].mask(output_bathy['Stage'] <= output_bathy['Thalweg_burn_elev'],inplace=True) + ## ignore hydroid/featureid that did not have a valid Bankfull lookup (areas outside CONUS - i.e. 
Canada) + output_bathy = output_bathy[output_bathy['BANKFULL_XSEC_AREA (m2)'].notnull()] + ## ignore SRC entries with 0 surface area --> handles input SRC artifacts/errors in Great Lakes region + output_bathy = output_bathy[output_bathy['SurfaceArea (m2)'] > 0] + ## find index of minimum top width difference --> this will be used as the SRC "bankfull" row for future calcs + output_bathy = output_bathy.loc[output_bathy.groupby('HydroID')['Top Width Diff (m)'].idxmin()].reset_index(drop=True) + print('Average: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].mean())) + print('Minimum: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].min())) + print('Maximum: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].max())) + print('STD: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].std()) +'\n' + '#################') + + ## Calculate XS Area difference between SRC and Bankfull database + output_bathy['XS Area Diff (m2)'] = (output_bathy['BANKFULL_XSEC_AREA (m2)'] - output_bathy['XS Area (m2)']) + output_bathy['XS Bankfull Area Ratio'] = (output_bathy['BANKFULL_XSEC_AREA (m2)'] / output_bathy['XS Area (m2)']).round(2) + ## masking negative XS Area Diff and XS Area = 0 + output_bathy['XS Bankfull Area Ratio'].mask((output_bathy['XS Area Diff (m2)']<0) | (output_bathy['XS Area (m2)'] == 0),inplace=True) + ## masking negative XS Area Diff and XS Area = 0 + output_bathy['XS Area Diff (m2)'].mask((output_bathy['XS Area Diff (m2)']<0) | (output_bathy['XS Area (m2)'] == 0),inplace=True) + ## remove bogus values where bankfull area ratio > threshold --> 10x (topwidth crosswalk issues or bad bankfull regression data points??) + output_bathy['XS Area Diff (m2)'].mask(output_bathy['XS Bankfull Area Ratio']>bankful_xs_ratio_flag,inplace=True) + ## remove bogus values where bankfull area ratio > threshold --> 10x (topwidth crosswalk issues or bad bankfull regression data points??) 
+ output_bathy['XS Bankfull Area Ratio'].mask(output_bathy['XS Bankfull Area Ratio']>bankful_xs_ratio_flag,inplace=True) + ## Print XS Area Diff statistics + print('Average: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].mean())) + print('Minimum: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].min())) + print('Maximum: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].max())) + print('STD: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].std())) + + ## Bin XS Bankfull Area Ratio by stream order + stream_order_bathy_ratio = output_bathy[['order_','Stage','XS Bankfull Area Ratio']].copy() + ## mask stage values when XS Bankfull Area Ratio is null (need to filter to calculate the median for valid values below) + stream_order_bathy_ratio['Stage'].mask(stream_order_bathy_ratio['XS Bankfull Area Ratio'].isnull(),inplace=True) + stream_order_bathy_ratio = stream_order_bathy_ratio.groupby('order_').agg(count=('XS Bankfull Area Ratio','count'),mean_xs_area_ratio=('XS Bankfull Area Ratio','mean'),median_stage_bankfull=('Stage','median')) + ## fill XS Bankfull Area Ratio and Stage values if no values were found in the grouby calcs + stream_order_bathy_ratio = (stream_order_bathy_ratio.ffill()+stream_order_bathy_ratio.bfill())/2 + ## fill first and last stream order values if needed + stream_order_bathy_ratio = stream_order_bathy_ratio.bfill().ffill() + ## Get count_total tally of the total number of stream order hydroids in the HUC (not filtering anything out) + stream_order_bathy_ratio_count = output_bathy.groupby('order_').agg(count_total=('Stage','count')) + stream_order_bathy_ratio = stream_order_bathy_ratio.merge(stream_order_bathy_ratio_count,how='left',on='order_') + ## Fill any remaining null values: mean_xs_area_ratio --> 1 median_stage_bankfull --> 0 + stream_order_bathy_ratio['mean_xs_area_ratio'].mask(stream_order_bathy_ratio['mean_xs_area_ratio'].isnull(),1,inplace=True) + stream_order_bathy_ratio['median_stage_bankfull'].mask(stream_order_bathy_ratio['median_stage_bankfull'].isnull(),0,inplace=True) + + ## Combine SRC df and df of XS Area for each hydroid and matching stage and order from bins above + output_bathy = output_bathy.merge(stream_order_bathy_ratio,how='left',on='order_') + modified_src_base = modified_src_base.merge(stream_order_bathy_ratio,how='left',on='order_') + + ## Calculate stage vs median_stage_bankfull difference for bankfull lookup + modified_src_base['lookup_stage_diff'] = (modified_src_base[['median_stage_bankfull','Thalweg_burn_elev']].max(axis=1) - modified_src_base['Stage']).abs() + + ## If median_stage_bankfull is null then set lookup_stage_diff to 999 at stage 0 (handles errors for channels outside CONUS) + modified_src_base['lookup_stage_diff'].mask((modified_src_base['Stage'] == 0) & (modified_src_base['median_stage_bankfull'].isnull()),999,inplace=True) + + ## Groupby HydroID again and find min of lookup_stage_diff + xs_area_hydroid_lookup = modified_src_base[['HydroID','BANKFULL_XSEC_AREA (m2)','XS Area (m2)','Stage','Thalweg_burn_elev','median_stage_bankfull','lookup_stage_diff','mean_xs_area_ratio']] + xs_area_hydroid_lookup = xs_area_hydroid_lookup.loc[xs_area_hydroid_lookup.groupby('HydroID')['lookup_stage_diff'].idxmin()].reset_index(drop=True) + + ## Calculate bathy adjusted XS Area ('XS Area (m2)' mutliplied by mean_xs_area_ratio) + xs_area_hydroid_lookup['bathy_calc_xs_area'] = 
(xs_area_hydroid_lookup['XS Area (m2)'] * xs_area_hydroid_lookup['mean_xs_area_ratio']) - xs_area_hydroid_lookup['XS Area (m2)'] + + ## Calculate the ratio btw the lookup SRC XS_Area and the Bankfull_XSEC_AREA --> use this as a flag for potentially bad XS data + xs_area_hydroid_lookup['bankfull_XS_ratio_flag'] = (xs_area_hydroid_lookup['bathy_calc_xs_area'] / xs_area_hydroid_lookup['BANKFULL_XSEC_AREA (m2)']) + ## Set bath_cal_xs_area to 0 if the bankfull_XS_ratio_flag is > threshold --> 5x (assuming too large of difference to be a reliable bankfull calculation) + xs_area_hydroid_lookup['bathy_calc_xs_area'].mask(xs_area_hydroid_lookup['bankfull_XS_ratio_flag']>bathy_xsarea_flag,xs_area_hydroid_lookup['BANKFULL_XSEC_AREA (m2)'],inplace=True) + xs_area_hydroid_lookup['bathy_calc_xs_area'].mask(xs_area_hydroid_lookup['bankfull_XS_ratio_flag'].isnull(),0,inplace=True) + + ## Merge bathy_calc_xs_area to the modified_src_base + modified_src_base = modified_src_base.merge(xs_area_hydroid_lookup.loc[:,['HydroID','bathy_calc_xs_area']],how='left',on='HydroID') + + ## Calculate new bathy adjusted channel geometry variables + modified_src_base = modified_src_base.rename(columns={'Discharge (m3s-1)':'Discharge (m3s-1)_nobathy'}) + modified_src_base['XS Area (m2)_bathy_adj'] = modified_src_base['XS Area (m2)'] + modified_src_base['bathy_calc_xs_area'] + modified_src_base['Volume (m3)_bathy_adj'] = modified_src_base['XS Area (m2)_bathy_adj'] * modified_src_base['LENGTHKM'] * 1000 + modified_src_base['WetArea (m2)_bathy_adj'] = modified_src_base['Volume (m3)_bathy_adj']/modified_src_base['LENGTHKM']/1000 + modified_src_base['HydraulicRadius (m)_bathy_adj'] = modified_src_base['WetArea (m2)_bathy_adj']/modified_src_base['WettedPerimeter (m)'] + modified_src_base['HydraulicRadius (m)_bathy_adj'].fillna(0, inplace=True) + ## mask out negative top width differences (avoid thalweg burn notch) + modified_src_base['HydraulicRadius (m)_bathy_adj'].mask((modified_src_base['HydraulicRadius (m)_bathy_adj']>thal_hyd_radius_flag) & (modified_src_base['Stage'] do we need SRC to start at 0?? 
+ modified_src_base['Discharge (m3s-1)'].mask(modified_src_base['Stage'] == 0,0,inplace=True) + modified_src_base['Discharge (m3s-1)'].mask(modified_src_base['Stage'] == modified_src_base['Thalweg_burn_elev'],0,inplace=True) + modified_src_base['Discharge (m3s-1)'].mask(modified_src_base['Stage'] < modified_src_base['Thalweg_burn_elev'],-999,inplace=True) + + ## Organize bathy calc output variables for csv + output_bathy = output_bathy[['HydroID','order_','Stage','SurfaceArea (m2)','TopWidth (m)','BANKFULL_WIDTH (m)','Top Width Diff (m)','XS Area (m2)','BANKFULL_XSEC_AREA (m2)','XS Area Diff (m2)','XS Bankfull Area Ratio','count','median_stage_bankfull','mean_xs_area_ratio']] + + ## Export bathy/bankful calculation tables for easy viewing + output_bathy.to_csv(output_bathy_fileName,index=False) + stream_order_bathy_ratio.to_csv(output_bathy_streamorder_fileName,index=True) + find_thalweg_notch.to_csv(output_bathy_thalweg_fileName,index=True) + xs_area_hydroid_lookup.to_csv(output_bathy_xs_lookup_fileName,index=True) + + print('Completed Bathy Calculations...') + return(modified_src_base) diff --git a/src/buffer_stream_branches.py b/src/buffer_stream_branches.py new file mode 100755 index 000000000..06313f5ad --- /dev/null +++ b/src/buffer_stream_branches.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +import os +import geopandas as gpd +from stream_branches import StreamNetwork +from stream_branches import StreamBranchPolygons +import argparse + + +if __name__ == '__main__': + + # parse arguments + parser = argparse.ArgumentParser(description='Generates branch polygons') + parser.add_argument('-a', '--dem-domain', help='DEM domain file', required=False, type=str) + parser.add_argument('-s','--streams', help='Streams file to branch', required=True) + parser.add_argument('-i','--branch-id', help='Attribute with branch ids', required=True) + parser.add_argument('-d','--buffer-distance', help='Distance to buffer branches to create branch polygons', required=True,type=int) + parser.add_argument('-b','--branches', help='Branch polygons out file name', required=False,default=None) + parser.add_argument('-v','--verbose', help='Verbose printing', required=False,default=None,action='store_true') + + # extract to dictionary + args = vars(parser.parse_args()) + + streams_file, branch_id_attribute, buffer_distance, stream_polygons_file, dem_domain, verbose = args["streams"], args["branch_id"] , args["buffer_distance"], args["branches"], args['dem_domain'] , args["verbose"] + + if os.path.exists(streams_file): + # load file + stream_network = StreamNetwork.from_file( filename=streams_file,branch_id_attribute=branch_id_attribute, + values_excluded=None, attribute_excluded=None, verbose = verbose) + + # make stream polygons + stream_polys = StreamBranchPolygons.buffer_stream_branches( stream_network, + buffer_distance=buffer_distance, + verbose=verbose ) + + # Clip to DEM domain + if os.path.exists(dem_domain): + dem_domain = gpd.read_file(dem_domain) + stream_polys.geometry = gpd.clip(stream_polys, dem_domain).geometry + + stream_polys.write(stream_polygons_file,verbose=verbose) \ No newline at end of file diff --git a/src/burn_in_levees.py b/src/burn_in_levees.py index 93acbc1e4..ea8acd92c 100755 --- a/src/burn_in_levees.py +++ b/src/burn_in_levees.py @@ -18,7 +18,7 @@ def burn_in_levees(dem_filename,nld_filename,out_dem_filename): no_data = nld.nodata - nld_m = np.where(nld_data == int(no_data), -9999.0, (nld_data*0.3048).astype(rasterio.float32)) + nld_m = np.where(nld_data == int(no_data), -9999.0, 
(nld_data).astype(rasterio.float32)) dem_profile = dem.profile.copy() diff --git a/src/check_huc_inputs.py b/src/check_huc_inputs.py index cd471cd88..f44725180 100755 --- a/src/check_huc_inputs.py +++ b/src/check_huc_inputs.py @@ -1,58 +1,92 @@ #!/usr/bin/env python3 + import os import argparse +import string +import pathlib from glob import glob - +from logging import exception def __read_included_files(parent_dir_path): filename_patterns = glob(os.path.join(parent_dir_path,'included_huc*.lst')) - + accepted_hucs_set = set() for filename in filename_patterns: - f = open(filename,'r') - fList = f.readlines() - f.close() - - fList = [fl.rstrip() for fl in fList] - - accepted_hucs_set.update(fList) + with open(filename,'r') as huc_list_file: + file_lines = huc_list_file.readlines() + f_list = [fl.rstrip() for fl in file_lines] + accepted_hucs_set.update(f_list) return(accepted_hucs_set) def __read_input_hucs(hucs): - hucs = [h.split() for h in hucs][0] + huc_list = set() if os.path.isfile(hucs[0]): + + source_file_extension = pathlib.Path(hucs[0]).suffix + + if (source_file_extension.lower() != ".lst" ): + raise Exception("Incoming file must be in .lst format if submitting a file name and path.") + with open(hucs[0],'r') as hucs_file: - hucs = hucs_file.readlines() - hucs = [h.split() for h in hucs][0] - - return(hucs) - + file_lines = hucs_file.readlines() + f_list = [__clean_huc_value(fl) for fl in file_lines] + huc_list.update(f_list) + else: + if (len(hucs) > 0): + for huc in hucs: + huc_list.add(__clean_huc_value(huc)) + else: + huc_list.add(__clean_huc_value(hucs[0])) + + return(huc_list) + +def __clean_huc_value(huc): + + # Strips the newline character plus + # single or double quotes (which sometimes happens) + huc = huc.strip().replace("\"", "") + huc = huc.replace("\'", "") + return huc def __check_for_membership(hucs,accepted_hucs_set): for huc in hucs: + if isinstance(huc, str) and (not huc.isnumeric()): + msg = f"Huc value of {huc} does not appear to be a number. " + msg += "It could be an incorrect value, but it could also be that the huc list " + msg += "(if you used one) is not unix encoded." + raise KeyError(msg) + if huc not in accepted_hucs_set: raise KeyError("HUC {} not found in available inputs. Edit HUC inputs or acquire datasets and try again".format(huc)) def check_hucs(hucs): - - accepted_hucs = __read_included_files(os.path.join(os.environ['inputDataDir'],'huc_lists')) - hucs = __read_input_hucs(hucs) - __check_for_membership(hucs,accepted_hucs) + + huc_list_path = os.path.join(os.environ['inputDataDir'],'huc_lists') + accepted_hucs = __read_included_files(huc_list_path) + list_hucs = __read_input_hucs(hucs) + __check_for_membership(list_hucs, accepted_hucs) + + # we need to return the number of hucs being used. + # it is not easy to return a value to bash, except with standard out.
+ # so we will just print the count back. (Note: This means there can be no other + # print commands in this file, even for debugging, as bash will pick up the + # very first "print".) + print(len(list_hucs)) if __name__ == '__main__': # parse arguments parser = argparse.ArgumentParser(description='Checks input hucs for availability within inputs') - parser.add_argument('-u','--hucs',help='Line-delimited file or list of HUCs to check availibility for',required=True,nargs='+') + parser.add_argument('-u','--hucs',help='Line-delimited file or list of HUCs to check availability for',required=True, nargs='+') # extract to dictionary args = vars(parser.parse_args()) diff --git a/src/check_unit_errors.py b/src/check_unit_errors.py new file mode 100644 index 000000000..5eb467320 --- /dev/null +++ b/src/check_unit_errors.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +import argparse +import math +import os +import sys + +from utils.shared_variables import (UNIT_ERRORS_MIN_NUMBER_THRESHOLD, + UNIT_ERRORS_MIN_PERCENT_THRESHOLD) +from utils.fim_enums import FIM_exit_codes + +""" + Calculates the number of units/hucs that errored during processing. + Based on a percentage of original number of hucs to be processed, + this could respond with an abort processing code. It will also + only throw that code if a minimum number of errors exist. + There should always be at least one (non_zero_exit_codes.log) + + Note: The percentage number as a whole number and the min number of + errors are stored in the utils/shared_variables.py (kinda like constants) + + Parameters + ---------- + fim_dir : str + Directory containing FIM output folders. (i.e. output_run_data_dir) + number_of_input_hucs : int + Number of hucs originally submitted for processing. + + Returns + ---------- + return_code of 0 (success) or 62 (from fim_enums) +""" +def check_unit_errors(fim_dir, number_of_input_hucs): + + return_code = 0 # default success return code. + + if (not os.path.isdir(fim_dir)): + raise Exception(f"The fim output directory of {fim_dir} does not exist") + + unit_errors_dir = os.path.join(fim_dir, "unit_errors") + + if (not os.path.isdir(unit_errors_dir)): + raise Exception("The unit errors directory inside the fim output"\ + f" directory of {fim_dir} does not exist") + + error_file_count = 0 + for path in os.listdir(unit_errors_dir): + if ( os.path.isfile(os.path.join(unit_errors_dir, path)) and + ("non_zero_exit_codes.log" not in path)): + error_file_count += 1 + + # We will only error out if it is more than the min number of error files. + # This is done because sometimes during dev, you are expecting a bunch of errors + # and sometimes, the number of errors is too small to worry about.
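+ # Illustrative example (added comment; the actual threshold values live in utils/shared_variables.py and may differ from the numbers used here): with UNIT_ERRORS_MIN_NUMBER_THRESHOLD = 10 and + # UNIT_ERRORS_MIN_PERCENT_THRESHOLD = 10, a run of 200 hucs that produced 25 error files gives 25 / 200 * 100 = 12.5 percent, which clears both thresholds and raises the abort exception below.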
+ + if (error_file_count > UNIT_ERRORS_MIN_NUMBER_THRESHOLD): + + percentage_of_errors = error_file_count / number_of_input_hucs * 100 + + if (percentage_of_errors >= UNIT_ERRORS_MIN_PERCENT_THRESHOLD): + + errMsg = "Too many unit errors exist to continue,"\ + f" code:{FIM_exit_codes.EXCESS_UNIT_ERRORS.value}" + raise Exception(errMsg) + + return return_code + +if __name__ == '__main__': + + # parse arguments + parser = argparse.ArgumentParser(description='Check number of unit errors to determine if continue') + parser.add_argument('-f', '--fim_dir',help='root output folder for the process (output + name)', + required=True) + parser.add_argument('-n', '--number_of_input_hucs', help = 'Original number of hucs to process', + type=int, required=True) + + # extract to dictionary + args = vars(parser.parse_args()) + + # call function + check_unit_errors(**args) diff --git a/src/clip_rasters_to_branches.py b/src/clip_rasters_to_branches.py new file mode 100755 index 000000000..7057c830f --- /dev/null +++ b/src/clip_rasters_to_branches.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + + +from stream_branches import StreamNetwork +from stream_branches import StreamBranchPolygons +import argparse +from tqdm import tqdm + + +if __name__ == '__main__': + + # parse arguments + parser = argparse.ArgumentParser(description='Clips rasters to branch polygons') + parser.add_argument('-b','--branches', help='Branch polygons file name', required=True,default=None) + parser.add_argument('-d','--branch-id', help='Branch ID attribute', required=True,default=None) + parser.add_argument('-i','--branch-id-attribute', help='Branch ID attribute', required=True,default=None) + parser.add_argument('-r','--rasters', help='Raster file name to clip', required=True,default=None,nargs="+") + parser.add_argument('-c','--clipped-rasters', help='Branch polygons out file name', required=False,default=None,nargs="+") + parser.add_argument('-v','--verbose', help='Verbose printing', required=False,default=None,action='store_true') + + # extract to dictionary + args = vars(parser.parse_args()) + + stream_polygons_file, branch_id, branch_id_attribute, rasters, clipped_rasters, verbose = args["branches"], args["branch_id"],args["branch_id_attribute"],args["rasters"] , args["clipped_rasters"], args["verbose"] + + # load file + stream_polys = StreamBranchPolygons.from_file( filename=stream_polygons_file, + branch_id_attribute=branch_id_attribute, + values_excluded=None,attribute_excluded=None, verbose = verbose) + + for raster, clipped_raster in tqdm(zip(rasters,clipped_rasters),disable=(not verbose),total=len(rasters)): + if verbose: + print("Clipping \'{}\' to branch polygons ...".format(raster.split('/')[-1].split('.')[0])) + + stream_polys.clip(raster,clipped_raster,branch_id,branch_id_attribute) + + diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py old mode 100755 new mode 100644 index e5053627a..5ff1c8c9a --- a/src/clip_vectors_to_wbd.py +++ b/src/clip_vectors_to_wbd.py @@ -3,188 +3,190 @@ import sys import geopandas as gpd import argparse +import rasterio as rio +import fiona from shapely.geometry import MultiPolygon,Polygon +from utils.shared_variables import DEFAULT_FIM_PROJECTION_CRS from utils.shared_functions import getDriver, mem_profile - @mem_profile -def 
subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance): - - hucUnitLength = len(str(hucCode)) +def subset_vector_layers(subset_nwm_lakes, + subset_nwm_streams, + hucCode, + subset_nwm_headwaters, + wbd_buffer_filename, + wbd_filename, + dem_filename, + dem_domain, + nwm_lakes, + nwm_catchments, + subset_nwm_catchments, + nld_lines, + nld_lines_preprocessed, + landsea, + nwm_streams, + subset_landsea, + nwm_headwaters, + subset_nld_lines, + subset_nld_lines_preprocessed, + wbd_buffer_distance, + levee_protected_areas, + subset_levee_protected_areas): + + print("Getting Cell Size", flush=True) + with rio.open(dem_filename) as dem_raster: + dem_cellsize = max(dem_raster.res) + + # Erase area outside 3DEP domain + print("Erase area outside 3DEP domain", flush=True) + wbd = gpd.read_file(wbd_filename) + dem_domain = gpd.read_file(dem_domain) + wbd = gpd.clip(wbd, dem_domain) + wbd.to_file(wbd_filename, layer='WBDHU8', crs=DEFAULT_FIM_PROJECTION_CRS) # Get wbd buffer - wbd = gpd.read_file(wbd_filename) + print("Create wbd buffer", flush=True) wbd_buffer = wbd.copy() - wbd_buffer.geometry = wbd.geometry.buffer(wbd_buffer_distance,resolution=32) - projection = wbd_buffer.crs - - great_lakes = gpd.read_file(great_lakes_filename, mask = wbd_buffer).reset_index(drop=True) - - if not great_lakes.empty: - print("Masking Great Lakes for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - - # Clip excess lake area - great_lakes = gpd.clip(great_lakes, wbd_buffer) + wbd_buffer.geometry = wbd_buffer.geometry.buffer(wbd_buffer_distance, resolution=32) + wbd_buffer = gpd.clip(wbd_buffer, dem_domain) - # Buffer remaining lake area - great_lakes.geometry = great_lakes.buffer(lake_buffer_distance) + # Make the streams buffer smaller than the wbd_buffer so streams don't reach the edge of the DEM + wbd_streams_buffer = wbd_buffer.copy() + wbd_streams_buffer.geometry = wbd_streams_buffer.geometry.buffer(-3*dem_cellsize, resolution=32) - # Removed buffered GL from WBD buffer - wbd_buffer = gpd.overlay(wbd_buffer, great_lakes, how='difference') - wbd_buffer = wbd_buffer[['geometry']] - wbd_buffer.to_file(wbd_buffer_filename,driver=getDriver(wbd_buffer_filename),index=False) - - else: - wbd_buffer = wbd_buffer[['geometry']] - wbd_buffer.to_file(wbd_buffer_filename,driver=getDriver(wbd_buffer_filename),index=False) - - del great_lakes + wbd_buffer = wbd_buffer[['geometry']] + wbd_streams_buffer = wbd_streams_buffer[['geometry']] + wbd_buffer.to_file(wbd_buffer_filename, driver=getDriver(wbd_buffer_filename), index=False, crs=DEFAULT_FIM_PROJECTION_CRS) # Clip ocean water polygon for future masking ocean areas (where applicable) - landsea = gpd.read_file(landsea_filename, mask = wbd_buffer) + landsea = gpd.read_file(landsea, mask=wbd_buffer) if not landsea.empty: - landsea.to_file(subset_landsea_filename,driver=getDriver(subset_landsea_filename),index=False) + print("Create landsea gpkg", flush=True) + landsea.to_file(subset_landsea, driver = getDriver(subset_landsea), index=False, crs=DEFAULT_FIM_PROJECTION_CRS) del landsea + # Clip levee-protected areas polygons for future masking ocean areas (where 
applicable) + print("Subsetting Levee Protected Areas", flush=True) + levee_protected_areas = gpd.read_file(levee_protected_areas, mask=wbd_buffer) + if not levee_protected_areas.empty: + levee_protected_areas.to_file(subset_levee_protected_areas, driver = getDriver + (subset_levee_protected_areas), index=False, crs=DEFAULT_FIM_PROJECTION_CRS) + del levee_protected_areas + # Find intersecting lakes and writeout - print("Subsetting NWM Lakes for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nwm_lakes = gpd.read_file(nwm_lakes_filename, mask = wbd_buffer) + print("Subsetting NWM Lakes", flush=True) + nwm_lakes = gpd.read_file(nwm_lakes, mask = wbd_buffer) nwm_lakes = nwm_lakes.loc[nwm_lakes.Shape_Area < 18990454000.0] if not nwm_lakes.empty: # Perform fill process to remove holes/islands in the NWM lake polygons nwm_lakes = nwm_lakes.explode() - nwm_lakes_fill_holes=MultiPolygon(Polygon(p.exterior) for p in nwm_lakes['geometry']) # remove donut hole geometries + nwm_lakes_fill_holes = MultiPolygon(Polygon(p.exterior) for p in nwm_lakes['geometry']) # remove donut hole geometries # Loop through the filled polygons and insert the new geometry for i in range(len(nwm_lakes_fill_holes)): - nwm_lakes.loc[i,'geometry'] = nwm_lakes_fill_holes[i] - nwm_lakes.to_file(subset_nwm_lakes_filename,driver=getDriver(subset_nwm_lakes_filename),index=False) + nwm_lakes.loc[i, 'geometry'] = nwm_lakes_fill_holes[i] + nwm_lakes.to_file(subset_nwm_lakes, driver = getDriver(subset_nwm_lakes), index=False, crs=DEFAULT_FIM_PROJECTION_CRS) del nwm_lakes # Find intersecting levee lines - print("Subsetting NLD levee lines for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nld_lines = gpd.read_file(nld_lines_filename, mask = wbd_buffer) + print("Subsetting NLD levee lines", flush=True) + nld_lines = gpd.read_file(nld_lines, mask = wbd_buffer) if not nld_lines.empty: - nld_lines.to_file(subset_nld_lines_filename,driver=getDriver(subset_nld_lines_filename),index=False) + nld_lines.to_file(subset_nld_lines, driver = getDriver(subset_nld_lines), index=False, crs=DEFAULT_FIM_PROJECTION_CRS) del nld_lines - # Subset nhd headwaters - print("Subsetting NHD Headwater Points for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nhd_headwaters = gpd.read_file(nhd_headwaters_filename, mask = wbd_buffer) - if extent == 'MS': - nhd_headwaters = nhd_headwaters.loc[nhd_headwaters.mainstem==1] + # Preprocced levee lines for burning + nld_lines_preprocessed = gpd.read_file(nld_lines_preprocessed, mask = wbd_buffer) + if not nld_lines_preprocessed.empty: + nld_lines_preprocessed.to_file(subset_nld_lines_preprocessed, driver = getDriver(subset_nld_lines_preprocessed), index=False, crs=DEFAULT_FIM_PROJECTION_CRS) + del nld_lines_preprocessed - if len(nhd_headwaters) > 0: - nhd_headwaters.to_file(subset_nhd_headwaters_filename,driver=getDriver(subset_nhd_headwaters_filename),index=False) - else: - print ("No headwater point(s) within HUC " + str(hucCode) + " boundaries.") - sys.exit(0) - del nhd_headwaters + # Subset NWM headwaters + print("Subsetting NWM Headwater Points", flush=True) + nwm_headwaters = gpd.read_file(nwm_headwaters, mask=wbd_streams_buffer) - # Subset nhd streams - print("Querying NHD Streams for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nhd_streams = gpd.read_file(nhd_streams_filename, mask = wbd_buffer) - - if extent == 'MS': - nhd_streams = nhd_streams.loc[nhd_streams.mainstem==1] - - if len(nhd_streams) > 0: - - # Find incoming stream segments (to WBD buffer) and identify which are upstream 
- threshold_segments = gpd.overlay(nhd_streams, wbd_buffer, how='symmetric_difference') - from_list = threshold_segments.FromNode.to_list() - to_list = nhd_streams.ToNode.to_list() - missing_segments = list(set(from_list) - set(to_list)) - - # special case: stream meanders in and out of WBD buffer boundary - if str(hucCode) == '10030203': - missing_segments = missing_segments + [23001300001840.0, 23001300016571.0] - - if str(hucCode) == '08030100': - missing_segments = missing_segments + [20000600011559.0, 20000600045761.0, 20000600002821.0] - - # Remove incoming stream segment so it won't be routed as outflow during hydroconditioning - nhd_streams = nhd_streams.loc[~nhd_streams.FromNode.isin(missing_segments)] - - nhd_streams.to_file(subset_nhd_streams_filename,driver=getDriver(subset_nhd_streams_filename),index=False) + if len(nwm_headwaters) > 0: + nwm_headwaters.to_file(subset_nwm_headwaters, driver=getDriver(subset_nwm_headwaters), index=False, crs=DEFAULT_FIM_PROJECTION_CRS) else: - print ("No NHD streams within HUC " + str(hucCode) + " boundaries.") + print ("No headwater point(s) within HUC " + str(hucCode) + " boundaries.") sys.exit(0) - del nhd_streams + del nwm_headwaters # Find intersecting nwm_catchments - print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nwm_catchments = gpd.read_file(nwm_catchments_filename, mask = wbd_buffer) - if extent == 'MS': - nwm_catchments = nwm_catchments.loc[nwm_catchments.mainstem==1] + print("Subsetting NWM Catchments", flush=True) + nwm_catchments = gpd.read_file(nwm_catchments, mask=wbd_buffer) if len(nwm_catchments) > 0: - nwm_catchments.to_file(subset_nwm_catchments_filename,driver=getDriver(subset_nwm_catchments_filename),index=False) + nwm_catchments.to_file(subset_nwm_catchments, driver=getDriver(subset_nwm_catchments), index=False, crs=DEFAULT_FIM_PROJECTION_CRS) else: - print ("No NHD catchments within HUC " + str(hucCode) + " boundaries.") + print ("No NWM catchments within HUC " + str(hucCode) + " boundaries.") sys.exit(0) del nwm_catchments # Subset nwm streams - print("Subsetting NWM Streams and deriving headwaters for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nwm_streams = gpd.read_file(nwm_streams_filename, mask = wbd_buffer) - if extent == 'MS': - nwm_streams = nwm_streams.loc[nwm_streams.mainstem==1] + print("Subsetting NWM Streams", flush=True) + + nwm_streams = gpd.read_file(nwm_streams, mask = wbd) + + # NWM can have duplicate records, but appear to always be identical duplicates + nwm_streams.drop_duplicates(subset="ID", keep="first", inplace=True) + if len(nwm_streams) > 0: - nwm_streams.to_file(subset_nwm_streams_filename,driver=getDriver(subset_nwm_streams_filename),index=False) + nwm_streams = gpd.clip(nwm_streams, wbd_streams_buffer) + + nwm_streams.to_file(subset_nwm_streams, driver=getDriver(subset_nwm_streams), index=False, crs=DEFAULT_FIM_PROJECTION_CRS) else: - print ("No NWM stream segments within HUC " + str(hucCode) + " boundaries.") + print ("No NWM stream segments within HUC " + str(hucCode) + " boundaries.") sys.exit(0) del nwm_streams if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Subset vector layers') - parser.add_argument('-d','--hucCode', help='HUC boundary ID', required=True,type=str) - parser.add_argument('-w','--nwm-streams', help='NWM flowlines', required=True) - parser.add_argument('-s','--nhd-streams',help='NHDPlus HR burnline',required=True) - parser.add_argument('-l','--nwm-lakes', help='NWM Lakes', required=True) - 
parser.add_argument('-r','--nld-lines', help='Levee vectors to use within project path', required=True) - parser.add_argument('-g','--wbd',help='HUC boundary',required=True) - parser.add_argument('-f','--wbd-buffer',help='Buffered HUC boundary',required=True) - parser.add_argument('-m','--nwm-catchments', help='NWM catchments', required=True) - parser.add_argument('-y','--nhd-headwaters',help='NHD headwaters',required=True) - parser.add_argument('-v','--landsea',help='LandSea - land boundary',required=True) - parser.add_argument('-c','--subset-nhd-streams',help='NHD streams subset',required=True) - parser.add_argument('-z','--subset-nld-lines',help='Subset of NLD levee vectors for HUC',required=True) - parser.add_argument('-a','--subset-lakes',help='NWM lake subset',required=True) - parser.add_argument('-n','--subset-catchments',help='NWM catchments subset',required=True) - parser.add_argument('-e','--subset-nhd-headwaters',help='NHD headwaters subset',required=True,default=None) - parser.add_argument('-b','--subset-nwm-streams',help='NWM streams subset',required=True) - parser.add_argument('-x','--subset-landsea',help='LandSea subset',required=True) - parser.add_argument('-extent','--extent',help='FIM extent',required=True) - parser.add_argument('-gl','--great-lakes-filename',help='Great Lakes layer',required=True) - parser.add_argument('-wb','--wbd-buffer-distance',help='WBD Mask buffer distance',required=True,type=int) - parser.add_argument('-lb','--lake-buffer-distance',help='Great Lakes Mask buffer distance',required=True,type=int) + #print(sys.argv) + parser = argparse.ArgumentParser(description='Subset vector layers') + parser.add_argument('-a','--subset-nwm-lakes', help='NWM lake subset', + required=True) + parser.add_argument('-b','--subset-nwm-streams', help='NWM streams subset', + required=True) + parser.add_argument('-d','--hucCode', help='HUC boundary ID', required=True, + type=str) + parser.add_argument('-e','--subset-nwm-headwaters', help='NWM headwaters subset', + required=True, default=None) + parser.add_argument('-f','--wbd_buffer_filename', help='Buffered HUC boundary', + required=True) + parser.add_argument('-g','--wbd-filename', help='HUC boundary', required=True) + parser.add_argument('-i','--dem-filename', help='DEM filename', required=True) + parser.add_argument('-j','--dem-domain', help='DEM domain polygon', required=True) + parser.add_argument('-l','--nwm-lakes', help='NWM Lakes', required=True) + parser.add_argument('-m','--nwm-catchments', help='NWM catchments', + required=True) + parser.add_argument('-n','--subset-nwm-catchments', help='NWM catchments subset', + required=True) + parser.add_argument('-r','--nld-lines', help='Levee vectors to use within project path', + required=True) + parser.add_argument('-rp','--nld-lines-preprocessed', help='Levee vectors to use for DEM burning', + required=True) + parser.add_argument('-v','--landsea', help='LandSea - land boundary', + required=True) + parser.add_argument('-w','--nwm-streams', help='NWM flowlines', + required=True) + parser.add_argument('-x','--subset-landsea', help='LandSea subset', + required=True) + parser.add_argument('-y','--nwm-headwaters', help='NWM headwaters', + required=True) + parser.add_argument('-z','--subset-nld-lines', help='Subset of NLD levee vectors for HUC', + required=True) + parser.add_argument('-zp','--subset-nld-lines-preprocessed', help='Subset of NLD levee vectors for burning elevations into DEMs', + required=True) + parser.add_argument('-wb','--wbd-buffer-distance', help='WBD Mask 
buffer distance', + required=True, type=int) + parser.add_argument('-lpf','--levee-protected-areas', + help='Levee-protected areas filename', required=True) + parser.add_argument('-lps','--subset-levee-protected-areas', + help='Levee-protected areas subset', required=True) + args = vars(parser.parse_args()) - hucCode = args['hucCode'] - nwm_streams_filename = args['nwm_streams'] - nhd_streams_filename = args['nhd_streams'] - nwm_lakes_filename = args['nwm_lakes'] - nld_lines_filename = args['nld_lines'] - wbd_filename = args['wbd'] - wbd_buffer_filename = args['wbd_buffer'] - nwm_catchments_filename = args['nwm_catchments'] - nhd_headwaters_filename = args['nhd_headwaters'] - landsea_filename = args['landsea'] - subset_nhd_streams_filename = args['subset_nhd_streams'] - subset_nld_lines_filename = args['subset_nld_lines'] - subset_nwm_lakes_filename = args['subset_lakes'] - subset_nwm_catchments_filename = args['subset_catchments'] - subset_nhd_headwaters_filename = args['subset_nhd_headwaters'] - subset_nwm_streams_filename = args['subset_nwm_streams'] - subset_landsea_filename = args['subset_landsea'] - extent = args['extent'] - great_lakes_filename = args['great_lakes_filename'] - wbd_buffer_distance = args['wbd_buffer_distance'] - lake_buffer_distance = args['lake_buffer_distance'] - - subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance) + subset_vector_layers(**args) diff --git a/src/crosswalk_nwm_demDerived.py b/src/crosswalk_nwm_demDerived.py new file mode 100755 index 000000000..55ef7e36e --- /dev/null +++ b/src/crosswalk_nwm_demDerived.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 + +import geopandas as gpd +import pandas as pd +import numpy as np +import argparse +import stream_branches as sb + +from utils.shared_functions import getDriver +from utils.shared_variables import FIM_ID +from shapely.geometry import MultiLineString + +def Crosswalk_nwm_demDerived(nwm_streams, demDerived, wbd=None, node_prefix=None, sampling_size=None, + crosswalk_outfile=None, + demDerived_outfile=None, nwm_outfile=None, verbose=False): + + # load nwm streams + if isinstance(nwm_streams,sb.StreamNetwork): + pass + elif isinstance(nwm_streams,str): + nwm_streams = sb.StreamNetwork.from_file(nwm_streams) + else: + raise TypeError("For nwm_streams pass file path string or GeoDataFrame object") + + # load demDerived + if isinstance(demDerived,sb.StreamNetwork): + pass + elif isinstance(demDerived,str): + demDerived = sb.StreamNetwork.from_file(demDerived) + else: + raise TypeError("demDerived pass file path string or GeoDataFrame object") + + # clip nwm_streams + if wbd is not None: + nwm_streams = nwm_streams.clip(wbd,keep_geom_type=True,verbose=verbose) + + # build traversal to nwm + nwm_streams = Add_traversal_to_NWM(nwm_streams,node_prefix=node_prefix, + outfile=nwm_outfile, + verbose=verbose) + + # create points for nwm and demDerived networks + nwm_points = nwm_streams.explode_to_points(sampling_size=sampling_size, + verbose=verbose) + demDerived_points = demDerived.explode_to_points(sampling_size=sampling_size, + verbose=verbose) + + # conflate points + crosswalk_table = 
sb.StreamNetwork.conflate_points(demDerived_points, nwm_points, + source_reach_id_attribute='HydroID', + target_reach_id_attribute='ID', + verbose=verbose) + + # merge crosswalk table + crosswalk_table.rename(columns={'ID':'feature_id'},inplace=True) + demDerived.drop(columns='feature_id',inplace=True,errors='raise') + demDerived['HydroID'] = demDerived['HydroID'].astype(int) + demDerived = demDerived.merge(crosswalk_table, how='left', left_on='HydroID', right_index=True) + + if demDerived_outfile is not None: + demDerived.write(demDerived_outfile,index=False,verbose=verbose) + + if crosswalk_outfile is not None: + crosswalk_table.to_csv(crosswalk_outfile,index=True) + + #print(demDerived, crosswalk_table) + return(demDerived, crosswalk_table) + + +def Add_traversal_to_NWM(nwm_streams,node_prefix=None,outfile=None,verbose=False): + + if isinstance(nwm_streams,sb.StreamNetwork): + pass + elif isinstance(nwm_streams,str): + nwm_streams = sb.StreamNetwork.from_file(nwm_streams) + else: + raise TypeError("nwm_streams_file pass file path string or GeoDataFrame object") + + # remove multilinestrings if any + anyMultiLineStrings = np.any(np.array([isinstance(g, MultiLineString) for g in nwm_streams.geometry])) + if anyMultiLineStrings: + nwm_streams = nwm_streams.dissolve_by_branch(branch_id_attribute='ID', attribute_excluded=None, + values_excluded=None, verbose=verbose) + + + # create stream node ids + nwm_streams = nwm_streams.derive_nodes(toNode_attribute='To_Node',fromNode_attribute='From_Node', + reach_id_attribute='ID', + outlet_linestring_index=-1,node_prefix=node_prefix, + max_node_digits=8,verbose=verbose) + # inlets and outlets + nwm_streams = nwm_streams.derive_outlets(toNode_attribute='To_Node',fromNode_attribute='From_Node', + outlets_attribute='outlet_id',verbose=verbose) + nwm_streams = nwm_streams.derive_inlets(toNode_attribute='To_Node',fromNode_attribute='From_Node', + inlets_attribute='inlet_id',verbose=verbose) + + # upstream and downstream dictionaries + upstreams,downstreams = nwm_streams.make_up_and_downstream_dictionaries(reach_id_attribute='ID', + toNode_attribute='To_Node', + fromNode_attribute='From_Node', + verbose=verbose) + + # derive arbolate sum + nwm_streams = nwm_streams.get_arbolate_sum(arbolate_sum_attribute='arbolate_sum',inlets_attribute='inlet_id', + reach_id_attribute='ID',length_conversion_factor_to_km = 0.001, + upstreams=upstreams, downstreams=downstreams, + toNode_attribute='To_Node', + fromNode_attribute='From_Node', + verbose=verbose + ) + + # derive levelpaths + nwm_streams = nwm_streams.derive_stream_branches(toNode_attribute='To_Node', + fromNode_attribute='From_Node', + upstreams=upstreams, + outlet_attribute='outlet_id', + branch_id_attribute='levpa_id', + reach_id_attribute='ID', + comparison_attributes='order_', + comparison_function=max, + max_branch_id_digits=6, + verbose=verbose) + + #nwm_streams = nwm_streams.dissolve_by_branch(branch_id_attribute='levpa_id', attribute_excluded=None, + # values_excluded=None, verbose=verbose) + + nwm_streams.reset_index(drop=True,inplace=True) + + if outfile is not None: + nwm_streams.write(outfile,index=False,verbose=verbose) + + return(nwm_streams) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Crosswalking') + parser.add_argument('-n','--nwm-streams', help='NWM Streams', required=True) + parser.add_argument('-d','--demDerived', help='demDerived Streams', required=True) + parser.add_argument('-w','--wbd', help='WBD File', required=False,default=None) + 
parser.add_argument('-p','--node-prefix', help='Node Prefix', required=False,default=None) + parser.add_argument('-a','--sampling-size', help='Sample size for Points', required=False,default=None,type=int) + parser.add_argument('-c','--crosswalk-outfile', help='Crosswalk Out File', required=False,default=None) + parser.add_argument('-e','--demDerived-outfile', help='demDerived Out File', required=False,default=None) + parser.add_argument('-m','--nwm-outfile', help='NWM Streams Out File', required=False,default=None) + parser.add_argument('-v','--verbose', help='Verbose', required=False,default=False,action='store_true') + + kwargs = vars(parser.parse_args()) + + Crosswalk_nwm_demDerived(**kwargs) diff --git a/src/delineate_hydros_and_produce_HAND.sh b/src/delineate_hydros_and_produce_HAND.sh new file mode 100755 index 000000000..51cbad515 --- /dev/null +++ b/src/delineate_hydros_and_produce_HAND.sh @@ -0,0 +1,187 @@ +#!/bin/bash -e + +## Level is equal to the parent script: 'unit' or 'branch' +level=$1 + +## INITIALIZE TOTAL TIME TIMER ## +T_total_start + +## MASK LEVEE-PROTECTED AREAS FROM DEM ## +if [ "$mask_leveed_area_toggle" = "True" ] && [ -f $outputHucDataDir/LeveeProtectedAreas_subset.gpkg ]; then + echo -e $startDiv"Mask levee-protected areas from DEM (*Overwrite dem_meters.tif output) $hucNumber $branch_zero_id" + date -u + Tstart + python3 -m memory_profiler $srcDir/mask_dem.py -dem $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -nld $outputHucDataDir/LeveeProtectedAreas_subset.gpkg -out $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -b $branch_id_attribute -i $current_branch_id -b0 $branch_zero_id -csv $outputHucDataDir/levee_levelpaths.csv -l $levee_id_attribute + Tcount +fi + +## D8 FLOW ACCUMULATIONS ## +echo -e $startDiv"D8 Flow Accumulations $hucNumber $current_branch_id" +date -u +Tstart +$taudemDir/aread8 -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -ad8 $outputCurrentBranchDataDir/flowaccum_d8_burned_filled_$current_branch_id.tif -wg $outputCurrentBranchDataDir/headwaters_$current_branch_id.tif -nc +Tcount + +# THRESHOLD ACCUMULATIONS ## +echo -e $startDiv"Threshold Accumulations $hucNumber $current_branch_id" +date -u +Tstart +$taudemDir/threshold -ssa $outputCurrentBranchDataDir/flowaccum_d8_burned_filled_$current_branch_id.tif -src $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -thresh 1 +Tcount + +## PREPROCESSING FOR LATERAL THALWEG ADJUSTMENT ### +echo -e $startDiv"Preprocessing for lateral thalweg adjustment $hucNumber $current_branch_id" +date -u +Tstart +python3 -m memory_profiler $srcDir/unique_pixel_and_allocation.py -s $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -o $outputCurrentBranchDataDir/demDerived_streamPixels_ids_$current_branch_id.tif +Tcount + +## ADJUST THALWEG MINIMUM USING LATERAL ZONAL MINIMUM ## +echo -e $startDiv"Performing lateral thalweg adjustment $hucNumber $current_branch_id" +date -u +Tstart +python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -s $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -a $outputCurrentBranchDataDir/demDerived_streamPixels_ids_"$current_branch_id"_allo.tif -d $outputCurrentBranchDataDir/demDerived_streamPixels_ids_"$current_branch_id"_dist.tif -t 50 -o $outputCurrentBranchDataDir/dem_lateral_thalweg_adj_$current_branch_id.tif -th $thalweg_lateral_elev_threshold +Tcount + +## MASK BURNED DEM 
FOR STREAMS ONLY ### +echo -e $startDiv"Mask Burned DEM for Thalweg Only $hucNumber $current_branch_id" +date -u +Tstart +gdal_calc.py --quiet --type=Int32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -B $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif --calc="A/B" --outfile="$outputCurrentBranchDataDir/flowdir_d8_burned_filled_flows_$current_branch_id.tif" --NoDataValue=0 +Tcount + +## FLOW CONDITION STREAMS ## +echo -e $startDiv"Flow Condition Thalweg $hucNumber $current_branch_id" +date -u +Tstart +$taudemDir/flowdircond -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_flows_$current_branch_id.tif -z $outputCurrentBranchDataDir/dem_lateral_thalweg_adj_$current_branch_id.tif -zfdc $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif +Tcount + +## D8 SLOPES ## +echo -e $startDiv"D8 Slopes from DEM $hucNumber $current_branch_id" +date -u +Tstart +mpiexec -n $ncores_fd $taudemDir2/d8flowdir -fel $outputCurrentBranchDataDir/dem_lateral_thalweg_adj_$current_branch_id.tif -sd8 $outputCurrentBranchDataDir/slopes_d8_dem_meters_$current_branch_id.tif +Tcount + +## STREAMNET FOR REACHES ## +echo -e $startDiv"Stream Net for Reaches $hucNumber $current_branch_id" +date -u +Tstart +$taudemDir/streamnet -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -fel $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif -ad8 $outputCurrentBranchDataDir/flowaccum_d8_burned_filled_$current_branch_id.tif -src $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -ord $outputCurrentBranchDataDir/streamOrder_$current_branch_id.tif -tree $outputCurrentBranchDataDir/treeFile_$current_branch_id.txt -coord $outputCurrentBranchDataDir/coordFile_$current_branch_id.txt -w $outputCurrentBranchDataDir/sn_catchments_reaches_$current_branch_id.tif -net $outputCurrentBranchDataDir/demDerived_reaches_$current_branch_id.shp +Tcount + +## SPLIT DERIVED REACHES ## +echo -e $startDiv"Split Derived Reaches $hucNumber $current_branch_id" +date -u +Tstart +$srcDir/split_flows.py -f $outputCurrentBranchDataDir/demDerived_reaches_$current_branch_id.shp -d $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif -s $outputCurrentBranchDataDir/demDerived_reaches_split_$current_branch_id.gpkg -p $outputCurrentBranchDataDir/demDerived_reaches_split_points_$current_branch_id.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -l $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_$current_branch_id.gpkg +Tcount + +## GAGE WATERSHED FOR REACHES ## +echo -e $startDiv"Gage Watershed for Reaches $hucNumber $current_branch_id" +date -u +Tstart +mpiexec -n $ncores_gw $taudemDir/gagewatershed -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -gw $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.tif -o $outputCurrentBranchDataDir/demDerived_reaches_split_points_$current_branch_id.gpkg -id $outputCurrentBranchDataDir/idFile_$current_branch_id.txt +Tcount + +## VECTORIZE FEATURE ID CENTROIDS ## +echo -e $startDiv"Vectorize Pixel Centroids $hucNumber $current_branch_id" +date -u +Tstart +$srcDir/reachID_grid_to_vector_points.py -r $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -i featureID -p $outputCurrentBranchDataDir/flows_points_pixels_$current_branch_id.gpkg +Tcount + +## GAGE WATERSHED FOR PIXELS ## +echo -e 
$startDiv"Gage Watershed for Pixels $hucNumber $current_branch_id" +date -u +Tstart +mpiexec -n $ncores_gw $taudemDir/gagewatershed -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_"$current_branch_id".tif -gw $outputCurrentBranchDataDir/gw_catchments_pixels_$current_branch_id.tif -o $outputCurrentBranchDataDir/flows_points_pixels_$current_branch_id.gpkg -id $outputCurrentBranchDataDir/idFile_$current_branch_id.txt +Tcount + +# D8 REM ## +echo -e $startDiv"D8 REM $hucNumber $current_branch_id" +date -u +Tstart +$srcDir/make_rem.py -d $outputCurrentBranchDataDir/dem_thalwegCond_"$current_branch_id".tif -w $outputCurrentBranchDataDir/gw_catchments_pixels_$current_branch_id.tif -o $outputCurrentBranchDataDir/rem_$current_branch_id.tif -t $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif +Tcount + +## BRING DISTANCE DOWN TO ZERO & MASK TO CATCHMENTS## +echo -e $startDiv"Bring negative values in REM to zero and mask to catchments $hucNumber $current_branch_id" +date -u +gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/rem_$current_branch_id.tif -B $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.tif --calc="(A*(A>=0)*(B>0))" --NoDataValue=$ndv --outfile=$outputCurrentBranchDataDir/"rem_zeroed_masked_$current_branch_id.tif" +Tcount + +## RASTERIZE LANDSEA (OCEAN AREA) POLYGON (IF APPLICABLE) ## +if [ -f $outputHucDataDir/LandSea_subset.gpkg ]; then + echo -e $startDiv"Rasterize filtered/dissolved ocean/Glake polygon $hucNumber $current_branch_id" + date -u + Tstart + gdal_rasterize -ot Int32 -burn $ndv -a_nodata $ndv -init 1 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/LandSea_subset.gpkg $outputCurrentBranchDataDir/LandSea_subset_$current_branch_id.tif + Tcount +fi + +## POLYGONIZE REACH WATERSHEDS ## +echo -e $startDiv"Polygonize Reach Watersheds $hucNumber $current_branch_id" +date -u +Tstart +gdal_polygonize.py -8 -f GPKG $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.tif $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.gpkg catchments HydroID +Tcount + +## PROCESS CATCHMENTS AND MODEL STREAMS STEP 1 ## +echo -e $startDiv"Process catchments and model streams $hucNumber $current_branch_id" +date -u +Tstart +python3 -m memory_profiler $srcDir/filter_catchments_and_add_attributes.py -i $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.gpkg -f $outputCurrentBranchDataDir/demDerived_reaches_split_$current_branch_id.gpkg -c $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -o $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -u $hucNumber +Tcount + +## RASTERIZE NEW CATCHMENTS AGAIN ## +echo -e $startDiv"Rasterize filtered catchments $hucNumber $current_branch_id" +date -u +Tstart +gdal_rasterize -ot Int32 -a HydroID -a_nodata 0 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.tif +Tcount + +## MASK SLOPE TO CATCHMENTS ## +echo -e $startDiv"Mask to slopes to catchments $hucNumber $current_branch_id" +date -u +gdal_calc.py --quiet --type=Float32 --overwrite --co 
"COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/slopes_d8_dem_meters_$current_branch_id.tif -B $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.tif --calc="A*(B>0)" --NoDataValue=$ndv --outfile=$outputCurrentBranchDataDir/slopes_d8_dem_meters_masked_$current_branch_id.tif +Tcount + +## MAKE CATCHMENT AND STAGE FILES ## +echo -e $startDiv"Generate Catchment List and Stage List Files $hucNumber $current_branch_id" +date -u +Tstart +$srcDir/make_stages_and_catchlist.py -f $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -c $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -s $outputCurrentBranchDataDir/stage_$current_branch_id.txt -a $outputCurrentBranchDataDir/catch_list_$current_branch_id.txt -m $stage_min_meters -i $stage_interval_meters -t $stage_max_meters +Tcount + +## MASK REM RASTER TO REMOVE OCEAN AREAS ## +if [ -f $outputCurrentBranchDataDir/LandSea_subset_$current_branch_id.tif ]; then + echo -e $startDiv"Additional masking to REM raster to remove ocean/Glake areas $hucNumber $current_branch_id" + date -u + Tstart + gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/rem_zeroed_masked_$current_branch_id.tif -B $outputCurrentBranchDataDir/LandSea_subset_$current_branch_id.tif --calc="(A*B)" --NoDataValue=$ndv --outfile=$outputCurrentBranchDataDir/"rem_zeroed_masked_$current_branch_id.tif" + Tcount +fi + +## HYDRAULIC PROPERTIES ## +echo -e $startDiv"Sample reach averaged parameters $hucNumber $current_branch_id" +date -u +Tstart +$taudemDir/catchhydrogeo -hand $outputCurrentBranchDataDir/rem_zeroed_masked_$current_branch_id.tif -catch $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.tif -catchlist $outputCurrentBranchDataDir/catch_list_$current_branch_id.txt -slp $outputCurrentBranchDataDir/slopes_d8_dem_meters_masked_$current_branch_id.tif -h $outputCurrentBranchDataDir/stage_$current_branch_id.txt -table $outputCurrentBranchDataDir/src_base_$current_branch_id.csv +Tcount + +## FINALIZE CATCHMENTS AND MODEL STREAMS ## +echo -e $startDiv"Finalize catchments and model streams $hucNumber $current_branch_id" +date -u +Tstart +if [ "$level" = "branch" ]; then + b_arg=$outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_$current_branch_id.gpkg + z_arg=$outputCurrentBranchDataDir/nwm_catchments_proj_subset_levelPaths_$current_branch_id.gpkg +elif [ "$level" = "unit" ]; then + # Branch zero has a different source for -b and -z arguments + b_arg=$outputHucDataDir/nwm_subset_streams.gpkg + z_arg=$outputHucDataDir/nwm_catchments_proj_subset.gpkg +fi +python3 -m memory_profiler $srcDir/add_crosswalk.py -d $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -a $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -s $outputCurrentBranchDataDir/src_base_$current_branch_id.csv -l $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -f $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -r $outputCurrentBranchDataDir/src_full_crosswalked_$current_branch_id.csv -j $outputCurrentBranchDataDir/src_$current_branch_id.json -x $outputCurrentBranchDataDir/crosswalk_table_$current_branch_id.csv -t 
$outputCurrentBranchDataDir/hydroTable_$current_branch_id.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $b_arg -y $outputCurrentBranchDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $z_arg -k $outputCurrentBranchDataDir/small_segments_$current_branch_id.csv +Tcount diff --git a/src/derive_level_paths.py b/src/derive_level_paths.py new file mode 100755 index 000000000..f34dc494c --- /dev/null +++ b/src/derive_level_paths.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 + +import os +import argparse +import geopandas as gpd +import sys + +from stream_branches import StreamNetwork +from utils.fim_enums import FIM_exit_codes + +def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribute, + out_stream_network_dissolved=None, huc_id=None, + headwaters_outfile=None, catchments=None, waterbodies=None, + catchments_outfile=None, + branch_inlets_outfile=None, + toNode_attribute='To_Node', fromNode_attribute='From_Node', + reach_id_attribute='HydroID', verbose=False): + + + if verbose: + print("Deriving level paths ...") + + # load file + if verbose: + print("Loading stream network ...") + + if os.path.exists(in_stream_network): + stream_network = StreamNetwork.from_file(filename=in_stream_network) + else: + print("Sorry, no branches exist and processing can not continue. This could be an empty file.") + sys.exit(FIM_exit_codes.UNIT_NO_BRANCHES.value) # will send a 60 back + + # if there are no reaches at this point + if (len(stream_network) == 0): + # This is technically not an error but we need to have it logged so the user know what + # happened to it and we need the huc to not be included in future processing. + # We need it to be not included in the fim_input.csv at the end of the unit processing. + # Throw an exception with valid text. This will show up in the non-zero exit codes and explain why an error. + # Later, we can look at creating custom sys exit codes + # raise UserWarning("Sorry, no branches exist and processing can not continue. This could be an empty file.") + print("Sorry, no branches exist and processing can not continue. This could be an empty file.") + sys.exit(FIM_exit_codes.UNIT_NO_BRANCHES.value) # will send a 60 back + + # values_exluded of 1 and 2 mean where are dropping stream orders 1 and 2. We are leaving those + # for branch zero. + stream_network = stream_network.exclude_attribute_values(branch_id_attribute="order_", values_excluded=[1,2] ) + + # if there are no reaches at this point (due to filtering) + if (len(stream_network) == 0): + print("No branches exist but branch zero processing will continue. 
This could be due to stream order filtering.") + return + + inlets_attribute = 'inlet_id' + outlets_attribute = 'outlet_id' + outlet_linestring_index = -1 + + # converts multi-linestrings to linestrings + stream_network = stream_network.multilinestrings_to_linestrings() + + # derive nodes + stream_network = stream_network.derive_nodes(toNode_attribute=toNode_attribute, + fromNode_attribute=fromNode_attribute, + reach_id_attribute=reach_id_attribute, + outlet_linestring_index=outlet_linestring_index, + node_prefix=None, + verbose=verbose) + + # derive outlets and inlets + stream_network = stream_network.derive_outlets(toNode_attribute, + fromNode_attribute, + outlets_attribute=outlets_attribute, + verbose=verbose) + + stream_network = stream_network.derive_inlets(toNode_attribute, + fromNode_attribute, + inlets_attribute=inlets_attribute, + verbose=verbose + ) # derive up and downstream networks + upstreams, downstreams = stream_network.make_up_and_downstream_dictionaries( + reach_id_attribute=reach_id_attribute, + toNode_attribute=toNode_attribute, + fromNode_attribute=fromNode_attribute, + verbose=True) + + # derive arbolate sum + stream_network = stream_network.get_arbolate_sum(arbolate_sum_attribute='arbolate_sum', + inlets_attribute=inlets_attribute, + reach_id_attribute=reach_id_attribute, + upstreams=upstreams, + downstreams=downstreams, + length_conversion_factor_to_km = 0.001, + verbose=verbose) + + # derive stream branches + stream_network = stream_network.derive_stream_branches(toNode_attribute=toNode_attribute, + fromNode_attribute=fromNode_attribute, + upstreams=upstreams, + branch_id_attribute=branch_id_attribute, + reach_id_attribute=reach_id_attribute, + comparison_attributes=['arbolate_sum', 'order_'], + comparison_function=max, + verbose=verbose) + + # filter out streams without catchments + if (catchments is not None) & (catchments_outfile is not None): + catchments = gpd.read_file(catchments) + + stream_network = stream_network.remove_branches_without_catchments( + catchments, + reach_id_attribute=reach_id_attribute, + branch_id_attribute=branch_id_attribute, + reach_id_attribute_in_catchments=reach_id_attribute, + verbose=verbose) + + # subset which columns to merge + stream_network_to_merge = stream_network.filter(items = [reach_id_attribute,inlets_attribute, + outlets_attribute,branch_id_attribute]) + + catchments = catchments.merge(stream_network_to_merge,how='inner', + left_on=reach_id_attribute, + right_on=reach_id_attribute) + + catchments.reset_index(drop=True, inplace=True) + + catchments.to_file(catchments_outfile, index=False, driver='GPKG') + + # derive headwaters + if (headwaters_outfile is not None): + headwaters = stream_network.derive_headwater_points_with_inlets( + fromNode_attribute=fromNode_attribute, + inlets_attribute=inlets_attribute, + outlet_linestring_index=outlet_linestring_index) + # headwaters write + headwaters.to_file(headwaters_outfile, index=False, driver='GPKG') + + if out_stream_network is not None: + if verbose: + print("Writing stream branches ...") + stream_network.write(out_stream_network, index=True) + + if out_stream_network_dissolved is not None: + stream_network = stream_network.trim_branches_in_waterbodies(branch_id_attribute=branch_id_attribute, + verbose=verbose) + + # dissolve by levelpath + stream_network = stream_network.dissolve_by_branch(branch_id_attribute=branch_id_attribute, + attribute_excluded=None, #'order_', + values_excluded=None, #[1,2], + out_vector_files=out_stream_network_dissolved, + verbose=verbose) + + 
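# Added note (inferred from the method and argument names): the call below drops dissolved + # branches that fall within NWM waterbodies (see the -w/--waterbodies argument) and rewrites the + # dissolved stream network file without them. +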
stream_network = stream_network.remove_branches_in_waterbodies(waterbodies=waterbodies, + out_vector_files=out_stream_network_dissolved, + verbose=False) + + if branch_inlets_outfile is not None: + branch_inlets = stream_network.derive_inlet_points_by_feature(feature_attribute=branch_id_attribute, + outlet_linestring_index=outlet_linestring_index) + + branch_inlets.to_file(branch_inlets_outfile, index=False, driver='GPKG') + + return(stream_network) + + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Create stream network level paths') + parser.add_argument('-i','--in-stream-network', help='Input stream network', required=True) + parser.add_argument('-b','--branch-id-attribute', help='Name of the branch attribute desired', required=True) + parser.add_argument('-u','--huc-id', help='Current HUC ID', required=False, default=None) + parser.add_argument('-r','--reach-id-attribute', help='Reach ID attribute to use in source file', required=False, default='HydroID') + parser.add_argument('-c','--catchments', help='NWM catchments to append level path data to', required=False, default=None) + parser.add_argument('-t','--catchments-outfile', help='NWM catchments outfile with appended level path data', required=False, default=None) + parser.add_argument('-w','--waterbodies', help='NWM waterbodies to eliminate branches from', required=False, default=None) + parser.add_argument('-n','--branch_inlets_outfile', help='Output level paths inlets', required=False, default=None) + parser.add_argument('-o','--out-stream-network', help='Output stream network', required=False, default=None) + parser.add_argument('-e','--headwaters-outfile', help='Output stream network headwater points', required=False, default=None) + parser.add_argument('-d','--out-stream-network-dissolved', help='Dissolved output stream network', required=False, default=None) + parser.add_argument('-v','--verbose', help='Verbose output', required=False, default=False, action='store_true') + + args = vars(parser.parse_args()) + + Derive_level_paths(**args) + diff --git a/src/edit_points.py b/src/edit_points.py new file mode 100755 index 000000000..400386cc9 --- /dev/null +++ b/src/edit_points.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +import geopandas as gpd +import argparse + + +def Edit_points(stream_reaches,branch_id_attribute,reach_points, + out_reach_points=None,out_pixel_points=None,verbose=False): + + if verbose: + print("Editing points files ...") + + if verbose: + print("Loading files ...") + stream_reaches = gpd.read_file(stream_reaches) + stream_reaches = stream_reaches.astype({'HydroID':int}) + + reach_points=gpd.read_file(reach_points) + reach_points['HydroID'] = reach_points['id'].copy() + + # merge + if verbose: + print("Merging points ...") + reach_points = reach_points.merge(stream_reaches.loc[:,["HydroID",branch_id_attribute]],how='inner',on='HydroID') + + # join on HydroID to add branch_id + if out_reach_points is not None: + reach_points.to_file(out_reach_points,driver='GPKG',index=False) + + # make pixel points + if verbose: + print("Generating pixel points ...") + + pixel_points = reach_points.copy() + pixel_points['id'] = list(range(1,len(pixel_points)+1)) + + if out_pixel_points is not None: + pixel_points.to_file(out_pixel_points,driver='GPKG',index=False) + + return(reach_points,pixel_points) + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Edit points to include branch ids') + parser.add_argument('-i','--stream-reaches', help='Input stream network', 
required=True) + parser.add_argument('-b','--branch-id-attribute', help='Name of the branch attribute desired', required=True) + parser.add_argument('-r','--reach-points', help='Name of the branch attribute desired', required=True) + parser.add_argument('-o','--out-reach-points', help='Output stream network', required=False,default=None) + parser.add_argument('-p','--out-pixel-points', help='Dissolved output stream network', required=False,default=None) + parser.add_argument('-v','--verbose', help='Verbose output', required=False,default=False,action='store_true') + + args = vars(parser.parse_args()) + + Edit_points(**args) diff --git a/src/filter_catchments_and_add_attributes.py b/src/filter_catchments_and_add_attributes.py index 17aa5c611..7801b613d 100755 --- a/src/filter_catchments_and_add_attributes.py +++ b/src/filter_catchments_and_add_attributes.py @@ -4,27 +4,38 @@ import geopandas as gpd import numpy as np import sys + from utils.shared_variables import FIM_ID from utils.shared_functions import mem_profile - +from utils.fim_enums import FIM_exit_codes @mem_profile -def filter_catchments_and_add_attributes(input_catchments_filename, input_flows_filename, output_catchments_filename, output_flows_filename, wbd_filename, huc_code): +def filter_catchments_and_add_attributes(input_catchments_filename, + input_flows_filename, + output_catchments_filename, + output_flows_filename, + wbd_filename, + huc_code): + input_catchments = gpd.read_file(input_catchments_filename) wbd = gpd.read_file(wbd_filename) input_flows = gpd.read_file(input_flows_filename) # filter segments within huc boundary select_flows = tuple(map(str,map(int,wbd[wbd.HUC8.str.contains(huc_code)][FIM_ID]))) - - if input_flows.HydroID.dtype != 'str': input_flows.HydroID = input_flows.HydroID.astype(str) + + if input_flows.HydroID.dtype != 'str': + input_flows.HydroID = input_flows.HydroID.astype(str) output_flows = input_flows[input_flows.HydroID.str.startswith(select_flows)].copy() - if output_flows.HydroID.dtype != 'int': output_flows.HydroID = output_flows.HydroID.astype(int) + + if output_flows.HydroID.dtype != 'int': + output_flows.HydroID = output_flows.HydroID.astype(int) if len(output_flows) > 0: # merges input flows attributes and filters hydroids - if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) + if input_catchments.HydroID.dtype != 'int': + input_catchments.HydroID = input_catchments.HydroID.astype(int) output_catchments = input_catchments.merge(output_flows.drop(['geometry'],axis=1),on='HydroID') # filter out smaller duplicate features @@ -40,19 +51,21 @@ def filter_catchments_and_add_attributes(input_catchments_filename, input_flows_ # add geometry column output_catchments['areasqkm'] = output_catchments.geometry.area/(1000**2) - output_catchments.to_file(output_catchments_filename, driver="GPKG",index=False) - output_flows.to_file(output_flows_filename, driver="GPKG", index=False) + try: + output_catchments.to_file(output_catchments_filename, driver="GPKG",index=False) + output_flows.to_file(output_flows_filename, driver="GPKG", index=False) + except ValueError: + # this is not an exception, but a custom exit code that can be trapped + print("There are no flowlines in the HUC after stream order filtering.") + sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back + else: + # this is not an exception, but a custom exit code that can be trapped + print("There are no flowlines in the HUC after stream order filtering.") + 
sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back if __name__ == '__main__': - input_catchments_filename = sys.argv[1] - input_flows_filename = sys.argv[2] - output_catchments_filename = sys.argv[3] - output_flows_filename = sys.argv[4] - wbd_filename = sys.argv[5] - huc_code = str(sys.argv[6]) - # Parse arguments. parser = argparse.ArgumentParser(description='filter_catchments_and_add_attributes.py') parser.add_argument('-i', '--input-catchments-filename', help='input-catchments-filename', required=True) @@ -61,7 +74,7 @@ def filter_catchments_and_add_attributes(input_catchments_filename, input_flows_ parser.add_argument('-o', '--output-flows-filename', help='output-flows-filename', required=True) parser.add_argument('-w', '--wbd-filename', help='wbd-filename', required=True) parser.add_argument('-u', '--huc-code', help='huc-code', required=True) - + # Extract to dictionary and assign to variables. args = vars(parser.parse_args()) diff --git a/src/filter_inputs_by_huc.py b/src/filter_inputs_by_huc.py new file mode 100755 index 000000000..4543adacc --- /dev/null +++ b/src/filter_inputs_by_huc.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 + +import pandas as pd +import argparse + +def filter_inputs_by_huc(fim_inputs, hucs, fim_outputs): + + try: + with open(hucs[0]) as hf: + hucsList = set([str(h).rstrip() for h in hf]) + except FileNotFoundError: + hucsList = set(hucs) + + fim_inputs = pd.read_csv(fim_inputs,header=None,dtype=str) + fim_inputs_mask = fim_inputs.loc[:,0].isin(hucsList) + fim_inputs = fim_inputs.loc[fim_inputs_mask,:] + + assert len(fim_inputs) > 0, "Filtered FIM list is empty" + + fim_inputs.to_csv(fim_outputs, index=False, header=False) + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Filter a FIM inputs (HUC) list down to a subset of HUCs.') + parser.add_argument('-g','--fim-inputs',help='Input FIM inputs csv (first column is the HUC number).',required=True) + parser.add_argument('-u','--hucs',help='HUCs to keep: either a line-delimited file of HUC numbers or a space-delimited list.',required=True,nargs='+') + parser.add_argument('-o','--fim-outputs',help='Output csv path for the filtered FIM inputs list.',required=True) + + args = vars(parser.parse_args()) + + filter_inputs_by_huc(**args) diff --git a/src/finalize_srcs.py b/src/finalize_srcs.py new file mode 100755 index 000000000..46b61ca8b --- /dev/null +++ b/src/finalize_srcs.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 + +import geopandas as gpd +import pandas as pd +from numpy import unique +import json +import argparse +from utils.shared_functions import getDriver + +def finalize_srcs(srcbase,srcfull,hydrotable,output_srcfull=None,output_hydrotable=None): + + + # calculate src_full + srcbase = pd.read_csv(srcbase, dtype= {'CatchId': int}) + srcbase.rename(columns={'CatchId':'HydroID'},inplace=True) + srcbase = srcbase.rename(columns=lambda x: x.strip(" ")) + + # read and merge in attributes from base hydrofabric src full + srcfull = pd.read_csv(srcfull, dtype={'CatchId': int}) + srcfull.rename(columns={'CatchId':'HydroID'},inplace=True) + srcfull = srcfull.loc[:,["ManningN","HydroID","feature_id"]].drop_duplicates(subset='HydroID') + + srcbase = srcbase.merge(srcfull,how='inner',left_on='HydroID',right_on='HydroID') + + srcbase = srcbase.apply(pd.to_numeric,**{'errors' : 'coerce'}) + srcbase['TopWidth (m)'] = srcbase['SurfaceArea (m2)']/srcbase['LENGTHKM']/1000 + srcbase['WettedPerimeter (m)'] = srcbase['BedArea (m2)']/srcbase['LENGTHKM']/1000 + srcbase['WetArea (m2)'] = srcbase['Volume (m3)']/srcbase['LENGTHKM']/1000 + srcbase['HydraulicRadius (m)'] =
srcbase['WetArea (m2)']/srcbase['WettedPerimeter (m)'] + srcbase['HydraulicRadius (m)'].fillna(0, inplace=True) + srcbase['Discharge (m3s-1)'] = srcbase['WetArea (m2)']* \ + pow(srcbase['HydraulicRadius (m)'],2.0/3)* \ + pow(srcbase['SLOPE'],0.5)/srcbase['ManningN'] + + # set nans to 0 + srcbase.loc[srcbase['Stage']==0,['Discharge (m3s-1)']] = 0 + + if output_srcfull is not None: + srcbase.to_csv(output_srcfull,index=False) + + hydrotable = pd.read_csv(hydrotable) + hydrotable.drop(columns=['stage','discharge_cms'],inplace=True) + + hydrotable.drop_duplicates(subset='HydroID',inplace=True) + #srcfull = srcfull.loc[:,["ManningN","HydroID","feature_id"]].drop_duplicates(subset='HydroID') + hydrotable = hydrotable.merge(srcbase.loc[:,['HydroID','Stage','Discharge (m3s-1)']],how='right',left_on='HydroID',right_on='HydroID') + hydrotable.rename(columns={'Stage' : 'stage','Discharge (m3s-1)':'discharge_cms'},inplace=True) + #hydrotable.drop_duplicates(subset='stage',inplace=True) + + if output_hydrotable is not None: + hydrotable.to_csv(output_hydrotable,index=False) + + return(srcbase,hydrotable) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='') + parser.add_argument('-b','--srcbase', help='Base synthetic rating curve table', required=True) + parser.add_argument('-f','--srcfull', help='Base synthetic rating curve table', required=True) + parser.add_argument('-w','--hydrotable',help='Input Hydro-Table',required=False) + parser.add_argument('-r','--output-srcfull', help='Output crosswalked synthetic rating curve table', required=False,default=None) + parser.add_argument('-t','--output-hydrotable',help='Hydrotable',required=False,default=None) + + args = vars(parser.parse_args()) + + finalize_srcs(**args) diff --git a/src/fr_to_ms_raster_mask.py b/src/fr_to_ms_raster_mask.py deleted file mode 100755 index 2ebc713f0..000000000 --- a/src/fr_to_ms_raster_mask.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env python3 - -''' - Description: Mask raster layers using 'mainstems' stream buffer -''' - -import sys -import os -import argparse -import geopandas as gpd -import rasterio.mask -from utils.shared_functions import mem_profile - - -@mem_profile -def fr_to_ms_raster_mask(ms_buffer_dist, split_flows_filename, fdr_fr, dem_fr, slope_fr, fdr_ms_filename, dem_ms_filename, slope_ms_filename, str_pixel_fr, str_pixel_ms_filename): - # create output layer names - split_flows = gpd.read_file(split_flows_filename) - - # Limit the rasters to the buffer distance around the draft streams. 
- print ("Limiting rasters to buffer area ({} meters) around model streams".format(str(ms_buffer_dist))) - - split_flows_ms_buffer = split_flows.unary_union.buffer(ms_buffer_dist) - - print('Writing raster outputs ...') - - # Mask nhddem - with rasterio.open(dem_fr) as src: - out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) - out_meta = src.meta - - out_meta.update({"driver": "GTiff", - "height": out_image.shape[1], - "width": out_image.shape[2], - "transform": out_transform}) - - with rasterio.open(os.path.join(os.path.dirname(dem_fr), dem_ms_filename), "w", **out_meta) as dest: - dest.write(out_image) - - # Mask fdr - with rasterio.open(fdr_fr) as src: - out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) - out_meta = src.meta - - out_meta.update({"driver": "GTiff", - "height": out_image.shape[1], - "width": out_image.shape[2], - "transform": out_transform}) - - with rasterio.open(os.path.join(os.path.dirname(fdr_fr), fdr_ms_filename), "w", **out_meta) as dest: - dest.write(out_image) - - # Mask slope - with rasterio.open(slope_fr) as src: - out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) - out_meta = src.meta - - out_meta.update({"driver": "GTiff", - "height": out_image.shape[1], - "width": out_image.shape[2], - "transform": out_transform}) - - with rasterio.open(os.path.join(os.path.dirname(slope_fr), slope_ms_filename), "w", **out_meta) as dest: - dest.write(out_image) - - # Mask stream pixels - with rasterio.open(str_pixel_fr) as src: - out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) - out_meta = src.meta - - out_meta.update({"driver": "GTiff", - "height": out_image.shape[1], - "width": out_image.shape[2], - "transform": out_transform}) - - with rasterio.open(os.path.join(os.path.dirname(str_pixel_fr), str_pixel_ms_filename), "w", **out_meta) as dest: - dest.write(out_image) - - -if __name__ == '__main__': - ms_buffer_dist = int(os.environ['ms_buffer_dist']) - - # Parse arguments. - parser = argparse.ArgumentParser(description='fr_to_ms_raster_mask.py') - parser.add_argument('-s', '--split-flows-filename', help='split-flows-filename', required=True) - parser.add_argument('-f', '--fdr-fr', help='fdr-fr', required=True) - parser.add_argument('-d', '--dem-fr', help='dem-fr', required=True) - parser.add_argument('-r', '--slope-fr', help='slope-fr', required=True) - parser.add_argument('-m', '--fdr-ms-filename', help='fdr-ms-filename', required=True) - parser.add_argument('-n', '--dem-ms-filename', help='dem-ms-filename', required=True) - parser.add_argument('-o', '--slope-ms-filename', help='slope-ms-filename', required=True) - parser.add_argument('-p', '--str-pixel-fr', help='str-pixel-fr', required=True) - parser.add_argument('-q', '--str-pixel-ms-filename', help='str-pixel-ms-filename', required=True) - - # Extract to dictionary and assign to variables. 
- args = vars(parser.parse_args()) - - fr_to_ms_raster_mask(ms_buffer_dist, **args) diff --git a/src/generate_branch_list.py b/src/generate_branch_list.py new file mode 100755 index 000000000..2529a223e --- /dev/null +++ b/src/generate_branch_list.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 + +import os +import argparse +import pandas as pd +import sys + +from stream_branches import StreamNetwork + +def generate_branch_list(stream_network_dissolved, branch_id_attribute, + output_branch_list_file): + + ''' + Processing: + This create a branch_ids.lst file which is required at the very start of processing + hucs. This becomes the list that run_unit_wb.sh needs to iterate over branches + + Note: The .csv twin to this is appended to each time a branch completes, + resulting in a list that only contains successfully processed branches. + Params: + - stream_network_dissolved (str): the gkpg that contains the list of disolved branch ids + - branch_id_attribute (str): the id of the field in the gkpg that has the branch ids. + (ie. like levpa_id (from params_template.env) ) + - output_branch_list_file (str): file name and path of the list to be created. + Output: + - create a file (likely a .lst file) with branch ids (not including branch zero) + ''' + + if os.path.exists(stream_network_dissolved): + # load stream network + stream_network_dissolved = StreamNetwork.from_file( stream_network_dissolved, + branch_id_attribute=branch_id_attribute ) + # reduce to branch id attribute and convert to pandas df + stream_network_dissolved = stream_network_dissolved.loc[:,branch_id_attribute] + + # write out the list version (just branch numbers) + stream_network_dissolved.to_csv(output_branch_list_file, sep= " ", index=False, header=False) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Create branch list') + parser.add_argument('-d','--stream-network-dissolved', help='Dissolved stream network', required=True) + parser.add_argument('-b','--branch-id-attribute', help='Branch ID attribute to use in dissolved stream network', required=True) + parser.add_argument('-o','--output-branch-list-file', help='Output branch list', required=True) + + args = vars(parser.parse_args()) + + generate_branch_list(**args) diff --git a/src/generate_branch_list_csv.py b/src/generate_branch_list_csv.py new file mode 100755 index 000000000..e355e9f25 --- /dev/null +++ b/src/generate_branch_list_csv.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 + +import os +import argparse +import pandas as pd +import pathlib + +def generate_branch_list_csv(huc_id, branch_id, output_branch_csv): + + ''' + Processing: + This create a branch_ids.csv file which is required for various post processing tasks. + If the csv already, then the new huc, branch id wil be appended. + If it does not yet exist, a new csv will be created + + Params: + - huc_id + - branch_id + - output_branch_csv (str): csv file name and path of the list to be created. 
(likely branch_list.csv) + + Output: + - create a csv file (assuming the format coming in is a csv + ''' + # validations + file_extension = pathlib.Path(output_branch_csv).suffix + + if (file_extension != ".csv"): + raise ValueError("The output branch csv file does not have a .csv extension") + + if (len(huc_id) != 8) or (not huc_id.isnumeric()): + raise ValueError("The huc_id does not appear to be an eight digit number") + + if (not branch_id.isnumeric()): + raise ValueError("The branch_id does not appear to be a valid number") + + df_csv = None + new_data = [[huc_id, branch_id]] + col_names = ["huc_id","branch_id"] + df_csv = pd.DataFrame(new_data, columns=col_names) + + if (not os.path.exists(output_branch_csv)): + df_csv.to_csv(output_branch_csv, index=False, header=False) + else: + df_csv.to_csv(output_branch_csv, mode='a', index=False, header=False) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Create branch list') + parser.add_argument('-b','--branch-id', help='Branch ID', required=True) + parser.add_argument('-o','--output-branch-csv', help='Output branch csv list', required=True) + parser.add_argument('-u','--huc-id', help='HUC number being aggregated', required=True) + args = vars(parser.parse_args()) + + generate_branch_list_csv(**args) diff --git a/src/get_all_huc_in_inputs.py b/src/get_all_huc_in_inputs.py deleted file mode 100755 index af507b10b..000000000 --- a/src/get_all_huc_in_inputs.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 - -import geopandas as gpd -import pandas as pd -import numpy as np -from glob import glob -import argparse -from os.path import join -from tqdm import tqdm - - -def find_unique_hucs(inputsDir,hucLength): - - # get file list with glob - huc4_vaa_list = glob(join(inputsDir,'NHDPlusFlowlineVAA_*.gpkg')) - - unique_hucs = np.array([]) - for vaa_file in tqdm(huc4_vaa_list): - reachCodes = gpd.read_file(vaa_file)['ReachCode'] - reachCodes = reachCodes.astype(str) - reachCodes = reachCodes.apply(lambda x : x[0:8]) - unique_hucs = np.append(unique_hucs,reachCodes.apply(lambda x: x[0:hucLength]).unique()) - - unique_hucs = pd.Series(unique_hucs) - unique_hucs.to_csv(join(inputsDir,'included_huc{}.lst'.format(hucLength)),header=False,index=False) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Get unique HUCs in results data dir') - parser.add_argument('-i','--inputs-directory',help='Basins polygons to use within project path',required=True) - parser.add_argument('-l','--huc-length',help='Basins polygons to use within proiject path',required=True,type=int) - - args = vars(parser.parse_args()) - - inputsDir = args['inputs_directory'] - hucLength = args['huc_length'] - - find_unique_hucs(inputsDir,hucLength) diff --git a/src/identify_src_bankfull.py b/src/identify_src_bankfull.py index 3be19fae1..585e6ae74 100755 --- a/src/identify_src_bankfull.py +++ b/src/identify_src_bankfull.py @@ -1,123 +1,146 @@ #!/usr/bin/env python3 +import argparse import os import sys import pandas as pd -import argparse import matplotlib.pyplot as plt +import multiprocessing import seaborn as sns -from functools import reduce -from multiprocessing import Pool -from os.path import isfile, join, dirname, isdir import shutil import warnings -from pathlib import Path import datetime as dt +import re +import traceback +from pathlib import Path +from functools import reduce +from multiprocessing import Pool +from os.path import isfile, join, dirname, isdir +from tqdm import tqdm 
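+# Note: tqdm is only used for the optional --verbose progress bar in multi_process(); it is not needed for the core bankfull lookup.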
sns.set_theme(style="whitegrid") warnings.simplefilter(action='ignore', category=FutureWarning) """ - Identify the SRC bankfull stage values using the NWM 1.5yr flows + Identify the SRC bankfull stage values using the NWM bankfull estimated flows Parameters ---------- fim_dir : str Directory containing FIM output folders. bankfull_flow_dir : str - Directory containing "bankfull" flows files (e.g. NWM 1.5yr recurr). + Directory containing "bankfull" flows files (e.g. NWM bankfull estimated recurr). number_of_jobs : str Number of jobs. plots : str - Flag to create SRC plots for all hydroids (True/False) + Optional: Flag to create SRC plots for all hydroids (True/False) """ def src_bankfull_lookup(args): src_full_filename = args[0] - src_modify_filename = args[1] + src_usecols = args[1] df_bflows = args[2] huc = args[3] - src_plot_option = args[4] - huc_output_dir = args[5] + branch_id = args[4] + src_plot_option = args[5] + huc_output_dir = args[6] ## Read the src_full_crosswalked.csv - #print('Processing: ' + str(huc)) - log_text = 'Calculating: ' + str(huc) + '\n' - df_src = pd.read_csv(src_full_filename,dtype={'HydroID': int,'feature_id': int}) - - ## NWM recurr rename discharge var - df_bflows = df_bflows.rename(columns={'discharge':'discharge_1_5'}) - - ## Combine the nwm 1.5yr flows into the SRC via feature_id - df_src = df_src.merge(df_bflows,how='left',on='feature_id') - - ## Check if there are any missing data, negative or zero flow values in the discharge_1_5 - check_null = df_src['discharge_1_5'].isnull().sum() - if check_null > 0: - log_text += 'Missing feature_id in crosswalk for huc: ' + str(huc) + ' --> these featureids will be ignored in bankfull calcs (~' + str(check_null/84) + ' features) \n' - ## Fill missing/nan nwm discharge_1_5 values with -999 to handle later - df_src['discharge_1_5'] = df_src['discharge_1_5'].fillna(-999) - negative_flows = len(df_src.loc[(df_src.discharge_1_5 <= 0) & (df_src.discharge_1_5 != -999)]) - if negative_flows > 0: - log_text += 'HUC: ' + str(huc) + ' --> Negative or zero flow values found (likely lakeid loc)\n' - - ## Define the channel geometry variable names to use from the src - hradius_var = 'HydraulicRadius (m)' - volume_var = 'Volume (m3)' - - ## Locate the closest SRC discharge value to the NWM 1.5yr flow - df_src['Q_1_5_find'] = (df_src['discharge_1_5'] - df_src['Discharge (m3s-1)']).abs() - - ## Check for any missing/null entries in the input SRC - if df_src['Q_1_5_find'].isnull().values.any(): # there may be null values for lake or coastal flow lines (need to set a value to do groupby idxmin below) - log_text += 'HUC: ' + str(huc) + ' --> Null values found in "Q_1_5_find" calc. These will be filled with 999999 () \n' - ## Fill missing/nan nwm 'Discharge (m3s-1)' values with 999999 to handle later - df_src['Q_1_5_find'] = df_src['Q_1_5_find'].fillna(999999) - if df_src['HydroID'].isnull().values.any(): - log_text += 'HUC: ' + str(huc) + ' --> Null values found in "HydroID"... 
\n' - - df_1_5 = df_src[['Stage','HydroID',volume_var,hradius_var,'Q_1_5_find']] # create new subset df to perform the Q_1_5 lookup - df_1_5 = df_1_5[df_1_5['Stage'] > 0.0] # Ensure bankfull stage is greater than stage=0 - df_1_5.reset_index(drop=True, inplace=True) - df_1_5 = df_1_5.loc[df_1_5.groupby('HydroID')['Q_1_5_find'].idxmin()].reset_index(drop=True) # find the index of the Q_1_5_find (closest matching flow) - df_1_5 = df_1_5.rename(columns={'Stage':'Stage_1_5',volume_var:'Volume_bankfull',hradius_var:'HRadius_bankfull'}) # rename volume to use later for channel portion calc - df_src = df_src.merge(df_1_5[['Stage_1_5','HydroID','Volume_bankfull','HRadius_bankfull']],how='left',on='HydroID') - df_src.drop(['Q_1_5_find'], axis=1, inplace=True) - - ## Calculate the channel portion of bankfull Volume - df_src['chann_volume_ratio'] = 1.0 # At stage=0 set channel_ratio to 1.0 (avoid div by 0) - df_src['chann_volume_ratio'].where(df_src['Stage'] == 0, df_src['Volume_bankfull'] / (df_src[volume_var]),inplace=True) - #df_src['chann_volume_ratio'] = df_src['chann_volume_ratio'].clip_upper(1.0) - df_src['chann_volume_ratio'].where(df_src['chann_volume_ratio'] <= 1.0, 1.0, inplace=True) # set > 1.0 ratio values to 1.0 (these are within the channel) - df_src['chann_volume_ratio'].where(df_src['discharge_1_5'] > 0.0, 0.0, inplace=True) # if the discharge_1_5 value <= 0 then set channel ratio to 0 (will use global overbank manning n) - #df_src.drop(['Volume_bankfull'], axis=1, inplace=True) - - ## Calculate the channel portion of bankfull Hydraulic Radius - df_src['chann_hradius_ratio'] = 1.0 # At stage=0 set channel_ratio to 1.0 (avoid div by 0) - df_src['chann_hradius_ratio'].where(df_src['Stage'] == 0, df_src['HRadius_bankfull'] / (df_src[hradius_var]),inplace=True) - #df_src['chann_hradius_ratio'] = df_src['HRadius_bankfull'] / (df_src[hradius_var]+.0001) # old adding 0.01 to avoid dividing by 0 at stage=0 - df_src['chann_hradius_ratio'].where(df_src['chann_hradius_ratio'] <= 1.0, 1.0, inplace=True) # set > 1.0 ratio values to 1.0 (these are within the channel) - df_src['chann_hradius_ratio'].where(df_src['discharge_1_5'] > 0.0, 0.0, inplace=True) # if the discharge_1_5 value <= 0 then set channel ratio to 0 (will use global overbank manning n) - #df_src.drop(['HRadius_bankfull'], axis=1, inplace=True) - - ## mask bankfull variables when the 1.5yr flow value is <= 0 - df_src['Stage_1_5'].mask(df_src['discharge_1_5'] <= 0.0,inplace=True) - - ## Create a new column to identify channel/floodplain via the bankfull stage value - df_src.loc[df_src['Stage'] <= df_src['Stage_1_5'], 'channel_fplain_1_5'] = 'channel' - df_src.loc[df_src['Stage'] > df_src['Stage_1_5'], 'channel_fplain_1_5'] = 'floodplain' - df_src['channel_fplain_1_5'] = df_src['channel_fplain_1_5'].fillna('channel') - - ## Output new SRC with bankfull column - df_src.to_csv(src_modify_filename,index=False) - log_text += 'Completed: ' + str(huc) - - ## plot rating curves (optional arg) - if src_plot_option == 'True': - if isdir(huc_output_dir) == False: - os.mkdir(huc_output_dir) - generate_src_plot(df_src, huc_output_dir) - + print('Calculating bankfull: ' + str(huc) + ' branch id: ' + str(branch_id)) + log_text = 'Calculating: ' + str(huc) + ' branch id: ' + str(branch_id) + '\n' + try: + df_src = pd.read_csv(src_full_filename,usecols=src_usecols,dtype={'HydroID': int,'feature_id': int}) + + ## NWM recurr rename discharge var + df_bflows = df_bflows.rename(columns={'discharge':'bankfull_flow'}) + + ## Combine the nwm bankfull 
estimated flows into the SRC via feature_id + df_src = df_src.merge(df_bflows,how='left',on='feature_id') + + ## Check if there are any missing data, negative or zero flow values in the bankfull_flow + check_null = df_src['bankfull_flow'].isnull().sum() + if check_null > 0: + log_text += 'WARNING: Missing feature_id in crosswalk for huc: ' + str(huc) + ' branch id: ' + str(branch_id) + ' --> these featureids will be ignored in bankfull calcs (~' + str(check_null/84) + ' features) \n' + ## Fill missing/nan nwm bankfull_flow values with -999 to handle later + df_src['bankfull_flow'] = df_src['bankfull_flow'].fillna(-999) + negative_flows = len(df_src.loc[(df_src.bankfull_flow <= 0) & (df_src.bankfull_flow != -999)]) + if negative_flows > 0: + log_text += 'WARNING: HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + ' --> Negative or zero flow values found in the input bankfull flows csv (posible lakeid loc)\n' + + ## Define the channel geometry variable names to use from the src + hradius_var = 'HydraulicRadius (m)' + volume_var = 'Volume (m3)' + surface_area_var = 'SurfaceArea (m2)' + bedarea_var = 'BedArea (m2)' + + ## Locate the closest SRC discharge value to the NWM bankfull estimated flow + df_src['Q_bfull_find'] = (df_src['bankfull_flow'] - df_src['Discharge (m3s-1)']).abs() + + ## Check for any missing/null entries in the input SRC + if df_src['Q_bfull_find'].isnull().values.any(): # there may be null values for lake or coastal flow lines (need to set a value to do groupby idxmin below) + log_text += 'WARNING: HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + ' --> Null values found in "Q_bfull_find" calc. These will be filled with 999999 () \n' + ## Fill missing/nan nwm 'Discharge (m3s-1)' values with 999999 to handle later + df_src['Q_bfull_find'] = df_src['Q_bfull_find'].fillna(999999) + if df_src['HydroID'].isnull().values.any(): + log_text += 'WARNING: HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + ' --> Null values found in "HydroID"... 
\n' + + df_bankfull_calc = df_src[['Stage','HydroID',bedarea_var,volume_var,hradius_var,surface_area_var,'Q_bfull_find']] # create new subset df to perform the Q_1_5 lookup + df_bankfull_calc = df_bankfull_calc[df_bankfull_calc['Stage'] > 0.0] # Ensure bankfull stage is greater than stage=0 + df_bankfull_calc.reset_index(drop=True, inplace=True) + df_bankfull_calc = df_bankfull_calc.loc[df_bankfull_calc.groupby('HydroID')['Q_bfull_find'].idxmin()].reset_index(drop=True) # find the index of the Q_bfull_find (closest matching flow) + df_bankfull_calc = df_bankfull_calc.rename(columns={'Stage':'Stage_bankfull',bedarea_var:'BedArea_bankfull',volume_var:'Volume_bankfull',hradius_var:'HRadius_bankfull',surface_area_var:'SurfArea_bankfull'}) # rename volume to use later for channel portion calc + df_src = df_src.merge(df_bankfull_calc[['Stage_bankfull','HydroID','BedArea_bankfull','Volume_bankfull','HRadius_bankfull','SurfArea_bankfull']],how='left',on='HydroID') + df_src.drop(['Q_bfull_find'], axis=1, inplace=True) + + ## The bankfull ratio variables below were previously used for the composite variable roughness routine (not currently implimented) + # ## Calculate the channel portion of bankfull Volume + # df_src['chann_volume_ratio'] = 1.0 # At stage=0 set channel_ratio to 1.0 (avoid div by 0) + # df_src['chann_volume_ratio'].where(df_src['Stage'] == 0, df_src['Volume_bankfull'] / (df_src[volume_var]),inplace=True) + # #df_src['chann_volume_ratio'] = df_src['chann_volume_ratio'].clip_upper(1.0) + # df_src['chann_volume_ratio'].where(df_src['chann_volume_ratio'] <= 1.0, 1.0, inplace=True) # set > 1.0 ratio values to 1.0 (these are within the channel) + # df_src['chann_volume_ratio'].where(df_src['bankfull_flow'] > 0.0, 0.0, inplace=True) # if the bankfull_flow value <= 0 then set channel ratio to 0 (will use global overbank manning n) + # #df_src.drop(['Volume_bankfull'], axis=1, inplace=True) + + # ## Calculate the channel portion of bankfull Hydraulic Radius + # df_src['chann_hradius_ratio'] = 1.0 # At stage=0 set channel_ratio to 1.0 (avoid div by 0) + # df_src['chann_hradius_ratio'].where(df_src['Stage'] == 0, df_src['HRadius_bankfull'] / (df_src[hradius_var]),inplace=True) + # #df_src['chann_hradius_ratio'] = df_src['HRadius_bankfull'] / (df_src[hradius_var]+.0001) # old adding 0.01 to avoid dividing by 0 at stage=0 + # df_src['chann_hradius_ratio'].where(df_src['chann_hradius_ratio'] <= 1.0, 1.0, inplace=True) # set > 1.0 ratio values to 1.0 (these are within the channel) + # df_src['chann_hradius_ratio'].where(df_src['bankfull_flow'] > 0.0, 0.0, inplace=True) # if the bankfull_flow value <= 0 then set channel ratio to 0 (will use global overbank manning n) + # #df_src.drop(['HRadius_bankfull'], axis=1, inplace=True) + + # ## Calculate the channel portion of bankfull Surface Area + # df_src['chann_surfarea_ratio'] = 1.0 # At stage=0 set channel_ratio to 1.0 (avoid div by 0) + # df_src['chann_surfarea_ratio'].where(df_src['Stage'] == 0, df_src['SurfArea_bankfull'] / (df_src[surface_area_var]),inplace=True) + # df_src['chann_surfarea_ratio'].where(df_src['chann_surfarea_ratio'] <= 1.0, 1.0, inplace=True) # set > 1.0 ratio values to 1.0 (these are within the channel) + # df_src['chann_surfarea_ratio'].where(df_src['bankfull_flow'] > 0.0, 0.0, inplace=True) # if the bankfull_flow value <= 0 then set channel ratio to 0 (will use global overbank manning n) + # #df_src.drop(['HRadius_bankfull'], axis=1, inplace=True) + + ## mask bankfull variables when the bankfull estimated flow value is <= 0 + 
df_src['Stage_bankfull'].mask(df_src['bankfull_flow'] <= 0.0,inplace=True) + + ## Create a new column to identify channel/floodplain via the bankfull stage value + df_src.loc[df_src['Stage'] <= df_src['Stage_bankfull'], 'bankfull_proxy'] = 'channel' + df_src.loc[df_src['Stage'] > df_src['Stage_bankfull'], 'bankfull_proxy'] = 'floodplain' + df_src['bankfull_proxy'] = df_src['bankfull_proxy'].fillna('channel') + + ## Output new SRC with bankfull column + df_src.to_csv(src_full_filename,index=False) + log_text += 'Completed: ' + str(huc) + + ## plot rating curves (optional arg) + if src_plot_option: + if isdir(huc_output_dir) == False: + os.mkdir(huc_output_dir) + generate_src_plot(df_src, huc_output_dir) + + except Exception as ex: + summary = traceback.StackSummary.extract( + traceback.walk_stack(None)) + print(str(huc) + ' branch id: ' + str(branch_id) + " failed for some reason") + print(f"*** {ex}") + print(''.join(summary.format())) + log_text += 'ERROR --> ' + str(huc) + ' branch id: ' + str(branch_id) + " failed (details: " + (f"*** {ex}") + (''.join(summary.format())) + '\n' return(log_text) def generate_src_plot(df_src, plt_out_dir): @@ -136,86 +159,104 @@ def generate_src_plot(df_src, plt_out_dir): axes[1].set_title('Channel Volume vs. HRadius Ratio') sns.despine(fig, left=True, bottom=True) sns.scatterplot(x='Discharge (m3s-1)', y='Stage', data=plot_df, ax=axes[0]) - sns.lineplot(x='Discharge (m3s-1)', y='Stage_1_5', data=plot_df, color='green', ax=axes[0]) - axes[0].fill_between(plot_df['Discharge (m3s-1)'], plot_df['Stage_1_5'],alpha=0.5) - axes[0].text(plot_df['Discharge (m3s-1)'].median(), plot_df['Stage_1_5'].median(), "NWM 1.5yr: " + str(plot_df['Stage_1_5'].median())) + sns.lineplot(x='Discharge (m3s-1)', y='Stage_bankfull', data=plot_df, color='green', ax=axes[0]) + axes[0].fill_between(plot_df['Discharge (m3s-1)'], plot_df['Stage_bankfull'],alpha=0.5) + axes[0].text(plot_df['Discharge (m3s-1)'].median(), plot_df['Stage_bankfull'].median(), "Bankfull Proxy Stage: " + str(plot_df['Stage_bankfull'].median())) sns.scatterplot(x='chann_volume_ratio', y='Stage', data=plot_df, ax=axes[1], label="chann_volume_ratio", s=38) sns.scatterplot(x='chann_hradius_ratio', y='Stage', data=plot_df, ax=axes[1], label="chann_hradius_ratio", s=12) + sns.scatterplot(x='chann_surfarea_ratio', y='Stage', data=plot_df, ax=axes[1], label="chann_surfarea_ratio", s=12) axes[1].legend() plt.savefig(plt_out_dir + os.sep + str(hydroid) + '_bankfull.png',dpi=100, bbox_inches='tight') plt.close() -def multi_process(src_bankfull_lookup, procs_list): +def multi_process(src_bankfull_lookup, procs_list, log_file, number_of_jobs, verbose): ## Initiate multiprocessing + available_cores = multiprocessing.cpu_count() + if number_of_jobs > available_cores: + number_of_jobs = available_cores - 2 + print("Provided job number exceeds the number of available cores. 
" + str(number_of_jobs) + " max jobs will be used instead.") + print(f"Identifying bankfull stage for {len(procs_list)} hucs using {number_of_jobs} jobs") with Pool(processes=number_of_jobs) as pool: - map_output = pool.map(src_bankfull_lookup, procs_list) + #progress_bar = tqdm(total=len(procs_list[0])) + if verbose: + map_output = tqdm(pool.imap(src_bankfull_lookup, procs_list), total=len(procs_list)) + tuple(map_output) # fetch the lazy results + else: + map_output = pool.map(src_bankfull_lookup, procs_list) log_file.writelines(["%s\n" % item for item in map_output]) +def run_prep(fim_dir,bankfull_flow_filepath,number_of_jobs,verbose,src_plot_option): + procs_list = [] + + ## Print message to user and initiate run clock + print('Writing progress to log file here: /logs/log_bankfull_indentify.log') + print('This may take a few minutes...') + + ## Check that the input fim_dir exists + assert os.path.isdir(fim_dir), 'ERROR: could not find the input fim_dir location: ' + str(fim_dir) + ## Check that the bankfull flow filepath exists and read to dataframe + assert os.path.isfile(bankfull_flow_filepath), 'ERROR: Can not find the input bankfull flow file: ' + str(bankfull_flow_filepath) + + ## Create a time var to log run time + begin_time = dt.datetime.now() + ## initiate log file + log_file = open(join(fim_dir,'logs','log_bankfull_indentify.log'),"w") + log_file.write('START TIME: ' + str(begin_time) + '\n') + log_file.write('#########################################################\n\n') + + ## List of columns in SRC_full_crosswalk to read in (ignores other columns that may have been added by previous post-proccessing runs) + src_usecols=['Stage','Number of Cells','SurfaceArea (m2)','BedArea (m2)','Volume (m3)','SLOPE','LENGTHKM','AREASQKM','ManningN','HydroID','NextDownID','order_','TopWidth (m)','WettedPerimeter (m)','WetArea (m2)','HydraulicRadius (m)','Discharge (m3s-1)','feature_id'] + + df_bflows = pd.read_csv(bankfull_flow_filepath,dtype={'feature_id': int}) + huc_list = os.listdir(fim_dir) + huc_list.sort() # sort huc_list for helping track progress in future print statments + huc_pass_list = [] + for huc in huc_list: + #if huc != 'logs' and huc[-3:] != 'log' and huc[-4:] != '.csv': + if re.match('\d{8}', huc): + huc_branches_dir = os.path.join(fim_dir, huc,'branches') + for branch_id in os.listdir(huc_branches_dir): + branch_dir = os.path.join(huc_branches_dir,branch_id) + src_orig_full_filename = join(branch_dir,'src_full_crosswalked_' + branch_id + '.csv') + huc_output_dir = join(branch_dir,'src_plots') + ## check if BARC modified src_full_crosswalked_BARC.csv exists otherwise use the orginial src_full_crosswalked.csv + if isfile(src_orig_full_filename): + huc_pass_list.append(str(huc) + " --> src_full_crosswalked.csv") + procs_list.append([src_orig_full_filename, src_usecols, df_bflows, huc, branch_id, src_plot_option, huc_output_dir]) + else: + print('HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + 'WARNING --> can not find the SRC crosswalked csv file in the fim output dir: ' + str(branch_dir) + ' - skipping this branch!!!\n') + log_file.write('HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + 'WARNING --> can not find the SRC crosswalked csv file in the fim output dir: ' + str(branch_dir) + ' - skipping this branch!!!\n') + + log_file.writelines(["%s\n" % item for item in huc_pass_list]) + log_file.write('#########################################################\n\n') + + ## Pass huc procs_list to multiprocessing function + multi_process(src_bankfull_lookup, 
procs_list, log_file, number_of_jobs, verbose) + + ## Record run time and close log file + end_time = dt.datetime.now() + log_file.write('END TIME: ' + str(end_time) + '\n') + tot_run_time = end_time - begin_time + log_file.write('TOTAL RUN TIME: ' + str(tot_run_time)) + log_file.close() if __name__ == '__main__': parser = argparse.ArgumentParser(description='Identify bankfull stage for each hydroid synthetic rating curve') parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) - parser.add_argument('-flows','--bankfull-flow-input',help='NWM recurrence flows dir',required=True,type=str) - parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) - parser.add_argument('-plots','--src-plot-option',help='Optional (True or False): use this flag to create src plots for all hydroids. WARNING - long runtime',required=False,default='False',type=str) + parser.add_argument('-flows','--bankfull-flow-input',help='NWM recurrence flows dir (flow units in CMS!!!)',required=True,type=str) + parser.add_argument('-j','--number-of-jobs',help='OPTIONAL: number of workers (default=8)',required=False,default=8,type=int) + parser.add_argument('-vb','--verbose',help='OPTIONAL: verbose progress bar',required=False,default=None,action='store_true') + parser.add_argument('-plots','--src-plot-option',help='OPTIONAL flag: use this flag to create src plots for all hydroids (helpful for evaluating). WARNING - long runtime',default=False,required=False, action='store_true') args = vars(parser.parse_args()) fim_dir = args['fim_dir'] bankfull_flow_filepath = args['bankfull_flow_input'] number_of_jobs = args['number_of_jobs'] + verbose = bool(args['verbose']) src_plot_option = args['src_plot_option'] - procs_list = [] - - ## Print message to user and initiate run clock - print('Writing progress to log file here: ' + str(join(fim_dir,'bankfull_detect.log'))) - print('This may take a few minutes...') - ## Create a time var to log run time - begin_time = dt.datetime.now() - - ## Check that the bankfull flow filepath exists and read to dataframe - if not isfile(bankfull_flow_filepath): - print('!!!ERROR: Can not find the input bankfull flow file: ' + str(bankfull_flow_filepath)) - else: - df_bflows = pd.read_csv(bankfull_flow_filepath,dtype={'feature_id': int}) - huc_list = os.listdir(fim_dir) - huc_pass_list = [] - for huc in huc_list: - if huc != 'logs' and huc[-3:] != 'log' and huc[-4:] != '.csv': - src_barc_full_filename = join(fim_dir,huc,'src_full_crosswalked_BARC.csv') - src_orig_full_filename = join(fim_dir,huc,'src_full_crosswalked.csv') - src_modify_filename = join(fim_dir,huc,'src_full_crosswalked_bankfull.csv') - huc_output_dir = join(fim_dir,huc,'src_plots') - ## check if BARC modified src_full_crosswalked_BARC.csv exists otherwise use the orginial src_full_crosswalked.csv - if isfile(src_barc_full_filename): - print(str(huc)) - huc_pass_list.append(str(huc) + " --> src_full_crosswalked_BARC.csv") - procs_list.append([src_barc_full_filename, src_modify_filename, df_bflows, huc, src_plot_option, huc_output_dir]) - elif isfile(src_orig_full_filename): - print(str(huc)) - huc_pass_list.append(str(huc) + " --> src_full_crosswalked.csv") - procs_list.append([src_orig_full_filename, src_modify_filename, df_bflows, huc, src_plot_option, huc_output_dir]) - else: - print(str(huc) + 'WARNING --> can not find the SRC crosswalked csv file in the fim output dir: ' + str(join(fim_dir,huc)) + ' - skipping this HUC!!!\n') - - ## initiate log file - 
print(f"Identifying bankfull stage for {len(procs_list)} hucs using {number_of_jobs} jobs") - sys.__stdout__ = sys.stdout - log_file = open(join(fim_dir,'logs','log_bankfull_indentify.log'),"w") - sys.stdout = log_file - log_file.write('START TIME: ' + str(begin_time) + '\n') - log_file.writelines(["%s\n" % item for item in huc_pass_list]) - log_file.write('#########################################################\n\n') - - ## Pass huc procs_list to multiprocessing function - multi_process(src_bankfull_lookup, procs_list) - - ## Record run time and close log file - end_time = dt.datetime.now() - log_file.write('END TIME: ' + str(end_time) + '\n') - tot_run_time = end_time - begin_time - log_file.write('TOTAL RUN TIME: ' + str(tot_run_time)) - sys.stdout = sys.__stdout__ - log_file.close() + + ## Prepare/check inputs, create log file, and spin up the proc list + run_prep(fim_dir,bankfull_flow_filepath,number_of_jobs,verbose,src_plot_option) \ No newline at end of file diff --git a/src/make_rem.py b/src/make_rem.py new file mode 100755 index 000000000..b90753fee --- /dev/null +++ b/src/make_rem.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 + +from numba import njit, typeof, typed, types +import rasterio +import numpy as np +import argparse +import os +from osgeo import ogr, gdal + + +def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster): + """ + Calculates REM/HAND/Detrended DEM + + Parameters + ---------- + dem_fileName : str + File name of pit filled DEM raster. + pixel_watersheds_fileName : str + File name of stream pixel watersheds raster. + rem_fileName : str + File name of output relative elevation raster. + + """ + + + # ------------------------------------------- Get catchment_min_dict --------------------------------------------------- # + # The following creates a dictionary of the catchment ids (key) and their elevation along the thalweg (value). + @njit + def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalweg_window): + + for i,cm in enumerate(flat_catchments): + if thalweg_window[i] == 1: # Only allow reference elevation to be within thalweg. + # If the catchment really exists in the dictionary, compare elevation values. + if (cm in catchment_min_dict): + if (flat_dem[i] < catchment_min_dict[cm]): + # If the flat_dem's elevation value is less than the catchment_min_dict min, update the catchment_min_dict min. + catchment_min_dict[cm] = flat_dem[i] + else: + catchment_min_dict[cm] = flat_dem[i] + return(catchment_min_dict) + + # Open the masked gw_catchments_pixels_masked and dem_thalwegCond_masked. + gw_catchments_pixels_masked_object = rasterio.open(pixel_watersheds_fileName) + dem_thalwegCond_masked_object = rasterio.open(dem_fileName) + thalweg_raster_object = rasterio.open(thalweg_raster) + + # Specify raster object metadata. + meta = dem_thalwegCond_masked_object.meta.copy() + meta['tiled'], meta['compress'] = True, 'lzw' + + # -- Create catchment_min_dict -- # + catchment_min_dict = typed.Dict.empty(types.int32,types.float32) # Initialize an empty dictionary to store the catchment minimums. + # Update catchment_min_dict with pixel sheds minimum. + for ji, window in dem_thalwegCond_masked_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template. + dem_window = dem_thalwegCond_masked_object.read(1,window=window).ravel() # Define dem_window. + catchments_window = gw_catchments_pixels_masked_object.read(1,window=window).ravel() # Define catchments_window. 
+ thalweg_window = thalweg_raster_object.read(1, window=window).ravel() # Define cost_window. + + # Call numba-optimized function to update catchment_min_dict with pixel sheds minimum. + catchment_min_dict = make_catchment_min_dict(dem_window, catchment_min_dict, catchments_window, thalweg_window) + + dem_thalwegCond_masked_object.close() + gw_catchments_pixels_masked_object.close() + thalweg_raster_object.close() + # ------------------------------------------------------------------------------------------------------------------------ # + + + # ------------------------------------------- Produce relative elevation model ------------------------------------------- # + @njit + def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): + + rem_window = np.zeros(len(flat_dem),dtype=np.float32) + for i,cm in enumerate(flat_catchments): + if cm in catchmentMinDict: + if catchmentMinDict[cm] == ndv or flat_dem[i] == ndv: + rem_window[i] = ndv + else: + rem_window[i] = flat_dem[i] - catchmentMinDict[cm] + + return(rem_window) + + rem_rasterio_object = rasterio.open(rem_fileName,'w',**meta) # Open rem_rasterio_object for writing to rem_fileName. + pixel_catchments_rasterio_object = rasterio.open(pixel_watersheds_fileName) # Open pixel_catchments_rasterio_object + dem_rasterio_object = rasterio.open(dem_fileName) + + for ji, window in dem_rasterio_object.block_windows(1): + dem_window = dem_rasterio_object.read(1,window=window) + window_shape = dem_window.shape + + dem_window = dem_window.ravel() + catchments_window = pixel_catchments_rasterio_object.read(1,window=window).ravel() + + rem_window = calculate_rem(dem_window, catchment_min_dict, catchments_window, meta['nodata']) + rem_window = rem_window.reshape(window_shape).astype(np.float32) + + rem_rasterio_object.write(rem_window, window=window, indexes=1) + + dem_rasterio_object.close() + pixel_catchments_rasterio_object.close() + rem_rasterio_object.close() + # ------------------------------------------------------------------------------------------------------------------------ # + + +if __name__ == '__main__': + + # parse arguments + parser = argparse.ArgumentParser(description='Relative elevation from pixel based watersheds') + parser.add_argument('-d','--dem', help='DEM to use within project path', required=True) + parser.add_argument('-w','--watersheds',help='Pixel based watersheds raster to use within project path',required=True) + parser.add_argument('-t','--thalweg-raster',help='A binary raster representing the thalweg. 
1 for thalweg, 0 for non-thalweg.',required=True) + parser.add_argument('-o','--rem',help='Output REM raster',required=True) + + + # extract to dictionary + args = vars(parser.parse_args()) + + # rename variable inputs + dem_fileName = args['dem'] + pixel_watersheds_fileName = args['watersheds'] + rem_fileName = args['rem'] + thalweg_raster = args['thalweg_raster'] + + rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster) diff --git a/src/make_stages_and_catchlist.py b/src/make_stages_and_catchlist.py index 524360915..677de1899 100755 --- a/src/make_stages_and_catchlist.py +++ b/src/make_stages_and_catchlist.py @@ -12,17 +12,23 @@ def make_stages_and_catchlist(flows_filename, catchments_filename, stages_filena flows = gpd.read_file(flows_filename) catchments = gpd.read_file(catchments_filename) + # Reconcile flows and catchments hydroids + flows = flows.merge(catchments[['HydroID']], on='HydroID', how='inner') + catchments = catchments.merge(flows[['HydroID']], on='HydroID', how='inner') + + stages_max = stages_max + stages_interval + stages = np.round(np.arange(stages_min,stages_max,stages_interval),4) hydroIDs = flows['HydroID'].tolist() len_of_hydroIDs = len(hydroIDs) slopes = flows['S0'].tolist() lengthkm = flows['LengthKm'].tolist() - areasqkm = catchments['areasqkm'].tolist() - - - stages_max = stages_max + stages_interval - stages = np.round(np.arange(stages_min,stages_max,stages_interval),4) + try: + areasqkm = catchments['areasqkm'].tolist() + except KeyError: + areasqkm = catchments['geometry'].area/ 10**6 + with open(stages_filename,'w') as f: f.write("Stage\n") for stage in stages: diff --git a/src/mask_dem.py b/src/mask_dem.py new file mode 100755 index 000000000..7cfab2789 --- /dev/null +++ b/src/mask_dem.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 + +import os +import pandas as pd +import geopandas as gpd +import fiona +import rasterio as rio +from rasterio.mask import mask +import argparse +from utils.shared_functions import mem_profile + +@mem_profile +def mask_dem(dem_filename:str, nld_filename:str, levee_id_attribute:str, out_dem_filename:str, branch_id_attribute:str, branch_id:int, branch_zero_id:int, levee_levelpaths:str): + """ + Masks levee-protected areas from DEM in branch 0 or if the level path is associated with a levee (determined in src/associate_levelpaths_with_levees.py). + + Parameters + ---------- + dem_filename: str + Path to DEM file. + nld_filename: str + Path to levee-protected areas file. + levee_id_attribute: str + Name of levee ID attribute. + out_dem_filename: str + Path to write masked DEM. + branch_id_attribute: str + Name of branch ID attribute. + branch_id: int + Branch ID number + branch_zero_id: int + Branch 0 ID number + levee_levelpaths: str + Path to levee-levelpath association file. 
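+
+    Example
+    ----------
+    A minimal sketch of a branch-zero call; the file names and the 'SYSTEM_ID'
+    attribute value below are hypothetical placeholders:
+
+    mask_dem('dem_meters_0.tif', 'levee_protected_areas.gpkg', 'SYSTEM_ID',
+             'dem_meters_masked_0.tif', 'levpa_id', 0, 0, 'levee_levelpaths.csv')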
+ """ + + # Rasterize if branch zero + if (branch_id == branch_zero_id): + with rio.open(dem_filename) as dem, fiona.open(nld_filename) as leveed: + dem_profile = dem.profile.copy() + + geoms = [feature["geometry"] for feature in leveed] + + # Mask out levee-protected areas from DEM + out_dem_masked, _ = mask(dem, geoms, invert=True) + + with rio.open(out_dem_filename, "w", **dem_profile, BIGTIFF='YES') as dest: + dest.write(out_dem_masked[0,:,:], indexes=1) + + elif os.path.exists(levee_levelpaths): + levee_levelpaths = pd.read_csv(levee_levelpaths) + + levee_levelpaths = levee_levelpaths[levee_levelpaths[branch_id_attribute] == branch_id] + + levelpath_levees = list(levee_levelpaths[levee_id_attribute]) + + if len(levelpath_levees) > 0: + with rio.open(dem_filename) as dem:#, fiona.open(nld_filename) as leveed: + leveed = gpd.read_file(nld_filename) + dem_profile = dem.profile.copy() + + geoms = [feature['geometry'] for i, feature in leveed.iterrows() if feature[levee_id_attribute] in levelpath_levees] + + # Mask out levee-protected areas from DEM + out_dem_masked, _ = mask(dem, geoms, invert=True) + + with rio.open(out_dem_filename, "w", **dem_profile, BIGTIFF='YES') as dest: + dest.write(out_dem_masked[0,:,:], indexes=1) + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Mask levee-protected areas from DEM') + parser.add_argument('-dem','--dem-filename', help='DEM filename', required=True, type=str) + parser.add_argument('-nld','--nld-filename', help='NLD levee-protected areas filename', required=True, type=str) + parser.add_argument('-l','--levee-id-attribute', help='Levee ID attribute name', required=True,type=str) + parser.add_argument('-out','--out-dem-filename', help='out DEM filename', required=True, type=str) + parser.add_argument('-b', '--branch-id-attribute', help='Branch ID attribute name', required=True, type=str) + parser.add_argument('-i', '--branch-id', help='Branch ID', type=int, required='True') + parser.add_argument('-b0', '--branch-zero-id', help='Branch zero ID', type=int, required=False, default=0) + parser.add_argument('-csv', '--levee-levelpaths', help='Levee - levelpath layer filename', type=str, required=True) + + args = vars(parser.parse_args()) + + mask_dem(**args) diff --git a/src/output_cleanup.py b/src/output_cleanup.py deleted file mode 100755 index 2491f05a5..000000000 --- a/src/output_cleanup.py +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env python3 -import os -import argparse -from utils.shared_functions import mem_profile - - -@mem_profile -def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_production, viz_post_processing): - ''' - Processes all the final output files to cleanup and add post-processing - - Parameters - ---------- - huc_number : STR - The HUC - output_folder_path : STR - Path to the outputs for the specific huc - additional_whitelist : STR - Additional list of files to keep during a production run - is_production : BOOL - Determine whether or not to only keep whitelisted production files - is_viz_post_processing : BOOL - Determine whether or not to process outputs for Viz - ''' - - # List of files that will be saved during a production run - production_whitelist = [ - 'rem_zeroed_masked.tif', - 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg', - 'demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg', - 'gw_catchments_reaches_filtered_addedAttributes.tif', - 'hydroTable.csv', - 'src.json', - 'small_segments.csv', - 'bathy_crosswalk_calcs.csv', - 
'bathy_stream_order_calcs.csv', - 'bathy_thalweg_flag.csv', - 'bathy_xs_area_hydroid_lookup.csv', - 'src_full_crosswalked.csv', - 'usgs_elev_table.csv', - 'hand_ref_elev_table.csv', - ] - - # List of files that will be saved during a viz run - viz_whitelist = [ - 'rem_zeroed_masked.tif', - 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg', - 'demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg', - 'gw_catchments_reaches_filtered_addedAttributes.tif', - 'hydroTable.csv', - 'src.json', - 'small_segments.csv', - 'src_full_crosswalked.csv', - 'demDerived_reaches_split_points.gpkg', - 'flowdir_d8_burned_filled.tif', - 'dem_thalwegCond.tif' - ] - - # If "production" run, only keep whitelisted files - if is_production and not is_viz_post_processing: - whitelist_directory(output_folder_path, production_whitelist, additional_whitelist) - - # If Viz post-processing is enabled, form output files to Viz specifications - if is_viz_post_processing: - # Step 1, keep only files that Viz needs - whitelist_directory(output_folder_path, viz_whitelist, additional_whitelist) - - -@mem_profile -def whitelist_directory(directory_path, whitelist, additional_whitelist): - # Add any additional files to the whitelist that the user wanted to keep - if additional_whitelist: - whitelist = whitelist + [filename for filename in additional_whitelist.split(',')] - - # Delete any non-whitelisted files - directory = os.fsencode(directory_path) - for file in os.listdir(directory_path): - filename = os.fsdecode(file) - if filename not in whitelist: - os.remove(os.path.join(directory_path, filename)) - - -if __name__ == '__main__': - #Parse arguments - parser = argparse.ArgumentParser(description = 'Cleanup output files') - parser.add_argument('huc_number', type=str, help='The HUC') - parser.add_argument('output_folder_path', type=str, help='Path to the outputs for the specific huc') - parser.add_argument('-w', '--additional_whitelist', type=str, help='List of additional files to keep in a production run') - parser.add_argument('-p', '--is_production', help='Keep only white-listed files for production runs', action='store_true') - parser.add_argument('-v', '--is_viz_post_processing', help='Formats output files to be useful for Viz', action='store_true') - - # Extract to dictionary and assign to variables. - args = vars(parser.parse_args()) - - # Rename variable inputs - huc_number = args['huc_number'] - output_folder_path = args['output_folder_path'] - additional_whitelist = args['additional_whitelist'] - is_production = args['is_production'] - is_viz_post_processing = args['is_viz_post_processing'] - - # Run output_cleanup - output_cleanup(huc_number, output_folder_path, additional_whitelist, is_production, is_viz_post_processing) diff --git a/src/outputs_cleanup.py b/src/outputs_cleanup.py new file mode 100755 index 000000000..48d7ba8ea --- /dev/null +++ b/src/outputs_cleanup.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 + +import argparse +import os + +from pathlib import Path +from utils.shared_functions import FIM_Helpers as fh + +def remove_deny_list_files(src_dir, deny_list, branch_id, verbose=False): + + ''' + Overview + ---------- + Delete a set of files in a given directory (and/or subdirectories) based + on values in the deny list. + + Notes: + - Strange, but.. if you want to skip deleting file, have the value for + the 'deny_list' param to be the value of "none" (this is a by product + of using bash as part of our system) + - In the deny list, any line starting with a # will be skipped. 
Any line + value which contains the value of {}, will be replaced with submitted + branch id. If the line does not have a {} in it, it will be searched + and removed for an exact file match. + - Technically, we don't validate that the branch id is a number, and will + work with any value. + + Parameters + ---------- + + - src_dir : str + Folder path where the files are to be deleted (recursive). + Will error if does not exist. + + - deny_list : str + If not the value of "none" (any case), the file must exist and it contains + the list of files to be deleted. Will error if does not exist. + + - branch_id : str + Needs to have a value and will be subsituted into any {} value. + Will error if does not exist. + + Returns + ---------- + None + ''' + + # Yes.. this is a little strange. + # if the user submitts the deny list name of "none" (any case) + # we skip this + if (deny_list.upper() == 'NONE'): + print("file clean via the deny list skipped") + return + + if (not os.path.isdir(src_dir)): + raise ValueError(f"Sorry, the directory {src_dir} does not exist") + + if (branch_id.strip() == ""): + raise ValueError(f"Sorry, branch id value must exist") + + # Note: some of the deny_file_names might be a comment line + # this will validate file exists + deny_file_names = fh.load_list_file(deny_list.strip()) + + fh.vprint(f"source folder is {src_dir}", verbose) + fh.vprint(f"deny_list is {deny_list}", verbose) + + file_removed_count = 0 + + for deny_file_name in deny_file_names: + + # Only keep lines that do no start with a # + # aka.. we are only deleting files that do not start a line with # + deny_file_name = deny_file_name.strip() + if (deny_file_name.startswith("#")): + continue + + # the file name may / may not have a {} in it . If it does + # has a {} it in, we will replace it with the branch ID. + # if the file name does not have a {} in it, that file + # will be deleted. + # We will search all directories recursively. + deny_file_name = deny_file_name.replace("{}", branch_id) + + found_files = Path(src_dir).rglob(f"{deny_file_name}") + + for found_file in found_files: + fh.vprint(f"found file: {found_file}", verbose, False) + + if os.path.exists(found_file): + os.remove(found_file) + + file_removed_count +=1 + + fh.vprint(f"Removed {file_removed_count} files", verbose, True) + + +if __name__ == '__main__': + + # parse arguments + parser = argparse.ArgumentParser(description='Clean up outputs given file with line delimineted files') + parser.add_argument('-d','--src_dir', help='Directory to find files', required=True) + parser.add_argument('-l','--deny_list', help='Path to deny list file. 
Must be line delimited', + required=True) + parser.add_argument('-b','--branch_id', help='Branch id value', required=True) + parser.add_argument('-v','--verbose', help='Verbose', required=False, + default=False, action='store_true') + + # extract to dictionary + args = vars(parser.parse_args()) + + remove_deny_list_files(**args) + diff --git a/src/preprocess_rasters.py b/src/preprocess_rasters.py deleted file mode 100755 index 635e1f227..000000000 --- a/src/preprocess_rasters.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -sys.path.append('/foss_fim/src') -from multiprocessing import Pool -import argparse -from utils.reproject_dem import reproject_dem -from utils.shared_functions import update_raster_profile -from utils.shared_variables import PREP_PROJECTION, PREP_PROJECTION_CM - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Reproject Elevation rasters and update profile') - parser.add_argument('-dem_dir','--dem-dir', help='DEM filename', required=True,type=str) - parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) - parser.add_argument('-nodata','--nodata-val', help='DEM nodata value', required=False,type=float,default=-9999.0) - parser.add_argument('-block','--blocksize', help='DEM blocksize', required=False,type=int,default=512) - parser.add_argument('-keep','--keep-intermediate', help='keep intermediate files', required=False,type=bool,default=True) - - args = vars(parser.parse_args()) - - dem_dir = args['dem_dir'] - number_of_jobs = args['number_of_jobs'] - nodata_val = args['nodata_val'] - blocksize = args['blocksize'] - keep_intermediate = args['keep_intermediate'] - - reproject_procs_list = [] - - for huc in os.listdir(dem_dir): - raster_dir = os.path.join(dem_dir,huc) - elev_cm = os.path.join(raster_dir, 'elev_cm.tif') - elev_cm_proj = os.path.join(raster_dir, 'elev_cm_proj.tif') - reproject_procs_list.append([raster_dir, elev_cm, elev_cm_proj, PREP_PROJECTION_CM]) - - # Multiprocess reprojection - with Pool(processes=number_of_jobs) as pool: - pool.map(reproject_dem, reproject_procs_list) - - profile_procs_list = [] - - for huc in os.listdir(dem_dir): - elev_m_tif = os.path.join(dem_dir,huc, 'elev_m.tif') - if not os.path.exists(elev_m_tif): - raster_dir = os.path.join(dem_dir,huc) - elev_cm_proj = os.path.join(raster_dir, 'elev_cm_proj.tif') - elev_m = os.path.join(raster_dir, 'elev_m.tif') - profile_procs_list.append([elev_cm_proj, elev_m,PREP_PROJECTION,nodata_val,blocksize,keep_intermediate]) - - # Multiprocess update profile - with Pool(processes=2) as pool: - # TODO read in windows becasue gdal rasters are massive - pool.map(update_raster_profile, profile_procs_list) diff --git a/src/process_branch.sh b/src/process_branch.sh new file mode 100755 index 000000000..bd1d4d90f --- /dev/null +++ b/src/process_branch.sh @@ -0,0 +1,57 @@ +#!/bin/bash -e + +# it is strongly recommended that you do not call directly to src/run_by_branch.sh +# but call this file and let is call run_by_branch. +# This file will auto trap any exceptions from run_by_branch. + +# also.. remember.. that this file will rarely need to be called (but can be) +# as it is usually called through a parallelizing iterator in run_unit_wb.sh + +# this also has no named command line arguments, only positional args. 
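+# A minimal usage sketch (the run name, HUC, and branch id values below are hypothetical):
+#   src/process_branch.sh <runName> <hucNumber> <branchId>
+#   e.g.  src/process_branch.sh dev_run_01 12090301 1234567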
+ +runName=$1 +hucNumber=$2 +branchId=$3 + +# outputDataDir, srcDir and others come from the Dockerfile +export outputRunDataDir=$outputDataDir/$runName +branchLogFileName=$outputRunDataDir/logs/branch/"$hucNumber"_branch_"$branchId".log +branch_list_csv_file=$outputRunDataDir/$hucNumber/branch_ids.csv + +/usr/bin/time -v $srcDir/run_by_branch.sh $hucNumber $branchId 2>&1 | tee $branchLogFileName + +#exit ${PIPESTATUS[0]} +return_codes=( "${PIPESTATUS[@]}" ) + +# we do this way instead of working directly with stderr and stdout +# as they were messing with output logs which we always want. +err_exists=0 +for code in "${return_codes[@]}" +do + # Make an extra copy of the branch log in a new folder + # Note: It was tricky to load in the fim_enum into bash, so we will just + # go with the code for now + if [ $code -eq 0 ]; then + echo + # do nothing + elif [ $code -eq 61 ]; then + echo + err_exists=1 + echo "***** Branch has no valid flowlines *****" + elif [ $code -ne 0 ]; then + echo + err_exists=1 + echo "***** An error has occured *****" + cp $branchLogFileName $outputRunDataDir/branch_errors + fi +done + +# Note: For branches, we do not copy over the log file for codes of 60 and 61. + +if [ "$err_exists" = "0" ]; then + # Only add the huc and branch number to the csv is the branch was successful at processing + # We also don't want to include 60's and 61's + $srcDir/generate_branch_list_csv.py -o $branch_list_csv_file -u $hucNumber -b $branchId +fi + +exit 0 # we always return a success at this point (so we don't stop the loops / iterator) diff --git a/src/query_vectors_by_branch_polygons.py b/src/query_vectors_by_branch_polygons.py new file mode 100755 index 000000000..27ca1a00e --- /dev/null +++ b/src/query_vectors_by_branch_polygons.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + + +from stream_branches import StreamBranchPolygons +import argparse +from tqdm import tqdm + + +if __name__ == '__main__': + + # parse arguments + parser = argparse.ArgumentParser(description='Query vectors by unique attribute values') + parser.add_argument('-a','--vector-attributes', help='Vector with unique attributs', required=True,default=None) + parser.add_argument('-d','--branch-ids', help='Branch ID value', required=True,default=None, nargs='+') + parser.add_argument('-i','--attribute-id', help='Attribute Name', required=True,default=None) + parser.add_argument('-s','--subset-vectors', help='Vector file names to query by attribute', required=False,default=None,nargs="+") + parser.add_argument('-o','--out-files', help='Vector filenames to write to after query', required=False,default=None,nargs="+") + parser.add_argument('-v','--verbose', help='Verbose printing', required=False,default=None,action='store_true') + + # extract to dictionary + args = vars(parser.parse_args()) + + attributes_vector_file, branch_ids ,attribute, subset_vectors, out_files, verbose = args["vector_attributes"], args["branch_ids"],args["attribute_id"], args["subset_vectors"], args["out_files"], args["verbose"] + + # load file + #stream_polys = StreamBranchPolygons.from_file( filename=attributes_vector_file, + # branch_id_attribute=attribute, + # values_excluded=None,attribute_excluded=None, verbose = verbose) + + for subset_vector,out_file in tqdm(zip(subset_vectors,out_files),disable=(not verbose), + total=len(subset_vectors), + desc="Query vectors"): + + #if verbose: + #print("Query \'{}\' by attribute in \'{}\' ...".format(out_file.split('/')[-1].split('.')[0], + # attributes_vector_file.split('/')[-1].split('.')[0])) + 
StreamBranchPolygons.query_vectors_by_branch(subset_vector, + branch_ids=branch_ids, + branch_id_attribute=attribute, + out_filename_template=out_file) + diff --git a/src/r_grow_distance.py b/src/r_grow_distance.py deleted file mode 100755 index d42e88c0d..000000000 --- a/src/r_grow_distance.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python3 -from grass_session import Session -import os -import shutil -import grass.script as gscript -import argparse - - -def r_grow_distance(input_raster, grass_workspace, proximity_dtype, allocation_dtype): - ''' - Runs the r.grow.distance GRASS gis tool which given an input raster will produce an output proximity (or distance) and euclidian allocation tool. - - Parameters - ---------- - input_raster : STR - Path to input raster. For example, see flows_grid_boolean.tif - grass_workspace : STR - Path to TEMPORARY directory to store intermediate GRASS data. This directory is deleted upon completion of this function. - proximity_dtype: STR - Data type for the proximity output. Typically 'Float32'. - allocation_dtype: STR - Data type for the allocation output. Typically 'Float32' (AGREE processing) or 'Float64' (thalweg adjustment processing) - - Returns - ------- - output_proximity_path : STR - The path to the output proximity (or distance) raster (in tif format). - output_allocation_path : STR - The path to the output euclidian allocation raster (in tif format). - - ''' - - # Define parent directory of input raster and get input raster name - input_raster_directory = os.path.dirname(input_raster) - input_raster_name = os.path.splitext(os.path.basename(input_raster))[0] - - # Set up variables for use in GRASS - grass_gisdb = grass_workspace - grass_location = 'temporary_location' - grass_mapset = 'temporary_mapset' - projected_file = input_raster - - # Start and close PERMANENT session. - PERMANENT = Session() - PERMANENT.open(gisdb = grass_gisdb, location = grass_location, create_opts = projected_file) - PERMANENT.close() - - # Open a temporary session. - temporary_session = Session() - temporary_session.open(gisdb = grass_gisdb, location = grass_location, mapset = grass_mapset, create_opts = projected_file) - - #Import input raster into temporary session. - imported_grass_raster = input_raster_name + '@' + grass_mapset - gscript.run_command('r.in.gdal', input = input_raster, output = imported_grass_raster, quiet = True) - - # Define names for proximity and allocation rasters. Run - # r.grow.distance tool. - proximity_grass_name = 'proximity@' + grass_mapset - allocation_grass_name = 'allocation@'+ grass_mapset - gscript.run_command('r.grow.distance', flags = 'm', input = imported_grass_raster, distance = proximity_grass_name, value = allocation_grass_name, quiet = True) - - # Export proximity raster. Saved to same directory as input raster. - # Dtype for proximity always float32. - proximity_filename = input_raster_name + '_dist.tif' - output_proximity_path=os.path.join(input_raster_directory,proximity_filename) - gscript.run_command('r.out.gdal', flags = 'cf', input = proximity_grass_name, output = output_proximity_path, format = 'GTiff', quiet = True, type = proximity_dtype, createopt = 'COMPRESS=LZW') - - # Export allocation raster. Saved to same directory as input raster. - # Dtype assigned via the allocation_dtype input. 
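The query script above delegates the per-branch subsetting to StreamBranchPolygons.query_vectors_by_branch. The general idea, selecting only the features whose branch-id attribute matches each requested branch and writing one output file per branch, can be pictured with plain geopandas as in the sketch below. This is illustrative only and is not the StreamBranchPolygons implementation; the function name and the out_template convention are assumptions.

    import geopandas as gpd

    def subset_by_branch(vector_file, branch_ids, branch_id_attribute, out_template):
        # Read the source layer once, then write one subset per branch id,
        # keeping only features whose attribute equals that id.
        gdf = gpd.read_file(vector_file)
        for branch_id in branch_ids:
            subset = gdf[gdf[branch_id_attribute] == branch_id]
            out_file = out_template.format(branch_id)  # e.g. "streams_{}.gpkg"
            subset.to_file(out_file, driver="GPKG")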
- allocation_filename = input_raster_name + '_allo.tif' - output_allocation_path = os.path.join(input_raster_directory, allocation_filename) - gscript.run_command('r.out.gdal', flags = 'cf', input = allocation_grass_name, output = output_allocation_path, format = 'GTiff', quiet = True, type = allocation_dtype, createopt = 'COMPRESS=LZW') - - # Close down temporary session and remove temporary workspace. - temporary_session.close() - shutil.rmtree(grass_gisdb) - - return output_proximity_path,output_allocation_path - - -if __name__ == '__main__': - - #Parse arguments - parser = argparse.ArgumentParser(description = 'Calculate AGREE DEM') - parser.add_argument('-i', '--in_raster', help = 'raster to perform r.grow.distance', required = True) - parser.add_argument('-g', '--grass_workspace', help = 'Temporary GRASS workspace', required = True) - parser.add_argument('-p', '--prox_dtype', help = 'Output proximity raster datatype', required = True) - parser.add_argument('-a', '--allo_dtype', help = 'Output allocation raster datatype', required = True) - - # Extract to dictionary and assign to variables. - args = vars(parser.parse_args()) - - # Rename variable inputs - input_raster = args['in_raster'] - grass_workspace = args['grass_workspace'] - proximity_dtype = args['prox_dtype'] - allocation_dtype = args['allo_dtype'] - - # Run r_grow_distance - r_grow_distance(input_raster, grass_workspace, proximity_dtype, allocation_dtype) - \ No newline at end of file diff --git a/src/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py deleted file mode 100644 index e11472746..000000000 --- a/src/reduce_nhd_stream_density.py +++ /dev/null @@ -1,232 +0,0 @@ -#!/usr/bin/env python3 - -import geopandas as gpd -import pandas as pd -import numpy as np -from collections import deque -import argparse -import pygeos -from shapely.wkb import dumps -from shapely.geometry import Point -from utils.shared_functions import getDriver - - -def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=False): - - headwater_streams = pd.DataFrame() - - if mainstem_flag == False: - nhd_streams = gpd.read_file(nhd_streams_) - headwater_col = 'is_headwater' - id_col = 'headwaters_id' - n = -1 - else: - nhd_streams = nhd_streams_.copy() - headwater_col = 'mainstem' - id_col = 'nws_lid' - n = '' - - # Locate the closest NHDPlus HR stream segment to NWM headwater points. 
Done by HUC8 to reduce processing time and to contain NWM headwater in the same HUC - for index, row in selected_wbd8.iterrows(): - huc = row["HUC8"] - - # Double check that this is a nested HUC - if huc.startswith(str(huc4)): - - huc8_mask = selected_wbd8.loc[selected_wbd8.HUC8==huc] - huc8_mask = huc8_mask.reset_index(drop=True) - - # Masking headwaters by HUC8 - headwaters_mask = gpd.read_file(headwaters_filename, mask = huc8_mask) - headwaters_mask = headwaters_mask.reset_index(drop=True) - - # Masking subset streams by HUC8 - if mainstem_flag == False: - streams_subset = gpd.read_file(nhd_streams_, mask = huc8_mask) - else: - streams_subset = nhd_streams.loc[nhd_streams.HUC8==huc].copy() - if headwaters_mask.is_headwater.dtype != 'int': headwaters_mask.is_headwater = headwaters_mask.is_headwater.astype('int') - if headwaters_mask.is_colocated.dtype != 'int': headwaters_mask.is_colocated = headwaters_mask.is_colocated.astype('int') - headwaters_mask = headwaters_mask.loc[headwaters_mask.is_headwater==True] - - if not streams_subset.empty: - streams_subset[headwater_col] = False - streams_subset = streams_subset.reset_index(drop=True) - - # Create WKB geometry column - streams_subset['b_geom'] = None - for index, linestring in enumerate(streams_subset.geometry): - streams_subset.at[index, 'b_geom'] = dumps(linestring) - - # Create pygeos nhd stream geometries from WKB representation - streambin_geom = pygeos.io.from_wkb(streams_subset['b_geom']) - - # Add HUC8 column - streams_subset['HUC8'] = str(huc) - - # Add headwaters_id column - streams_subset[id_col] = n - distance_from_upstream = {} - for index, point in headwaters_mask.iterrows(): - - # Convert headwater point geometries to WKB representation - wkb_point = dumps(point.geometry) - - # Create pygeos headwater point geometries from WKB representation - pointbin_geom = pygeos.io.from_wkb(wkb_point) - - # Distance to each stream segment - distances = pygeos.measurement.distance(streambin_geom, pointbin_geom) - - # Find minimum distance - min_index = np.argmin(distances) - headwater_point_name = point[headwater_id] - - # Find stream segment closest to headwater point - if mainstem_flag==True: - - if point.is_colocated==True: - - closest_stream = streams_subset.iloc[min_index] - distance_to_line = point.geometry.distance(Point(closest_stream.geometry.coords[-1])) - print(f"{point.nws_lid} distance on line {closest_stream.NHDPlusID}: {np.round(distance_to_line,1)}") - - if not closest_stream.NHDPlusID in distance_from_upstream.keys(): - distance_from_upstream[closest_stream.NHDPlusID] = [point.nws_lid,distance_to_line] - - elif distance_from_upstream[closest_stream.NHDPlusID][1] > distance_to_line: - distance_from_upstream[closest_stream.NHDPlusID] = [point.nws_lid,distance_to_line] - - headwater_point_name = distance_from_upstream[closest_stream.NHDPlusID][0] - - # Closest segment to headwater - streams_subset.loc[min_index,headwater_col] = True - streams_subset.loc[min_index,id_col] = headwater_point_name - - headwater_streams = headwater_streams.append(streams_subset[['NHDPlusID',headwater_col,id_col,'HUC8']]) - - headwater_streams = headwater_streams.sort_values(headwater_col, ascending=False).drop_duplicates('NHDPlusID') # keeps headwater=True for conflicting duplicates - - if mainstem_flag == False: - nhd_streams = nhd_streams.merge(headwater_streams,on='NHDPlusID',how='inner') - else: - headwater_streams = headwater_streams.drop(columns=['HUC8']) - nhd_streams = nhd_streams.merge(headwater_streams,on='NHDPlusID',how='outer') - 
nhd_streams[id_col] = nhd_streams[id_col].fillna(n) - nhd_streams[headwater_col] = nhd_streams[headwater_col].fillna(0) - - del selected_wbd8, streams_subset, headwater_streams - - huc4_mask_buffer = huc4_mask.buffer(10) - - # Identify inflowing streams - nwm_intersections = gpd.read_file(nwm_intersections_filename, mask=huc4_mask_buffer) - - if mainstem_flag == False: - nhd_streams['downstream_of_headwater'] = False - nhd_streams['is_relevant_stream'] = nhd_streams['is_headwater'].copy() - else: - nwm_intersections = nwm_intersections.loc[nwm_intersections.mainstem==1] - - nhd_streams = nhd_streams.explode() - nhd_streams = nhd_streams.reset_index(drop=True) - - - - # Find stream segment closest to nwm intersection point - for index, point in nwm_intersections.iterrows(): - - # Distance to each stream segment - distances = nhd_streams.distance(point.geometry) - - # Find minimum distance - min_index = np.argmin(distances) - - # Update attributes for incoming stream - nhd_streams.loc[min_index,headwater_col] = True - - if mainstem_flag == False: - nhd_streams.loc[min_index,'downstream_of_headwater'] = True - nhd_streams['is_relevant_stream'] = nhd_streams[headwater_col].copy() - - # Trace down from headwaters - nhd_streams.set_index('NHDPlusID',inplace=True,drop=False) - - nhd_streams = get_downstream_segments(nhd_streams.copy(),headwater_col,mainstem_flag) - - # nhd_streams.fillna(value = {"is_relevant_stream": False}, inplace=True) - nhd_streams = nhd_streams.loc[nhd_streams['is_relevant_stream'],:] - nhd_streams.reset_index(drop=True,inplace=True) - - return nhd_streams - - -def get_downstream_segments(streams, attribute,mainstem_flag): - - Q = deque(streams.loc[streams[attribute],'NHDPlusID'].tolist()) - visited = set() - - while Q: - q = Q.popleft() - if q in visited: - continue - - visited.add(q) - toNode,DnLevelPat = streams.loc[q,['ToNode','DnLevelPat']] - - try: - downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() - except ValueError: # 18050002 has duplicate nhd stream feature - if len(toNode.unique()) == 1: - toNode = toNode.iloc[0] - downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() - - # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. 
exclude segments that are diversions) - if len(set(downstream_ids))>1: # special case: remove duplicate NHDPlusIDs - relevant_ids = [segment for segment in downstream_ids if DnLevelPat == streams.loc[segment,'LevelPathI']] - else: - relevant_ids = downstream_ids - - if mainstem_flag == False: - - streams.loc[relevant_ids,'is_relevant_stream'] = True - streams.loc[relevant_ids,'downstream_of_headwater'] = True - else: - streams.loc[relevant_ids,'mainstem'] = True - - for i in relevant_ids: - if i not in visited: - Q.append(i) - - return streams - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Reduce NHDPlus HR network based on headwater points') - parser.add_argument('-n','--huc-number',help='HUC number',required=True,type=str) - parser.add_argument('-b','--huc4-mask',help='HUC4 mask',required=True) - parser.add_argument('-w','--selected-wbd8',help='WBD8 layer',required=True) - parser.add_argument('-t','--nhd-streams',help='NHDPlus HR geodataframe',required=True) - parser.add_argument('-a','--headwaters-filename',help='Headwaters points layer name',required=True,type=str) - parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None) - parser.add_argument('-i','--headwater-id',help='Headwater points ID column',required=True) - parser.add_argument('-c','--nwm-intersections-filename',help='NWM HUC4 intersection points',required=True) - parser.add_argument('-d','--mainstem-flag',help='flag for mainstems network',required=False,default=False) - - args = vars(parser.parse_args()) - - huc_number = args['huc_number'] - huc4_mask = args['huc4_mask'] - selected_wbd8 = args['selected_wbd8'] - nhd_streams = args['nhd_streams'] - headwaters_filename = args['headwaters_filename'] - subset_nhd_streams_fileName = args['subset_nhd_streams_fileName'] - headwater_id = args['headwater_id'] - nwm_intersections_filename = args['nwm_intersections_filename'] - mainstem_flag = args['mainstem_flag'] - - subset_streams_gdf = subset_nhd_network(huc_number,huc4_mask,selected_wbd8,nhd_streams,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=False) - - if subset_nhd_streams_fileName is not None: - subset_streams_gdf.to_file(subset_nhd_streams_fileName,driver=getDriver(subset_nhd_streams_fileName),index=False) diff --git a/src/rem.py b/src/rem.py deleted file mode 100755 index c064710ed..000000000 --- a/src/rem.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -from numba import njit, typed, types -import rasterio -import numpy as np -import argparse -import pandas as pd -import geopandas as gpd -from utils.shared_functions import getDriver, mem_profile - - -@mem_profile -def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, dem_reaches_filename): - """ - Calculates REM/HAND/Detrended DEM - - Parameters - ---------- - dem_fileName : str - File name of pit filled DEM raster. - pixel_watersheds_fileName : str - File name of stream pixel watersheds raster. - rem_fileName : str - File name of output relative elevation raster. - hydroid_fileName : str - File name of the hydroid raster (i.e. 
gw_catchments_reaches.tif) - dem_reaches_filename - File name of the reaches layer to populate HAND elevation attribute values and overwrite as output - - """ - - # ------------------------------------------- Get catchment_hydroid_dict --------------------------------------------------- # - # The following creates a dictionary of the catchment ids (key) and their hydroid along the thalweg (value). - # This is needed to produce a HAND zero reference elevation by hydroid dataframe (helpful for evaluating rating curves & bathy properties) - @njit - def make_catchment_hydroid_dict(flat_value_raster, catchment_hydroid_dict, flat_catchments, thalweg_window): - - for i,cm in enumerate(flat_catchments): - if thalweg_window[i] == 1: # Only allow reference hydroid to be within thalweg. - catchment_hydroid_dict[cm] = flat_value_raster[i] - return(catchment_hydroid_dict) - - # Open files. - gw_catchments_pixels_masked_object = rasterio.open(pixel_watersheds_fileName) - hydroid_pixels_object = rasterio.open(hydroid_fileName) - thalweg_raster_object = rasterio.open(thalweg_raster) - - # Specify raster object metadata. - meta = hydroid_pixels_object.meta.copy() - meta['tiled'], meta['compress'] = True, 'lzw' - - # -- Create catchment_hydroid_dict -- # - catchment_hydroid_dict = typed.Dict.empty(types.int64,types.int64) # Initialize an empty dictionary to store the catchment hydroid. - # Update catchment_hydroid_dict with each pixel sheds hydroid. - # Creating dictionary containing catchment ids (key) and corresponding hydroid within the thalweg... - for ji, window in hydroid_pixels_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template - hydroid_window = hydroid_pixels_object.read(1,window=window).ravel() # Define hydroid_window - catchments_window = gw_catchments_pixels_masked_object.read(1,window=window).ravel() # Define catchments_window - thalweg_window = thalweg_raster_object.read(1, window=window).ravel() # Define cost_window - - # Call numba-optimized function to update catchment_hydroid_dict with pixel sheds overlapping hydroid. - catchment_hydroid_dict = make_catchment_hydroid_dict(hydroid_window, catchment_hydroid_dict, catchments_window, thalweg_window) - - hydroid_pixels_object.close() - gw_catchments_pixels_masked_object.close() - thalweg_raster_object.close() - # ------------------------------------------- Get catchment_min_dict --------------------------------------------------- # - # The following creates a dictionary of the catchment ids (key) and their elevation along the thalweg (value). - @njit - def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalweg_window): - - for i,cm in enumerate(flat_catchments): - if thalweg_window[i] == 1: # Only allow reference elevation to be within thalweg - # If the catchment really exists in the dictionary, compare elevation values - if (cm in catchment_min_dict): - if (flat_dem[i] < catchment_min_dict[cm]): - # If the flat_dem's elevation value is less than the catchment_min_dict min, update the catchment_min_dict min - catchment_min_dict[cm] = flat_dem[i] - else: - catchment_min_dict[cm] = flat_dem[i] - return(catchment_min_dict) - - # Open files. - gw_catchments_pixels_masked_object = rasterio.open(pixel_watersheds_fileName) - dem_thalwegCond_masked_object = rasterio.open(dem_fileName) - thalweg_raster_object = rasterio.open(thalweg_raster) - - # Specify raster object metadata. 
- meta = dem_thalwegCond_masked_object.meta.copy() - meta['tiled'], meta['compress'] = True, 'lzw' - - # -- Create catchment_min_dict -- # - catchment_min_dict = typed.Dict.empty(types.int64,types.float32) # Initialize an empty dictionary to store the catchment minimums - # Update catchment_min_dict with pixel sheds minimum. - # Creating dictionary containing catchment ids (key) and corresponding elevation within the thalweg (value)... - for ji, window in dem_thalwegCond_masked_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template - dem_window = dem_thalwegCond_masked_object.read(1,window=window).ravel() # Define dem_window - catchments_window = gw_catchments_pixels_masked_object.read(1,window=window).ravel() # Define catchments_window - thalweg_window = thalweg_raster_object.read(1, window=window).ravel() # Define thalweg_window - - # Call numba-optimized function to update catchment_min_dict with pixel sheds minimum. - catchment_min_dict = make_catchment_min_dict(dem_window, catchment_min_dict, catchments_window, thalweg_window) - - dem_thalwegCond_masked_object.close() - gw_catchments_pixels_masked_object.close() - thalweg_raster_object.close() - - # Merge and export dictionary to csv. - catchment_min_dict_df = pd.DataFrame.from_dict(catchment_min_dict, orient='index') # convert dict to dataframe - catchment_min_dict_df.columns = ['Median_Thal_Elev_m'] - catchment_hydroid_dict_df = pd.DataFrame.from_dict(catchment_hydroid_dict, orient='index') # convert dict to dataframe - catchment_hydroid_dict_df.columns = ['HydroID'] - merge_df = catchment_hydroid_dict_df.merge(catchment_min_dict_df, left_index=True, right_index=True) - merge_df.index.name = 'pixelcatch_id' - - # Merge the HAND reference elevation by HydroID dataframe with the demDerived_reaches layer (add new layer attribute) - min_by_hydroid = merge_df.groupby(['HydroID']).min() # min value of all med_thal_elev for pixel catchments in each HydroID reach - min_by_hydroid.columns = ['min_thal_elev'] - med_by_hydroid = merge_df.groupby(['HydroID']).median() # median value of all med_thal_elev for pixel catchments in each HydroID reach - med_by_hydroid.columns = ['med_thal_elev'] - max_by_hydroid = merge_df.groupby(['HydroID']).max() # max value of all med_thal_elev for pixel catchments in each HydroID reach - max_by_hydroid.columns = ['max_thal_elev'] - input_reaches = gpd.read_file(dem_reaches_filename) - input_reaches = input_reaches.merge(min_by_hydroid, on='HydroID') # merge dataframes by HydroID variable - input_reaches = input_reaches.merge(med_by_hydroid, on='HydroID') # merge dataframes by HydroID variable - input_reaches = input_reaches.merge(max_by_hydroid, on='HydroID') # merge dataframes by HydroID variable - input_reaches.to_file(dem_reaches_filename,driver=getDriver(dem_reaches_filename),index=False) - # ------------------------------------------------------------------------------------------------------------------------ # - - - # ------------------------------------------- Produce relative elevation model ------------------------------------------- # - @njit - def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): - rem_window = np.zeros(len(flat_dem),dtype=np.float32) - for i,cm in enumerate(flat_catchments): - if cm in catchmentMinDict: - if catchmentMinDict[cm] == ndv: - rem_window[i] = ndv - else: - rem_window[i] = flat_dem[i] - catchmentMinDict[cm] - - return(rem_window) - - rem_rasterio_object = rasterio.open(rem_fileName,'w',**meta) # Open rem_rasterio_object for 
writing to rem_fileName. - pixel_catchments_rasterio_object = rasterio.open(pixel_watersheds_fileName) # Open pixel_catchments_rasterio_object - dem_rasterio_object = rasterio.open(dem_fileName) - - # Producing relative elevation model raster - for ji, window in dem_rasterio_object.block_windows(1): - dem_window = dem_rasterio_object.read(1,window=window) - window_shape = dem_window.shape - - dem_window = dem_window.ravel() - catchments_window = pixel_catchments_rasterio_object.read(1,window=window).ravel() - - rem_window = calculate_rem(dem_window, catchment_min_dict, catchments_window, meta['nodata']) - rem_window = rem_window.reshape(window_shape).astype(np.float32) - - rem_rasterio_object.write(rem_window, window=window, indexes=1) - - dem_rasterio_object.close() - pixel_catchments_rasterio_object.close() - rem_rasterio_object.close() - # ------------------------------------------------------------------------------------------------------------------------ # - - -if __name__ == '__main__': - - # Parse arguments. - parser = argparse.ArgumentParser(description='Relative elevation from pixel based watersheds') - parser.add_argument('-d','--dem', help='DEM to use within project path', required=True) - parser.add_argument('-w','--watersheds',help='Pixel based watersheds raster to use within project path',required=True) - parser.add_argument('-t','--thalweg-raster',help='A binary raster representing the thalweg. 1 for thalweg, 0 for non-thalweg.',required=True) - parser.add_argument('-o','--rem',help='Output REM raster',required=True) - parser.add_argument('-i','--hydroid', help='HydroID raster to use within project path', required=True) - parser.add_argument('-s','--dem_reaches_in_out',help='DEM derived reach layer to join HAND reference elevation attribute',required=True) - - # Extract arguments to dictionary. - args = vars(parser.parse_args()) - - # Define variable inputs. 
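The rem.py module removed in this diff computes the relative elevation model (REM/HAND) by subtracting, from each DEM cell, the minimum thalweg elevation of the pixel catchment that cell falls in. Ignoring the windowed reads and numba acceleration in the original, the core step can be sketched in plain numpy as below; the function and array names here are ours, not the module's.

    import numpy as np

    def rem_from_catchment_minimums(dem, catchments, catchment_min, ndv):
        # dem and catchments are 2D arrays of equal shape; catchment_min maps
        # each catchment id to its minimum thalweg elevation; ndv marks nodata.
        rem = np.full(dem.shape, ndv, dtype=np.float32)
        for catchment_id, min_elev in catchment_min.items():
            mask = catchments == catchment_id
            rem[mask] = dem[mask] - min_elev
        return rem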
- dem_fileName = args['dem'] - pixel_watersheds_fileName = args['watersheds'] - rem_fileName = args['rem'] - thalweg_raster = args['thalweg_raster'] - hydroid_fileName = args['hydroid'] - dem_reaches_filename = args['dem_reaches_in_out'] - - rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, dem_reaches_filename) diff --git a/src/reset_mannings.py b/src/reset_mannings.py new file mode 100755 index 000000000..888e156e3 --- /dev/null +++ b/src/reset_mannings.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python3 + +import pandas as pd +import numpy as np +import argparse +import os +from glob import iglob,glob +from stream_branches import StreamNetwork + +def Reset_mannings(hydrofabric_dir,mannings_value,overwrite_files=False): + + + src_table_filePaths, hydro_table_filePaths, stream_network_filePaths = make_file_paths_for_inputs(hydrofabric_dir) + + single_stream_network = len(stream_network_filePaths) == 1 + + if single_stream_network: + stream_network = StreamNetwork.from_file(stream_network_filePaths[0]) + + + for i,(srcFP,hydFP) in enumerate(zip(src_table_filePaths,hydro_table_filePaths)): + + src_table = load_src_table(srcFP) + hydro_table = load_hydro_table(hydFP) + + src_table, hydro_table = reset_mannings_for_a_processing_unit(src_table,hydro_table,mannings_value) + + if not single_stream_network: + stream_network = StreamNetwork.from_file( stream_network_filePaths[i] ) + + small_segments = identify_small_reaches(stream_network,min_catchment_area=None,min_stream_length=None) + src_table, hydro_table = replace_discharges_of_small_segments(small_segments, src_table, hydro_table) + + if overwrite_files: + src_table.to_csv(srcFP,index=False) + hydro_table.to_csv(hydFP,index=False) + + #yield(src_table, hydro_table) + + +def load_hydro_table(hydro_table_filePath): + + hydro_table = pd.read_csv( hydro_table_filePath, + dtype= { 'HydroID' : str, + 'feature_id' : str, + 'stage' : float, + 'discharge_cms': float, + 'HUC' : str, + 'LakeID' : str + } + ) + + return(hydro_table) + + +def load_src_table(src_table_filePath): + + src_table = pd.read_csv( src_table_filePath, + dtype= { 'HydroID' : str, + 'feature_id' : str, + 'stage' : float, + 'discharge_cms': float, + 'HUC' : str, + 'LakeID' : str + } + ) + + return(src_table) + + +def make_file_paths_for_inputs(hydrofabric_dir): + + src_table_filePath_to_glob = os.path.join(hydrofabric_dir,'**','src_full_crosswalked*.csv') + hydro_table_filePath_to_glob = os.path.join(hydrofabric_dir,'**','hydroTable*.csv') + stream_network_filePath_to_glob = os.path.join(hydrofabric_dir,'**','demDerived_reaches_split_filtered_addedAttributes_crosswalked*.gpkg') + + src_table_filePaths = iglob(src_table_filePath_to_glob,recursive=True) + hydro_table_filePaths = iglob(hydro_table_filePath_to_glob,recursive=True) + stream_network_filePaths = glob(stream_network_filePath_to_glob,recursive=True) + + + return(src_table_filePaths,hydro_table_filePaths,stream_network_filePaths) + + +def reset_mannings_for_a_processing_unit(src_table,hydro_table,mannings_value): + + src_table = override_mannings(src_table,mannings_value) + + src_table = calculate_discharge(src_table) + + hydro_table["discharge_cms"] = src_table["Discharge (m3s-1)"] + + return(src_table,hydro_table) + + +def override_mannings(table,mannings_value,mannings_attribute="ManningN"): + + table[mannings_attribute] = mannings_value + + return(table) + + +def calculate_discharge(src_table): + + src_table['Discharge (m3s-1)'] = src_table['WetArea (m2)']* \ + 
pow(src_table['HydraulicRadius (m)'],2.0/3)* \ + pow(src_table['SLOPE'],0.5)/src_table['ManningN'] + + # set zero stage values to zero discharge + src_table.loc[src_table['Stage']==0,['Discharge (m3s-1)']] = 0 + + + return(src_table) + + +def identify_small_reaches(stream_network,min_catchment_area=None,min_stream_length=None): + + # Adjust short model reach rating curves + sml_segs = pd.DataFrame() + + if min_catchment_area is None: + min_catchment_area = float(os.environ['min_catchment_area']) #0.25# + + if min_stream_length is None: + min_stream_length = float(os.environ['min_stream_length']) #0.5# + + # replace small segment geometry with neighboring stream + for stream_index in stream_network.index: + + if stream_network["areasqkm"][stream_index] < min_catchment_area and stream_network["LengthKm"][stream_index] < min_stream_length and stream_network["LakeID"][stream_index] < 0: + + short_id = stream_network['HydroID'][stream_index] + to_node = stream_network['To_Node'][stream_index] + from_node = stream_network['From_Node'][stream_index] + + # multiple upstream segments + if len(stream_network.loc[stream_network['NextDownID'] == short_id]['HydroID']) > 1: + max_order = max(stream_network.loc[stream_network['NextDownID'] == short_id]['order_']) # drainage area would be better than stream order but we would need to calculate + + if len(stream_network.loc[(stream_network['NextDownID'] == short_id) & (stream_network['order_'] == max_order)]['HydroID']) == 1: + update_id = stream_network.loc[(stream_network['NextDownID'] == short_id) & (stream_network['order_'] == max_order)]['HydroID'].item() + + else: + update_id = stream_network.loc[(stream_network['NextDownID'] == short_id) & (stream_network['order_'] == max_order)]['HydroID'].values[0] # get the first one (same stream order, without drainage area info it is hard to know which is the main channel) + + # single upstream segments + elif len(stream_network.loc[stream_network['NextDownID'] == short_id]['HydroID']) == 1: + update_id = stream_network.loc[stream_network.To_Node==from_node]['HydroID'].item() + + # no upstream segments; multiple downstream segments + elif len(stream_network.loc[stream_network.From_Node==to_node]['HydroID']) > 1: + max_order = max(stream_network.loc[stream_network.From_Node==to_node]['HydroID']['order_']) # drainage area would be better than stream order but we would need to calculate + + if len(stream_network.loc[(stream_network['NextDownID'] == short_id) & (stream_network['order_'] == max_order)]['HydroID']) == 1: + update_id = stream_network.loc[(stream_network.From_Node==to_node) & (stream_network['order_'] == max_order)]['HydroID'].item() + + else: + update_id = stream_network.loc[(stream_network.From_Node==to_node) & (stream_network['order_'] == max_order)]['HydroID'].values[0] # get the first one (same stream order, without drainage area info it is hard to know which is the main channel) + + # no upstream segments; single downstream segment + elif len(stream_network.loc[stream_network.From_Node==to_node]['HydroID']) == 1: + update_id = stream_network.loc[stream_network.From_Node==to_node]['HydroID'].item() + + else: + update_id = stream_network.loc[stream_network.HydroID==short_id]['HydroID'].item() + + str_order = stream_network.loc[stream_network.HydroID==short_id]['order_'].item() + sml_segs = sml_segs.append({'short_id':short_id, 'update_id':update_id, 'str_order':str_order}, ignore_index=True) + + #print("Number of short reaches [{} < {} and {} < {}] = {}".format("areasqkm", min_catchment_area, 
"LengthKm", min_stream_length, len(sml_segs))) + + return(sml_segs) + + +def replace_discharges_of_small_segments(sml_segs, src_table, hydro_table): + + # update rating curves + if len(sml_segs) == 0: + return(src_table,hydro_table) + + #sml_segs.to_csv(small_segments_filename,index=False) + #print("Update rating curves for short reaches.") + + for index, segment in sml_segs.iterrows(): + + short_id = segment[0] + update_id= segment[1] + new_values = src_table.loc[src_table['HydroID'] == update_id][['Stage', 'Discharge (m3s-1)']] + + for src_index, src_stage in new_values.iterrows(): + src_table.loc[(src_table['HydroID']== short_id) & (src_table['Stage']== src_stage[0]),['Discharge (m3s-1)']] = src_stage[1] + + hydro_table["discharge_cms"] = src_table["Discharge (m3s-1)"] + + return(src_table,hydro_table) + + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Overwrites mannings n values and recomputes discharge values for SRCs and Hydro-Tables') + parser.add_argument('-y','--hydrofabric-dir', help='Hydrofabric directory', required=True) + parser.add_argument('-n','--mannings-value', help='Mannings N value to use', required=True, type=float) + parser.add_argument('-o','--overwrite-files', help='Overwrites original files if used', required=False, default=False,action='store_true') + + + args = vars(parser.parse_args()) + + Reset_mannings(**args) diff --git a/src/run_by_branch.sh b/src/run_by_branch.sh new file mode 100755 index 000000000..6ecb5388d --- /dev/null +++ b/src/run_by_branch.sh @@ -0,0 +1,112 @@ +#!/bin/bash -e + +## INITIALIZE TOTAL TIME TIMER ## +T_total_start + +## SOURCE BASH FUNCTIONS +source $srcDir/bash_functions.env + +## SET VARIABLES AND FILE INPUTS ## +hucNumber="$1" +current_branch_id="$2" +hucUnitLength=${#hucNumber} +huc4Identifier=${hucNumber:0:4} +huc2Identifier=${hucNumber:0:2} + +# Skip branch zero +if [ $current_branch_id = $branch_zero_id ]; then + exit 0 +fi + +outputHucDataDir=$outputRunDataDir/$hucNumber +outputBranchDataDir=$outputHucDataDir/branches +outputCurrentBranchDataDir=$outputBranchDataDir/$current_branch_id + +## OVERWRITE +if [ -d "$outputCurrentBranchDataDir" ]; then + rm -rf $outputCurrentBranchDataDir +fi + +## MAKE OUTPUT BRANCH DIRECTORY +mkdir -p $outputCurrentBranchDataDir + +## START MESSAGE ## +echo -e $startDiv"Processing HUC: $hucNumber - branch_id: $current_branch_id" +echo + +## SUBSET VECTORS +echo -e $startDiv"Subsetting vectors to branches $hucNumber $current_branch_id" +date -u +Tstart +echo -e "Querying NWM streams ..." +ogr2ogr -f GPKG -t_srs $DEFAULT_FIM_PROJECTION_CRS -where $branch_id_attribute="$current_branch_id" $outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_$current_branch_id.gpkg $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg +echo -e "Querying NWM catchments ..." +ogr2ogr -f GPKG -t_srs $DEFAULT_FIM_PROJECTION_CRS -where $branch_id_attribute="$current_branch_id" $outputCurrentBranchDataDir/nwm_catchments_proj_subset_levelPaths_$current_branch_id.gpkg $outputHucDataDir/nwm_catchments_proj_subset_levelPaths.gpkg +echo -e "Querying NWM Dissolved Levelpaths headwaters ..." +ogr2ogr -f GPKG -t_srs $DEFAULT_FIM_PROJECTION_CRS -where $branch_id_attribute="$current_branch_id" $outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_dissolved_headwaters_$current_branch_id.gpkg $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg +#echo -e "Querying NWM headwaters ..." 
+#ogr2ogr -f GPKG -t_srs $DEFAULT_FIM_PROJECTION_CRS -where $branch_id_attribute="$current_branch_id" $outputCurrentBranchDataDir/nwm_headwaters_$current_branch_id.gpkg $outputHucDataDir/nwm_headwaters.gpkg +Tcount + +## GET RASTERS FROM BRANCH ZERO AND CLIP TO CURRENT BRANCH BUFFER ## +echo -e $startDiv"Clipping rasters to branches $hucNumber $current_branch_id" +date -u +Tstart +$srcDir/clip_rasters_to_branches.py -d $current_branch_id -b $outputHucDataDir/branch_polygons.gpkg -i $branch_id_attribute -r $outputHucDataDir/dem_meters.tif $outputBranchDataDir/$branch_zero_id/flowdir_d8_burned_filled_$branch_zero_id.tif -c $outputCurrentBranchDataDir/dem_meters.tif $outputCurrentBranchDataDir/flowdir_d8_burned_filled.tif -v +Tcount + +## GET RASTER METADATA +echo -e $startDiv"Get DEM Metadata $hucNumber $current_branch_id" +date -u +Tstart +read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif) +Tcount + +## RASTERIZE REACH BOOLEAN (1 & 0) ## +echo -e $startDiv"Rasterize Reach Boolean $hucNumber $current_branch_id" +date -u +Tstart +gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_$current_branch_id.gpkg $outputCurrentBranchDataDir/flows_grid_boolean_$current_branch_id.tif +Tcount + +## RASTERIZE NWM Levelpath HEADWATERS (1 & 0) ## +echo -e $startDiv"Rasterize NHD Headwaters $hucNumber $current_branch_id" +date -u +Tstart +gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_dissolved_headwaters_$current_branch_id.gpkg $outputCurrentBranchDataDir/headwaters_$current_branch_id.tif +Tcount + +## PRODUCE THE REM AND OTHER HAND FILE OUTPUTS ## +export hucNumber=$hucNumber +export current_branch_id=$current_branch_id +export outputCurrentBranchDataDir=$outputCurrentBranchDataDir +export outputHucDataDir=$outputHucDataDir +export ndv=$ndv +export xmin=$xmin +export ymin=$ymin +export xmax=$xmax +export ymax=$ymax +export ncols=$ncols +export nrows=$nrows +$srcDir/delineate_hydros_and_produce_HAND.sh "branch" + +## USGS CROSSWALK ## +if [ -f $outputHucDataDir/usgs_subset_gages.gpkg ]; then + echo -e $startDiv"USGS Crosswalk $hucNumber $current_branch_id" + date -u + Tstart + python3 $srcDir/usgs_gage_crosswalk.py -gages $outputHucDataDir/usgs_subset_gages.gpkg -flows $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -cat $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -dem $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -dem_adj $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif -outtable $outputCurrentBranchDataDir/usgs_elev_table.csv -b $current_branch_id + Tcount +fi + +## REMOVE FILES FROM DENY LIST ## +if [ -f $deny_branches_list ]; then + echo -e $startDiv"Remove files $hucNumber $current_branch_id" + date -u + Tstart + $srcDir/outputs_cleanup.py -d $outputCurrentBranchDataDir -l $deny_branches_list -b $current_branch_id + Tcount +fi + +echo -e $startDiv"End Branch Processing $hucNumber $current_branch_id ..." 
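run_by_branch.sh above burns the level-path streams and headwaters into boolean grids with gdal_rasterize (burn 1 over an init 0 background, on the DEM's grid and extent). For readers more familiar with the Python stack used elsewhere in this repository, an equivalent operation can be sketched with rasterio and geopandas; this is illustrative only and is not what the script actually calls.

    import geopandas as gpd
    import rasterio
    from rasterio import features

    def rasterize_boolean(streams_gpkg, template_dem_tif, out_tif):
        # Burn stream geometries as 1 over a 0 background on the DEM grid,
        # mirroring the gdal_rasterize step above.
        streams = gpd.read_file(streams_gpkg)
        with rasterio.open(template_dem_tif) as dem:
            meta = dem.meta.copy()
            burned = features.rasterize(
                ((geom, 1) for geom in streams.geometry),
                out_shape=(dem.height, dem.width),
                transform=dem.transform,
                fill=0,
                dtype="int32")
        meta.update(count=1, dtype="int32", nodata=None, compress="lzw")
        with rasterio.open(out_tif, "w", **meta) as dst:
            dst.write(burned, 1)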
+echo \ No newline at end of file diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh deleted file mode 100755 index b46f7ce9a..000000000 --- a/src/run_by_unit.sh +++ /dev/null @@ -1,422 +0,0 @@ -#!/bin/bash -e - -## INITIALIZE TOTAL TIME TIMER ## -T_total_start - -echo -e $startDiv"Parameter Values" -echo -e "extent=$extent" -echo -e "agree_DEM_buffer=$agree_DEM_buffer" -echo -e "wbd_buffer=$wbd_buffer" -echo -e "ms_buffer_dist=$ms_buffer_dist" -echo -e "lakes_buffer_dist_meters=$lakes_buffer_dist_meters" -echo -e "negative_burn_value=$negative_burn_value" -echo -e "max_split_distance_meters=$max_split_distance_meters" -echo -e "mannings_n=$manning_n" -echo -e "stage_min_meters=$stage_min_meters" -echo -e "stage_interval_meters=$stage_interval_meters" -echo -e "stage_max_meters=$stage_max_meters" -echo -e "slope_min=$slope_min" -echo -e "ms_buffer_dist=$ms_buffer_dist" -echo -e "ncores_gw=$ncores_gw" -echo -e "ncores_fd=$ncores_fd" -echo -e "default_max_jobs=$default_max_jobs" -echo -e "memfree=$memfree"$stopDiv - -## SET OUTPUT DIRECTORY FOR UNIT ## -hucNumber="$1" -outputHucDataDir=$outputRunDataDir/$hucNumber -mkdir $outputHucDataDir - -## SET VARIABLES AND FILE INPUTS ## -hucUnitLength=${#hucNumber} -huc4Identifier=${hucNumber:0:4} -huc2Identifier=${hucNumber:0:2} -input_NHD_WBHD_layer=WBDHU$hucUnitLength -input_DEM=$inputDataDir/nhdplus_rasters/HRNHDPlusRasters"$huc4Identifier"/elev_m.tif -input_NLD=$inputDataDir/nld_vectors/huc2_levee_lines/nld_preprocessed_"$huc2Identifier".gpkg -input_bathy_bankfull=$inputDataDir/$bankfull_input_table - -# Define the landsea water body mask using either Great Lakes or Ocean polygon input # -if [[ $huc2Identifier == "04" ]] ; then - input_LANDSEA=$input_GL_boundaries - echo -e "Using $input_LANDSEA for water body mask (Great Lakes)" -else - input_LANDSEA=$inputDataDir/landsea/water_polygons_us.gpkg -fi - -## GET WBD ## -echo -e $startDiv"Get WBD $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/wbd.gpkg ] && \ -ogr2ogr -f GPKG $outputHucDataDir/wbd.gpkg $input_WBD_gdb $input_NHD_WBHD_layer -where "HUC$hucUnitLength='$hucNumber'" -Tcount - -## Subset Vector Layers ## -echo -e $startDiv"Get Vector Layers and Subset $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg ] && \ -python3 -m memory_profiler $srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_nwm_flows -s $input_nhd_flowlines -l $input_nwm_lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_nwm_catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -extent $extent -gl $input_GL_boundaries -lb $lakes_buffer_dist_meters -wb $wbd_buffer -Tcount - -if [ "$extent" = "MS" ]; then - if [[ ! -f $outputHucDataDir/nhd_headwater_points_subset.gpkg ]] ; then - echo "EXIT FLAG!! (exit 55): No AHPs point(s) within HUC $hucNumber boundaries. 
Aborting run_by_unit.sh" - rm -rf $outputHucDataDir - exit 0 - fi -fi - -## Clip WBD8 ## -echo -e $startDiv"Clip WBD8"$stopDiv -date -u -Tstart -ogr2ogr -f GPKG -clipsrc $outputHucDataDir/wbd_buffered.gpkg $outputHucDataDir/wbd8_clp.gpkg $inputDataDir/wbd/WBD_National.gpkg WBDHU8 -Tcount - -## CLIP DEM ## -echo -e $startDiv"Clip DEM $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/dem_meters.tif ] && \ -gdalwarp -cutline $outputHucDataDir/wbd_buffered.gpkg -crop_to_cutline -ot Float32 -r bilinear -of "GTiff" -overwrite -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" $input_DEM $outputHucDataDir/dem_meters.tif -Tcount - -## GET RASTER METADATA -echo -e $startDiv"Get DEM Metadata $hucNumber"$stopDiv -date -u -Tstart -read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputHucDataDir/dem_meters.tif) - -## RASTERIZE NLD MULTILINES ## -echo -e $startDiv"Rasterize all NLD multilines using zelev vertices"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/nld_rasterized_elev.tif ] && [ -f $outputHucDataDir/nld_subset_levees.gpkg ] && \ -gdal_rasterize -l nld_subset_levees -3d -at -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputHucDataDir/nld_rasterized_elev.tif -Tcount - -## RASTERIZE REACH BOOLEAN (1 & 0) ## -echo -e $startDiv"Rasterize Reach Boolean $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/flows_grid_boolean.tif ] && \ -gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg $outputHucDataDir/flows_grid_boolean.tif -Tcount - -## RASTERIZE NHD HEADWATERS (1 & 0) ## -echo -e $startDiv"Rasterize NHD Headwaters $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/headwaters.tif ] && \ -gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/nhd_headwater_points_subset.gpkg $outputHucDataDir/headwaters.tif -Tcount - -if [ "$extent" = "FR" ]; then - # RASTERIZE NWM CATCHMENTS ## - echo -e $startDiv"Raster NWM Catchments $hucNumber"$stopDiv - date -u - Tstart - [ ! -f $outputHucDataDir/nwm_catchments_proj_subset.tif ] && \ - gdal_rasterize -ot Int32 -a ID -a_nodata 0 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/nwm_catchments_proj_subset.gpkg $outputHucDataDir/nwm_catchments_proj_subset.tif - Tcount -fi - -## BURN LEVEES INTO DEM ## -echo -e $startDiv"Burn nld levees into dem & convert nld elev to meters (*Overwrite dem_meters.tif output) $hucNumber"$stopDiv -date -u -Tstart -[ -f $outputHucDataDir/nld_rasterized_elev.tif ] && \ -python3 -m memory_profiler $srcDir/burn_in_levees.py -dem $outputHucDataDir/dem_meters.tif -nld $outputHucDataDir/nld_rasterized_elev.tif -out $outputHucDataDir/dem_meters.tif -Tcount - -## DEM Reconditioning ## -# Using AGREE methodology, hydroenforce the DEM so that it is consistent with the supplied stream network. -# This allows for more realistic catchment delineation which is ultimately reflected in the output FIM mapping. -echo -e $startDiv"Creating AGREE DEM using $agree_DEM_buffer meter buffer"$stopDiv -date -u -Tstart -[ ! 
-f $outputHucDataDir/dem_burned.tif ] && \ -python3 -m memory_profiler $srcDir/agreedem.py -r $outputHucDataDir/flows_grid_boolean.tif -d $outputHucDataDir/dem_meters.tif -w $outputHucDataDir -g $outputHucDataDir/temp_work -o $outputHucDataDir/dem_burned.tif -b $agree_DEM_buffer -sm 10 -sh 1000 -Tcount - -## PIT REMOVE BURNED DEM ## -echo -e $startDiv"Pit remove Burned DEM $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/dem_burned_filled.tif ] && \ -rd_depression_filling $outputHucDataDir/dem_burned.tif $outputHucDataDir/dem_burned_filled.tif -Tcount - -## D8 FLOW DIR ## -echo -e $startDiv"D8 Flow Directions on Burned DEM $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/flowdir_d8_burned_filled.tif ] && \ -mpiexec -n $ncores_fd $taudemDir2/d8flowdir -fel $outputHucDataDir/dem_burned_filled.tif -p $outputHucDataDir/flowdir_d8_burned_filled.tif -Tcount - -## DINF FLOW DIR ## -# echo -e $startDiv"DINF on Filled Thalweg Conditioned DEM"$stopDiv -# date -u -# Tstart -# [ ! -f $outputHucDataDir/flowdir_dinf_thalwegCond.tif] && \ -# mpiexec -n $ncores_fd $taudemDir2/dinfflowdir -fel $outputHucDataDir/dem_thalwegCond_filled.tif -ang $outputHucDataDir/flowdir_dinf_thalwegCond.tif -slp $outputHucDataDir/slopes_dinf.tif -# Tcount - -## D8 FLOW ACCUMULATIONS ## -echo -e $startDiv"D8 Flow Accumulations $hucNumber"$stopDiv -date -u -Tstart -$taudemDir/aread8 -p $outputHucDataDir/flowdir_d8_burned_filled.tif -ad8 $outputHucDataDir/flowaccum_d8_burned_filled.tif -wg $outputHucDataDir/headwaters.tif -nc -Tcount - -# THRESHOLD ACCUMULATIONS ## -echo -e $startDiv"Threshold Accumulations $hucNumber"$stopDiv -date -u -Tstart -$taudemDir/threshold -ssa $outputHucDataDir/flowaccum_d8_burned_filled.tif -src $outputHucDataDir/demDerived_streamPixels.tif -thresh 1 -Tcount - -## PREPROCESSING FOR LATERAL THALWEG ADJUSTMENT ### -echo -e $startDiv"Preprocessing for lateral thalweg adjustment $hucNumber"$stopDiv -date -u -Tstart -python3 -m memory_profiler $srcDir/unique_pixel_and_allocation.py -s $outputHucDataDir/demDerived_streamPixels.tif -o $outputHucDataDir/demDerived_streamPixels_ids.tif -g $outputHucDataDir/temp_grass -Tcount - -## ADJUST THALWEG MINIMUM USING LATERAL ZONAL MINIMUM ## -echo -e $startDiv"Performing lateral thalweg adjustment $hucNumber"$stopDiv -date -u -Tstart -python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif -th $thalweg_lateral_elev_threshold -Tcount - -## MASK BURNED DEM FOR STREAMS ONLY ### -echo -e $startDiv"Mask Burned DEM for Thalweg Only $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/flowdir_d8_burned_filled_flows.tif ] && \ -gdal_calc.py --quiet --type=Int32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputHucDataDir/flowdir_d8_burned_filled.tif -B $outputHucDataDir/demDerived_streamPixels.tif --calc="A/B" --outfile="$outputHucDataDir/flowdir_d8_burned_filled_flows.tif" --NoDataValue=0 -Tcount - -## FLOW CONDITION STREAMS ## -echo -e $startDiv"Flow Condition Thalweg $hucNumber"$stopDiv -date -u -Tstart -[ ! 
-f $outputHucDataDir/dem_thalwegCond.tif ] && \ -$taudemDir/flowdircond -p $outputHucDataDir/flowdir_d8_burned_filled_flows.tif -z $outputHucDataDir/dem_lateral_thalweg_adj.tif -zfdc $outputHucDataDir/dem_thalwegCond.tif -Tcount - -## D8 SLOPES ## -echo -e $startDiv"D8 Slopes from DEM $hucNumber"$stopDiv -date -u -Tstart -mpiexec -n $ncores_fd $taudemDir2/d8flowdir -fel $outputHucDataDir/dem_lateral_thalweg_adj.tif -sd8 $outputHucDataDir/slopes_d8_dem_meters.tif -Tcount - -# STREAMNET FOR REACHES ## -echo -e $startDiv"Stream Net for Reaches $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/demDerived_reaches.shp ] && \ -$taudemDir/streamnet -p $outputHucDataDir/flowdir_d8_burned_filled.tif -fel $outputHucDataDir/dem_thalwegCond.tif -ad8 $outputHucDataDir/flowaccum_d8_burned_filled.tif -src $outputHucDataDir/demDerived_streamPixels.tif -ord $outputHucDataDir/streamOrder.tif -tree $outputHucDataDir/treeFile.txt -coord $outputHucDataDir/coordFile.txt -w $outputHucDataDir/sn_catchments_reaches.tif -net $outputHucDataDir/demDerived_reaches.shp -Tcount - -## SPLIT DERIVED REACHES ## -echo -e $startDiv"Split Derived Reaches $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/demDerived_reaches_split.gpkg ] && \ -python3 -m memory_profiler $srcDir/split_flows.py -f $outputHucDataDir/demDerived_reaches.shp -d $outputHucDataDir/dem_thalwegCond.tif -s $outputHucDataDir/demDerived_reaches_split.gpkg -p $outputHucDataDir/demDerived_reaches_split_points.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -l $outputHucDataDir/nwm_lakes_proj_subset.gpkg -Tcount - -if [[ ! -f $outputHucDataDir/demDerived_reaches_split.gpkg ]] ; then - echo "EXIT FLAG!! (exit 56): No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" - rm -rf $outputHucDataDir - exit 0 -fi - -if [ "$extent" = "MS" ]; then - ## MASK RASTERS BY MS BUFFER ## - echo -e $startDiv"Mask Rasters with Stream Buffer $hucNumber"$stopDiv - date -u - Tstart - python3 -m memory_profiler $srcDir/fr_to_ms_raster_mask.py -s $outputHucDataDir/demDerived_reaches_split.gpkg -f $outputHucDataDir/flowdir_d8_burned_filled.tif -d $outputHucDataDir/dem_thalwegCond.tif -r $outputHucDataDir/slopes_d8_dem_meters.tif -m $outputHucDataDir/flowdir_d8_MS.tif -n $outputHucDataDir/dem_thalwegCond_MS.tif -o $outputHucDataDir/slopes_d8_dem_metersMS.tif -p $outputHucDataDir/demDerived_streamPixels.tif -q $outputHucDataDir/demDerived_streamPixelsMS.tif - Tcount - - if [[ ! -f $outputHucDataDir/dem_thalwegCond_MS.tif ]] ; then - echo "EXIT FLAG!! (exit 57): No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" - rm -rf $outputHucDataDir - exit 0 - fi - - dem_thalwegCond=$outputHucDataDir/dem_thalwegCond_MS.tif - slopes_d8_dem_meters=$outputHucDataDir/slopes_d8_dem_metersMS.tif - flowdir_d8_burned_filled=$outputHucDataDir/flowdir_d8_MS.tif - demDerived_streamPixels=$outputHucDataDir/demDerived_streamPixelsMS.tif -else - dem_thalwegCond=$outputHucDataDir/dem_thalwegCond.tif - slopes_d8_dem_meters=$outputHucDataDir/slopes_d8_dem_meters.tif - flowdir_d8_burned_filled=$outputHucDataDir/flowdir_d8_burned_filled.tif - demDerived_streamPixels=$outputHucDataDir/demDerived_streamPixels.tif -fi - -## GAGE WATERSHED FOR REACHES ## -echo -e $startDiv"Gage Watershed for Reaches $hucNumber"$stopDiv -date -u -Tstart -[ ! 
-f $outputHucDataDir/gw_catchments_reaches.tif ] && \ -mpiexec -n $ncores_gw $taudemDir/gagewatershed -p $flowdir_d8_burned_filled -gw $outputHucDataDir/gw_catchments_reaches.tif -o $outputHucDataDir/demDerived_reaches_split_points.gpkg -id $outputHucDataDir/idFile.txt -Tcount - -## VECTORIZE FEATURE ID CENTROIDS ## -echo -e $startDiv"Vectorize Pixel Centroids $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/flows_points_pixels.gpkg ] && \ -python3 -m memory_profiler $srcDir/reachID_grid_to_vector_points.py -r $demDerived_streamPixels -i featureID -p $outputHucDataDir/flows_points_pixels.gpkg -Tcount - -## GAGE WATERSHED FOR PIXELS ## -echo -e $startDiv"Gage Watershed for Pixels $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/gw_catchments_pixels.tif ] && \ -mpiexec -n $ncores_gw $taudemDir/gagewatershed -p $flowdir_d8_burned_filled -gw $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/flows_points_pixels.gpkg -id $outputHucDataDir/idFile.txt -Tcount - -# D8 REM ## -echo -e $startDiv"D8 REM $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/rem.tif ] && \ -python3 -m memory_profiler $srcDir/rem.py -d $dem_thalwegCond -w $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/rem.tif -t $demDerived_streamPixels -i $outputHucDataDir/gw_catchments_reaches.tif -s $outputHucDataDir/demDerived_reaches_split.gpkg -Tcount - -## DINF DISTANCE DOWN ## -# echo -e $startDiv"DINF Distance Down on Filled Thalweg Conditioned DEM $hucNumber"$stopDiv -# date -u -# Tstart -# [ ! -f $outputHucDataDir/flowdir_dinf_thalwegCond.tif] && \ -# mpiexec -n $ncores_fd $taudemDir/dinfdistdown -ang $outputHucDataDir/flowdir_dinf_thalwegCond.tif -fel $outputHucDataDir/dem_thalwegCond_filled.tif -src $demDerived_streamPixels -dd $outputHucDataDir/rem.tif -m ave h -# Tcount - -## BRING DISTANCE DOWN TO ZERO ## -echo -e $startDiv"Zero out negative values in distance down grid $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/rem_zeroed.tif ] && \ -gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputHucDataDir/rem.tif --calc="(A*(A>=0))" --NoDataValue=$ndv --outfile=$outputHucDataDir/"rem_zeroed.tif" -Tcount - -## POLYGONIZE REACH WATERSHEDS ## -echo -e $startDiv"Polygonize Reach Watersheds $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/gw_catchments_reaches.gpkg ] && \ -gdal_polygonize.py -8 -f GPKG $outputHucDataDir/gw_catchments_reaches.tif $outputHucDataDir/gw_catchments_reaches.gpkg catchments HydroID -Tcount - -## PROCESS CATCHMENTS AND MODEL STREAMS STEP 1 ## -echo -e $startDiv"Process catchments and model streams step 1 $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg ] && \ -python3 -m memory_profiler $srcDir/filter_catchments_and_add_attributes.py -i $outputHucDataDir/gw_catchments_reaches.gpkg -f $outputHucDataDir/demDerived_reaches_split.gpkg -c $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -o $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -u $hucNumber - -if [[ ! -f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg ]] ; then - echo "EXIT FLAG!! (exit 65): No relevant streams within HUC $hucNumber boundaries. 
Aborting run_by_unit.sh" - rm -rf $outputHucDataDir - exit 0 -fi -Tcount - -## GET RASTER METADATA ## ***** -echo -e $startDiv"Get Clipped Raster Metadata $hucNumber"$stopDiv -date -u -Tstart -read fsize ncols nrows ndv_clipped xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputHucDataDir/gw_catchments_reaches.tif) -Tcount - -## RASTERIZE NEW CATCHMENTS AGAIN ## -echo -e $startDiv"Rasterize filtered catchments $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.tif ] && \ -gdal_rasterize -ot Int32 -a HydroID -a_nodata 0 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.tif -Tcount - -## RASTERIZE LANDSEA (OCEAN AREA) POLYGON (IF APPLICABLE) ## -echo -e $startDiv"Rasterize filtered/dissolved ocean/Glake polygon $hucNumber"$stopDiv -date -u -Tstart -[ -f $outputHucDataDir/LandSea_subset.gpkg ] && [ ! -f $outputHucDataDir/LandSea_subset.tif ] && \ -gdal_rasterize -ot Int32 -burn $ndv -a_nodata $ndv -init 1 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/LandSea_subset.gpkg $outputHucDataDir/LandSea_subset.tif -Tcount - -## MASK SLOPE RASTER ## -echo -e $startDiv"Masking Slope Raster to HUC $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/slopes_d8_dem_meters_masked.tif ] && \ -gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $slopes_d8_dem_meters -B $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.tif --calc="(A*(B>0))+((B<=0)*-1)" --NoDataValue=-1 --outfile=$outputHucDataDir/"slopes_d8_dem_meters_masked.tif" -Tcount - -## MASK REM RASTER ## -echo -e $startDiv"Masking REM Raster to HUC $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/rem_zeroed_masked.tif ] && \ -gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputHucDataDir/rem_zeroed.tif -B $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.tif --calc="(A*(B>0))" --NoDataValue=$ndv --outfile=$outputHucDataDir/"rem_zeroed_masked.tif" -Tcount - -## MASK REM RASTER TO REMOVE OCEAN AREAS ## -echo -e $startDiv"Additional masking to REM raster to remove ocean/Glake areas in HUC $hucNumber"$stopDiv -date -u -Tstart -[ -f $outputHucDataDir/LandSea_subset.tif ] && \ -gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputHucDataDir/rem_zeroed_masked.tif -B $outputHucDataDir/LandSea_subset.tif --calc="(A*B)" --NoDataValue=$ndv --outfile=$outputHucDataDir/"rem_zeroed_masked.tif" -Tcount - -## MAKE CATCHMENT AND STAGE FILES ## -echo -e $startDiv"Generate Catchment List and Stage List Files $hucNumber"$stopDiv -date -u -Tstart -python3 -m memory_profiler $srcDir/make_stages_and_catchlist.py -f $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -c $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -s $outputHucDataDir/stage.txt -a $outputHucDataDir/catchment_list.txt -m $stage_min_meters -i $stage_interval_meters -t $stage_max_meters -Tcount - -## HYDRAULIC PROPERTIES ## -echo -e $startDiv"Hydraulic Properties $hucNumber"$stopDiv -date -u -Tstart -[ ! 
-f $outputHucDataDir/src_base.csv ] && \ -$taudemDir/catchhydrogeo -hand $outputHucDataDir/rem_zeroed_masked.tif -catch $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.tif -catchlist $outputHucDataDir/catchment_list.txt -slp $outputHucDataDir/slopes_d8_dem_meters_masked.tif -h $outputHucDataDir/stage.txt -table $outputHucDataDir/src_base.csv -Tcount - -## FINALIZE CATCHMENTS AND MODEL STREAMS ## -echo -e $startDiv"Finalize catchments and model streams $hucNumber"$stopDiv output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName, -date -u -Tstart -[ ! -f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg ] && \ -python3 -m memory_profiler $srcDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_nwm_catchments -p $extent -k $outputHucDataDir/small_segments.csv -Tcount - -## USGS CROSSWALK ## -echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv -date -u -Tstart -python3 -m memory_profiler $srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv -e $extent -Tcount - -## CLEANUP OUTPUTS ## -echo -e $startDiv"Cleaning up outputs $hucNumber"$stopDiv -args=() -[[ ! -z "$whitelist" ]] && args+=( "-w$whitelist" ) -(( production == 1 )) && args+=( '-p' ) -(( viz == 1 )) && args+=( '-v' ) -date -u -Tstart -python3 -m memory_profiler $srcDir/output_cleanup.py $hucNumber $outputHucDataDir "${args[@]}" -Tcount diff --git a/src/run_unit_wb.sh b/src/run_unit_wb.sh new file mode 100755 index 000000000..ab41e9919 --- /dev/null +++ b/src/run_unit_wb.sh @@ -0,0 +1,289 @@ +#!/bin/bash -e + +# Do not call this file directly. Call fim_process_unit_wb.sh which calls +# this file. 
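A wrapper such as fim_process_unit_wb.sh consumes this script's exit status. A minimal sketch of that pattern is shown below (a hypothetical Python wrapper, not the project's actual bash wrapper); the only assumption is that exit code 61 maps to FIM_exit_codes.NO_FLOWLINES_EXIST, as used later in this diff.

```python
# Hypothetical wrapper sketch: run the per-HUC unit script and treat the
# custom "no flowlines" exit code as a clean skip rather than a failure.
import subprocess
import sys

NO_FLOWLINES_EXIST = 61  # assumed value; mirrors FIM_exit_codes.NO_FLOWLINES_EXIST used below

def run_unit(cmd):
    proc = subprocess.run(cmd)
    if proc.returncode == NO_FLOWLINES_EXIST:
        print("No flowlines in this HUC -- skipping, not failing")
        return 0
    return proc.returncode

if __name__ == "__main__":
    # Placeholder command standing in for an actual run_unit_wb.sh invocation.
    sys.exit(run_unit(["bash", "-c", "exit 61"]))
```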
+ +## SOURCE FILE AND FUNCTIONS ## +# load the various enviro files +args_file=$outputRunDataDir/runtime_args.env + +source $args_file +source $outputRunDataDir/params.env +source $srcDir/bash_functions.env +source $srcDir/bash_variables.env + +branch_list_csv_file=$outputHucDataDir/branch_ids.csv +branch_list_lst_file=$outputHucDataDir/branch_ids.lst + +branchSummaryLogFile=$outputRunDataDir/logs/branch/"$hucNumber"_summary_branch.log + +## INITIALIZE TOTAL TIME TIMER ## +T_total_start +huc_start_time=`date +%s` + +## SET VARIABLES AND FILE INPUTS ## +hucUnitLength=${#hucNumber} +huc4Identifier=${hucNumber:0:4} +huc2Identifier=${hucNumber:0:2} +input_NHD_WBHD_layer=WBDHU$hucUnitLength + +# Define the landsea water body mask using either Great Lakes or Ocean polygon input # +if [[ $huc2Identifier == "04" ]] ; then + input_LANDSEA=$input_GL_boundaries + #echo -e "Using $input_LANDSEA for water body mask (Great Lakes)" +else + input_LANDSEA=$inputDataDir/landsea/water_polygons_us.gpkg +fi + +## GET WBD ## +echo -e $startDiv"Get WBD $hucNumber" +date -u +Tstart +ogr2ogr -f GPKG -t_srs $DEFAULT_FIM_PROJECTION_CRS $outputHucDataDir/wbd.gpkg $input_WBD_gdb $input_NHD_WBHD_layer -where "HUC$hucUnitLength='$hucNumber'" +Tcount + +## Subset Vector Layers ## +echo -e $startDiv"Get Vector Layers and Subset $hucNumber" +date -u +Tstart + +cmd_args=" -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg" +cmd_args+=" -b $outputHucDataDir/nwm_subset_streams.gpkg" +cmd_args+=" -d $hucNumber" +cmd_args+=" -e $outputHucDataDir/nwm_headwater_points_subset.gpkg" +cmd_args+=" -f $outputHucDataDir/wbd_buffered.gpkg" +cmd_args+=" -g $outputHucDataDir/wbd.gpkg" +cmd_args+=" -i $input_DEM" +cmd_args+=" -j $input_DEM_domain" +cmd_args+=" -l $input_nwm_lakes" +cmd_args+=" -m $input_nwm_catchments" +cmd_args+=" -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg" +cmd_args+=" -r $input_NLD" +cmd_args+=" -rp $input_levees_preprocessed" +cmd_args+=" -v $input_LANDSEA" +cmd_args+=" -w $input_nwm_flows" +cmd_args+=" -x $outputHucDataDir/LandSea_subset.gpkg" +cmd_args+=" -y $input_nwm_headwaters" +cmd_args+=" -z $outputHucDataDir/nld_subset_levees.gpkg" +cmd_args+=" -zp $outputHucDataDir/3d_nld_subset_levees_burned.gpkg" +cmd_args+=" -wb $wbd_buffer" +cmd_args+=" -lpf $input_nld_levee_protected_areas" +cmd_args+=" -lps $outputHucDataDir/LeveeProtectedAreas_subset.gpkg" + +#echo "$cmd_args" +python3 $srcDir/clip_vectors_to_wbd.py $cmd_args +Tcount + +## Clip WBD8 ## +echo -e $startDiv"Clip WBD8" +date -u +Tstart +ogr2ogr -f GPKG -t_srs $DEFAULT_FIM_PROJECTION_CRS -clipsrc $outputHucDataDir/wbd_buffered.gpkg $outputHucDataDir/wbd8_clp.gpkg $inputDataDir/wbd/WBD_National.gpkg WBDHU8 +Tcount + +## DERIVE LEVELPATH ## +echo -e $startDiv"Generating Level Paths for $hucNumber" +date -u +Tstart +$srcDir/derive_level_paths.py -i $outputHucDataDir/nwm_subset_streams.gpkg -b $branch_id_attribute -r "ID" -o $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -e $outputHucDataDir/nwm_headwaters.gpkg -c $outputHucDataDir/nwm_catchments_proj_subset.gpkg -t $outputHucDataDir/nwm_catchments_proj_subset_levelPaths.gpkg -n $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg -w $outputHucDataDir/nwm_lakes_proj_subset.gpkg + +# test if we received a non-zero code back from derive_level_paths.py +subscript_exit_code=$? 
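The water-body mask selection above keys off the first two digits of the HUC code. A small sketch of that rule follows; the Great Lakes layer path is an illustrative placeholder, not the actual $input_GL_boundaries value.

```python
# Sketch of the landsea mask rule: HUC2 "04" is the Great Lakes region, so the
# Great Lakes boundary layer is used there; everywhere else uses the ocean
# water-polygon layer.
import os

def pick_landsea_mask(huc_number, input_data_dir, gl_boundaries):
    if huc_number[:2] == "04":
        return gl_boundaries
    return os.path.join(input_data_dir, "landsea", "water_polygons_us.gpkg")

print(pick_landsea_mask("04100003", "/data/inputs", "/data/inputs/landsea/gl_water_polygons.gpkg"))
print(pick_landsea_mask("12090301", "/data/inputs", "/data/inputs/landsea/gl_water_polygons.gpkg"))
```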
+# we have to retrow it if it is not a zero (but it will stop further execution in this script) +if [ $subscript_exit_code -ne 0 ]; then exit $subscript_exit_code; fi +Tcount + +## ASSOCIATE LEVEL PATHS WITH LEVEES +echo -e $startDiv"Associate level paths with levees" +date -u +Tstart +[ -f $outputHucDataDir/nld_subset_levees.gpkg ] && \ +python3 $srcDir/associate_levelpaths_with_levees.py -nld $outputHucDataDir/nld_subset_levees.gpkg -s $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -lpa $outputHucDataDir/LeveeProtectedAreas_subset.gpkg -out $outputHucDataDir/levee_levelpaths.csv -w $levee_buffer -b $branch_id_attribute -l $levee_id_attribute +Tcount + +## STREAM BRANCH POLYGONS +echo -e $startDiv"Generating Stream Branch Polygons for $hucNumber" +date -u +Tstart +$srcDir/buffer_stream_branches.py -a $input_DEM_domain -s $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -i $branch_id_attribute -d $branch_buffer_distance_meters -b $outputHucDataDir/branch_polygons.gpkg +Tcount + +## CREATE BRANCHID LIST FILE +echo -e $startDiv"Create list file of branch ids for $hucNumber" +date -u +Tstart +$srcDir/generate_branch_list.py -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -b $branch_id_attribute -o $branch_list_lst_file +Tcount + +## CREATE BRANCH ZERO ## +echo -e $startDiv"Creating branch zero for $hucNumber" +outputCurrentBranchDataDir=$outputBranchDataDir/$branch_zero_id + +## MAKE OUTPUT BRANCH DIRECTORY +mkdir -p $outputCurrentBranchDataDir + +## CLIP RASTERS +echo -e $startDiv"Clipping rasters to branches $hucNumber $branch_zero_id" +# Note: don't need to use gdalwarp -cblend as we are using a buffered wbd +date -u +Tstart +[ ! -f $outputCurrentBranchDataDir/dem_meters.tif ] && \ +gdalwarp -cutline $outputHucDataDir/wbd_buffered.gpkg -crop_to_cutline -ot Float32 -r bilinear -of "GTiff" -overwrite -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -t_srs $DEFAULT_FIM_PROJECTION_CRS $input_DEM $outputHucDataDir/dem_meters.tif +Tcount + +## GET RASTER METADATA +echo -e $startDiv"Get DEM Metadata $hucNumber $branch_zero_id" +date -u +Tstart +read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputHucDataDir/dem_meters.tif) + +## RASTERIZE NLD MULTILINES ## +echo -e $startDiv"Rasterize all NLD multilines using zelev vertices $hucNumber $branch_zero_id" +date -u +Tstart +# REMAINS UNTESTED FOR AREAS WITH LEVEES +[ -f $outputHucDataDir/3d_nld_subset_levees_burned.gpkg ] && \ +gdal_rasterize -l 3d_nld_subset_levees_burned -3d -at -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/3d_nld_subset_levees_burned.gpkg $outputCurrentBranchDataDir/nld_rasterized_elev_$branch_zero_id.tif +Tcount + +## BURN LEVEES INTO DEM ## +echo -e $startDiv"Burn nld levees into dem & convert nld elev to meters (*Overwrite dem_meters.tif output) $hucNumber $branch_zero_id" +date -u +Tstart +# REMAINS UNTESTED FOR AREAS WITH LEVEES +[ -f $outputCurrentBranchDataDir/nld_subset_levees.tif ] && \ +python3 -m memory_profiler $srcDir/burn_in_levees.py -dem $outputHucDataDir/dem_meters.tif -nld $outputCurrentBranchDataDir/nld_rasterized_elev_$branch_zero_id.tif -out $outputHucDataDir/dem_meters.tif +Tcount + +## RASTERIZE REACH BOOLEAN (1 & 0) ## +echo -e $startDiv"Rasterize Reach Boolean $hucNumber $branch_zero_id" +date -u +Tstart 
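The getRasterInfoNative.py call above unpacks raster metadata into shell variables. A rough rasterio equivalent is sketched here; the field names and their ordering are assumed to match that helper, and the example path is illustrative.

```python
# Read the same metadata the shell step captures: size, dimensions, nodata,
# bounds, and cell size of the clipped DEM.
import os
import rasterio

def raster_info(dem_path):
    with rasterio.open(dem_path) as src:
        xmin, ymin, xmax, ymax = src.bounds
        res_x, res_y = src.res
        return {
            "fsize": os.path.getsize(dem_path),   # assumed meaning of 'fsize'
            "ncols": src.width, "nrows": src.height, "ndv": src.nodata,
            "xmin": xmin, "ymin": ymin, "xmax": xmax, "ymax": ymax,
            "cellsize_resx": res_x, "cellsize_resy": res_y,
        }

# Example (illustrative path):
# print(raster_info("/outputs/run/12090301/dem_meters.tif"))
```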
+gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/nwm_subset_streams.gpkg $outputCurrentBranchDataDir/flows_grid_boolean_$branch_zero_id.tif +Tcount + +## RASTERIZE NWM Levelpath HEADWATERS (1 & 0) ## +echo -e $startDiv"Rasterize NWM Headwaters $hucNumber $branch_zero_id" +date -u +Tstart +gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/nwm_headwater_points_subset.gpkg $outputCurrentBranchDataDir/headwaters_$branch_zero_id.tif +Tcount + +## DEM Reconditioning ## +# Using AGREE methodology, hydroenforce the DEM so that it is consistent with the supplied stream network. +# This allows for more realistic catchment delineation which is ultimately reflected in the output FIM mapping. +echo -e $startDiv"Creating AGREE DEM using $agree_DEM_buffer meter buffer $hucNumber $branch_zero_id" +date -u +Tstart +python3 -m memory_profiler $srcDir/agreedem.py -r $outputCurrentBranchDataDir/flows_grid_boolean_$branch_zero_id.tif -d $outputHucDataDir/dem_meters.tif -w $outputCurrentBranchDataDir -o $outputCurrentBranchDataDir/dem_burned_$branch_zero_id.tif -b $agree_DEM_buffer -sm 10 -sh 1000 +Tcount + +## PIT REMOVE BURNED DEM ## +echo -e $startDiv"Pit remove Burned DEM $hucNumber $branch_zero_id" +date -u +Tstart +rd_depression_filling $outputCurrentBranchDataDir/dem_burned_$branch_zero_id.tif $outputCurrentBranchDataDir/dem_burned_filled_$branch_zero_id.tif +Tcount + +## D8 FLOW DIR ## +echo -e $startDiv"D8 Flow Directions on Burned DEM $hucNumber $branch_zero_id" +date -u +Tstart +mpiexec -n $ncores_fd $taudemDir2/d8flowdir -fel $outputCurrentBranchDataDir/dem_burned_filled_$branch_zero_id.tif -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$branch_zero_id.tif +Tcount + +## MAKE A COPY OF THE DEM FOR BRANCH 0 +echo -e $startDiv"Copying DEM to Branch 0" +date -u +Tstart +cp $outputHucDataDir/dem_meters.tif $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif +Tcount + +## PRODUCE THE REM AND OTHER HAND FILE OUTPUTS ## +export hucNumber=$hucNumber +export current_branch_id=$current_branch_id +export outputCurrentBranchDataDir=$outputCurrentBranchDataDir +export outputHucDataDir=$outputHucDataDir +export ndv=$ndv +export xmin=$xmin +export ymin=$ymin +export xmax=$xmax +export ymax=$ymax +export ncols=$ncols +export nrows=$nrows + +## PRODUCE BRANCH ZERO HAND +$srcDir/delineate_hydros_and_produce_HAND.sh "unit" + +## CREATE USGS GAGES FILE +if [ -f $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg ]; then + echo -e $startDiv"Assigning USGS gages to branches for $hucNumber" + date -u + Tstart + python3 -m memory_profiler $srcDir/usgs_gage_unit_setup.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -nwm $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -o $outputHucDataDir/usgs_subset_gages.gpkg -huc $hucNumber -ahps $inputDataDir/ahps_sites/nws_lid.gpkg -bzero_id $branch_zero_id + Tcount +fi + +## USGS CROSSWALK ## +if [ -f $outputHucDataDir/usgs_subset_gages_$branch_zero_id.gpkg ]; then + echo -e $startDiv"USGS Crosswalk $hucNumber $branch_zero_id" + date -u + Tstart + python3 $srcDir/usgs_gage_crosswalk.py -gages $outputHucDataDir/usgs_subset_gages_$branch_zero_id.gpkg -flows $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$branch_zero_id.gpkg -cat $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$branch_zero_id.gpkg 
-dem $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif -dem_adj $outputCurrentBranchDataDir/dem_thalwegCond_$branch_zero_id.tif -outtable $outputCurrentBranchDataDir/usgs_elev_table.csv -b $branch_zero_id + Tcount +fi + +## CLEANUP BRANCH ZERO OUTPUTS ## +echo -e $startDiv"Cleaning up outputs in branch zero $hucNumber" +$srcDir/outputs_cleanup.py -d $outputCurrentBranchDataDir -l $deny_branch_zero_list -b $branch_zero_id + + +# ------------------- +## Start the local csv branch list +$srcDir/generate_branch_list_csv.py -o $branch_list_csv_file -u $hucNumber -b $branch_zero_id + +# ------------------- +## Processing Branches ## +echo +echo "---- Start of branch processing for $hucNumber" +branch_processing_start_time=`date +%s` + +if [ -f $branch_list_lst_file ]; then + # There may not be a branch_ids.lst if there were no level paths (no stream orders 3+) + # but there will still be a branch zero + parallel --eta --timeout $branch_timeout -j $jobBranchLimit --joblog $branchSummaryLogFile --colsep ',' -- $srcDir/process_branch.sh $runName $hucNumber :::: $branch_list_lst_file +else + echo "No level paths exist with this HUC. Processing branch zero only." +fi + +## REMOVE FILES FROM DENY LIST ## +if [ -f $deny_unit_list ]; then + echo -e $startDiv"Remove files $hucNumber" + date -u + Tstart + $srcDir/outputs_cleanup.py -d $outputHucDataDir -l $deny_unit_list -b $hucNumber + Tcount +fi + +# ------------------- +## REMOVE FILES FROM DENY LIST FOR BRANCH ZERO (but using normal branch deny) ## +if [ "$has_deny_branch_zero_override" == "1" ] +then + echo -e $startDiv"Second cleanup of files for branch zero (none default)" + $srcDir/outputs_cleanup.py -d $outputHucDataDir -l $deny_branch_zero_list -b 0 + +else + echo -e $startDiv"Second cleanup of files for branch zero using the default branch deny list" + $srcDir/outputs_cleanup.py -d $outputHucDataDir -l $deny_branches_list -b 0 +fi + +echo "---- HUC $hucNumber - branches have now been processed" +Calc_Duration $branch_processing_start_time +echo + +date -u +echo "---- HUC processing for $hucNumber is complete" +Calc_Duration $huc_start_time +echo + + diff --git a/src/split_flows.py b/src/split_flows.py index 4dff74fe0..100be57b1 100755 --- a/src/split_flows.py +++ b/src/split_flows.py @@ -9,34 +9,48 @@ 5) create points layer with segment verticies encoded with HydroID's (used for catchment delineation in next step) ''' -import sys +import argparse +import build_stream_traversal import geopandas as gpd +import numpy as np +import os import pandas as pd -from shapely.geometry import Point, LineString, MultiPoint import rasterio -import numpy as np -import argparse -from tqdm import tqdm +import sys import time -from os.path import isfile -from os import remove,environ + from collections import OrderedDict -import build_stream_traversal +from os import remove,environ,path +from os.path import isfile,split,dirname +from shapely import ops, wkt +from shapely.geometry import Point, LineString, MultiPoint +from shapely.ops import split as shapely_ops_split +from tqdm import tqdm from utils.shared_functions import getDriver, mem_profile from utils.shared_variables import FIM_ID +from utils.fim_enums import FIM_exit_codes @mem_profile -def split_flows(max_length, slope_min, lakes_buffer_input, flows_filename, dem_filename, split_flows_filename, split_points_filename, wbd8_clp_filename, lakes_filename): - wbd = gpd.read_file(wbd8_clp_filename) +def split_flows(max_length, + slope_min, + lakes_buffer_input, + flows_filename, + dem_filename, + 
split_flows_filename, + split_points_filename, + wbd8_clp_filename, + lakes_filename, + nwm_streams_filename): toMetersConversion = 1e-3 print('Loading data ...') flows = gpd.read_file(flows_filename) - if not len(flows) > 0: - print ("No relevant streams within HUC boundaries.") - sys.exit(0) + if (len(flows) == 0): + # this is not an exception, but a custom exit code that can be trapped + print("No relevant streams within HUC boundaries.") + sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back wbd8 = gpd.read_file(wbd8_clp_filename) dem = rasterio.open(dem_filename,'r') @@ -57,12 +71,52 @@ def split_flows(max_length, slope_min, lakes_buffer_input, flows_filename, dem_f slopes = [] hydro_id = 'HydroID' + # If loop addressing: https://github.com/NOAA-OWP/inundation-mapping/issues/560 + # if we are processing branch 0, skip this step + if (os.path.split(os.path.dirname(nwm_streams_filename))[1] != '0'): + print ('trimming DEM stream to NWM branch terminus') + # read in nwm lines, explode to ensure linestrings are the only geometry + levelpath_lines = gpd.read_file(nwm_streams_filename).explode() + + # Dissolve the linestring (how much faith should I hold that these are digitized with flow?) + linestring_geo = levelpath_lines.iloc[0]['geometry'] + if (len(levelpath_lines) > 1): + linestring_geo = ops.linemerge(levelpath_lines.dissolve(by='levpa_id').iloc[0]['geometry']) + + # Identify the end vertex (most downstream, should be last), transform into geodataframe + terminal_nwm_point = [] + first, last = linestring_geo.boundary + terminal_nwm_point.append({'ID':'teminal','geometry':last}) + snapped_point = gpd.GeoDataFrame(terminal_nwm_point).set_crs(levelpath_lines.crs) + + # Snap to DEM flows + snapped_point['geometry'] = snapped_point.apply(lambda row: flows.interpolate(flows.project( row.geometry)), axis=1) + + # Trim flows to snapped point + # buffer here because python precision issues, print(demDerived_reaches.distance(snapped_point) < 1e-8) + trimmed_line = shapely_ops_split(flows.iloc[0]['geometry'], snapped_point.iloc[0]['geometry'].buffer(1)) + # Edge cases: line string not split?, nothing is returned, split does not preserve linestring order? + # Note to dear reader: last here is really the most upstream segmennt (see crevats above). When we split we should get 3 segments, the most downstream one + # the tiny 1 meter segment that falls within the snapped point buffer, and the most upstream one. 
We want that last one which is why we trimmed_line[len(trimmed_line)-1] + last_line_segment = pd.DataFrame({'id':['first'],'geometry':[trimmed_line[len(trimmed_line)-1].wkt]}) + last_line_segment['geometry'] = last_line_segment['geometry'].apply(wkt.loads) # can be last_line_segment = gpd.GeoSeries.from_wkt(last_line_segment) when we update geopandas verisons + last_line_segment_geodataframe = gpd.GeoDataFrame(last_line_segment).set_crs(flows.crs) + + # replace geometry in merged flowine + flows['geometry'] = last_line_segment_geodataframe.iloc[0]['geometry'] + # split at HUC8 boundaries print ('splitting stream segments at HUC8 boundaries') flows = gpd.overlay(flows, wbd8, how='union').explode().reset_index(drop=True) + flows = flows[~flows.is_empty] + + if (len(flows) == 0): + # this is not an exception, but a custom exit code that can be trapped + print("No relevant streams within HUC boundaries.") + sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back # check for lake features - if lakes is not None: + if lakes is not None and len(flows) > 0 : if len(lakes) > 0: print ('splitting stream segments at ' + str(len(lakes)) + ' waterbodies') #create splits at lake boundaries @@ -77,6 +131,11 @@ def split_flows(max_length, slope_min, lakes_buffer_input, flows_filename, dem_f # remove empty geometries flows = flows.loc[~flows.is_empty,:] + if (len(flows) == 0): + # this is not an exception, but a custom exit code that can be trapped + print("No relevant streams within HUC boundaries.") + sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back + for i,lineString in tqdm(enumerate(flows.geometry),total=len(flows.geometry)): # Reverse geometry order (necessary for BurnLines) lineString = LineString(lineString.coords[::-1]) @@ -199,10 +258,18 @@ def split_flows(max_length, slope_min, lakes_buffer_input, flows_filename, dem_f if isfile(split_flows_filename): remove(split_flows_filename) - split_flows_gdf.to_file(split_flows_filename,driver=getDriver(split_flows_filename),index=False) - if isfile(split_points_filename): remove(split_points_filename) + + if (len(split_flows_gdf) == 0): + # this is not an exception, but a custom exit code that can be trapped + print("There are no flowlines after stream order filtering.") + sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back + + split_flows_gdf.to_file(split_flows_filename,driver=getDriver(split_flows_filename),index=False) + + if len(split_points_gdf) == 0: + raise Exception("No points exist.") split_points_gdf.to_file(split_points_filename,driver=getDriver(split_points_filename),index=False) @@ -210,7 +277,7 @@ def split_flows(max_length, slope_min, lakes_buffer_input, flows_filename, dem_f max_length = float(environ['max_split_distance_meters']) slope_min = float(environ['slope_min']) lakes_buffer_input = float(environ['lakes_buffer_dist_meters']) - + # Parse arguments. 
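The branch-terminus trimming above (the "Note to dear reader" comment) boils down to a snap-and-split on shapely 1.x geometries: project the NWM terminal vertex onto the DEM-derived flowline, split the flowline at a small buffer around the snapped point, and keep the most upstream piece. A minimal, self-contained sketch with synthetic coordinates, assuming the vertex order runs downstream to upstream as the comment above does:

```python
# Snap a terminal point to a flowline, split at a 1 m buffer around the
# snapped point (the buffer absorbs floating-point precision issues), and
# keep the last (most upstream) piece. Matches the shapely 1.7 API pinned
# in the Pipfile.
from shapely.geometry import LineString, Point
from shapely.ops import split

flowline = LineString([(0, 0), (100, 0), (200, 0)])  # DEM-derived reach
nwm_terminus = Point(150, 5)                          # terminal NWM levelpath vertex, off the line

# project() gives distance along the line, interpolate() the point on it
snapped = flowline.interpolate(flowline.project(nwm_terminus))

pieces = split(flowline, snapped.buffer(1))
trimmed = pieces[len(pieces) - 1]        # last piece == most upstream segment
print(round(trimmed.length, 1))          # 49.0 -> the reach upstream of the split
```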
parser = argparse.ArgumentParser(description='splitflows.py') parser.add_argument('-f', '--flows-filename', help='flows-filename',required=True) @@ -219,6 +286,7 @@ def split_flows(max_length, slope_min, lakes_buffer_input, flows_filename, dem_f parser.add_argument('-p', '--split-points-filename', help='split-points-filename',required=True) parser.add_argument('-w', '--wbd8-clp-filename', help='wbd8-clp-filename',required=True) parser.add_argument('-l', '--lakes-filename', help='lakes-filename',required=True) + parser.add_argument('-n', '--nwm-streams-filename', help='nwm-streams-filename',required=True) # Extract to dictionary and assign to variables. args = vars(parser.parse_args()) diff --git a/src/src_adjust_spatial_obs.py b/src/src_adjust_spatial_obs.py new file mode 100644 index 000000000..1f2d08f1d --- /dev/null +++ b/src/src_adjust_spatial_obs.py @@ -0,0 +1,414 @@ +#!/usr/bin/env python3 + +import argparse +import datetime as dt +import geopandas as gpd +#import json +import multiprocessing +import numpy as np +import os +import pandas as pd +import psycopg2 # python package for connecting to postgres +import rasterio +import sys +import time + +from collections import deque +from dotenv import load_dotenv +from geopandas.tools import sjoin +from multiprocessing import Pool +from src_roughness_optimization import update_rating_curve +from utils.shared_variables import DOWNSTREAM_THRESHOLD, ROUGHNESS_MIN_THRESH, ROUGHNESS_MAX_THRESH, DEFAULT_FIM_PROJECTION_CRS + +#import variables from .env file +load_dotenv() +CALIBRATION_DB_HOST = os.getenv("CALIBRATION_DB_HOST") +CALIBRATION_DB_NAME = os.getenv("CALIBRATION_DB_NAME") +CALIBRATION_DB_USER_NAME = os.getenv("CALIBRATION_DB_USER_NAME") +CALIBRATION_DB_PASS = os.getenv("CALIBRATION_DB_PASS") + +''' +The script imports a PostgreSQL database containing observed FIM extent points and associated flow data. This script attributes the point data with its hydroid and HAND values before passing a dataframe to the src_roughness_optimization.py workflow. + +Processing +- Define CRS to use for initial geoprocessing and read wbd_path and points_layer. +- Define paths to hydroTable.csv, HAND raster, catchments raster, and synthetic rating curve JSON. +- Clip the points water_edge_df to the huc cathments polygons (for faster processing?) +- Define coords variable to be used in point raster value attribution and use point geometry to determine catchment raster pixel values +- Check that there are valid obs in the water_edge_df (not empty) and convert pandas series to dataframe to pass to update_rating_curve +- Call update_rating_curve() to perform the rating curve calibration. 
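The attribution described in the processing steps above (sample the HAND and catchment rasters at each observation point, then take the median HAND per group) can be sketched as follows. File names are illustrative, the grouping is simplified, and the column names mirror those used in process_points() below.

```python
# Sample HAND and hydroid rasters at observation points, drop invalid hits,
# and compute the median HAND value per hydroid/flow group.
import geopandas as gpd
import rasterio

def median_hand_per_hydroid(points_gpkg, hand_tif, catchments_tif):
    pts = gpd.read_file(points_gpkg)
    coords = [(geom.x, geom.y) for geom in pts.geometry]
    with rasterio.open(hand_tif) as hand_src, rasterio.open(catchments_tif) as catch_src:
        pts["hand"] = [val[0] for val in hand_src.sample(coords)]
        pts["hydroid"] = [val[0] for val in catch_src.sample(coords)]
    # keep points that landed on a valid catchment with positive HAND
    pts = pts[(pts["hydroid"] > 0) & (pts["hand"] > 0)]
    return pts.groupby(["hydroid", "flow"])["hand"].median().reset_index()

# median_hand_per_hydroid("obs_points.gpkg", "rem_zeroed_masked_0.tif",
#                         "gw_catchments_reaches_filtered_addedAttributes_0.tif")
```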
+ +Inputs +- points_layer: .gpkg layer containing observed/truth FIM extent points and associated flow value +- fim_directory: fim directory containing individual HUC output dirs +- wbd_path: path the watershed boundary dataset layer (HUC polygon boundaries) +- job_number: number of multi-processing jobs to use +- debug_outputs_option: optional flag to output intermediate files for reviewing/debugging + +Outputs +- water_edge_median_df: dataframe containing "hydroid", "flow", "submitter", "coll_time", "flow_unit", "layer", and median "HAND" value +''' + +def process_points(args): + + ''' + The function ingests geodataframe and attributes the point data with its hydroid and HAND values before passing a dataframe to the src_roughness_optimization.py workflow + + Processing + - Extract x,y coordinates from geometry + - Projects the point data to matching CRS for HAND and hydroid rasters + - Samples the hydroid and HAND raster values for each point and stores the values in dataframe + - Calculates the median HAND value for all points by hydroid + ''' + + branch_dir = args[0] + huc = args[1] + branch_id = args[2] + hand_path = args[3] + catchments_path = args[4] + catchments_poly_path = args[5] + water_edge_df = args[6] + htable_path = args[7] + optional_outputs = args[8] + + ## Define coords variable to be used in point raster value attribution. + coords = [(x,y) for x, y in zip(water_edge_df.X, water_edge_df.Y)] + + water_edge_df.to_crs(DEFAULT_FIM_PROJECTION_CRS) + + ## Use point geometry to determine HAND raster pixel values. + with rasterio.open(hand_path) as hand_src, rasterio.open(catchments_path) as catchments_src: + water_edge_df['hand'] = [h[0] for h in hand_src.sample(coords)] + water_edge_df['hydroid'] = [c[0] for c in catchments_src.sample(coords)] + + water_edge_df = water_edge_df[(water_edge_df['hydroid'].notnull()) & (water_edge_df['hand'] > 0) & (water_edge_df['hydroid'] > 0)] + + ## Check that there are valid obs in the water_edge_df (not empty) + if water_edge_df.empty: + log_text = 'NOTE --> skipping HUC: ' + str(huc) + ' Branch: ' + str(branch_id)\ + + ': no valid observation points found within the branch catchments' + else: + ## Intermediate output for debugging + if optional_outputs: + branch_debug_pts_out_gpkg = os.path.join(branch_dir, 'export_water_edge_df_' + + branch_id + '.gpkg') + water_edge_df.to_file(branch_debug_pts_out_gpkg, driver='GPKG', index=False) + + #print('Processing points for HUC: ' + str(huc) + ' Branch: ' + str(branch_id)) + ## Get median HAND value for appropriate groups. + water_edge_median_ds = water_edge_df.groupby(["hydroid", "flow", "submitter", "coll_time", "flow_unit","layer"])['hand'].median() + + ## Write user_supplied_n_vals to CSV for next step. + pt_n_values_csv = os.path.join(branch_dir, 'user_supplied_n_vals_' + branch_id + '.csv') + water_edge_median_ds.to_csv(pt_n_values_csv) + ## Convert pandas series to dataframe to pass to update_rating_curve + water_edge_median_df = water_edge_median_ds.reset_index() + water_edge_median_df['coll_time'] = water_edge_median_df.coll_time.astype(str) + del water_edge_median_ds + + ## Additional arguments for src_roughness_optimization + source_tag = 'point_obs' # tag to use in source attribute field + merge_prev_adj = True # merge in previous SRC adjustment calculations + + ## Call update_rating_curve() to perform the rating curve calibration. 
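What the update_rating_curve() call below ultimately does, per the src_roughness_optimization.py docstring later in this diff, is back-solve Manning's n from each observed stage/flow pair using the SRC hydraulic geometry at that stage. A conceptual sketch with illustrative hydraulic-geometry values and the 0.001 to 0.6 screening range noted in this diff:

```python
# Back-solve Manning's n:  Q = (1/n) * A * R^(2/3) * S^(1/2)
#                      ->  n = A * R^(2/3) * S^(1/2) / Q
# Variable names and the rejection behavior are illustrative only.
ROUGHNESS_MIN, ROUGHNESS_MAX = 0.001, 0.6  # screening thresholds noted in this diff

def calibrated_mannings_n(wet_area_m2, hydraulic_radius_m, slope, observed_flow_cms):
    n = wet_area_m2 * hydraulic_radius_m ** (2.0 / 3.0) * slope ** 0.5 / observed_flow_cms
    return n if ROUGHNESS_MIN < n < ROUGHNESS_MAX else None  # reject erroneous values

# Example: A = 35 m^2, R = 1.2 m, S = 0.002, observed Q = 95 m^3/s
print(calibrated_mannings_n(35.0, 1.2, 0.002, 95.0))  # -> ~0.0186
```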
+ log_text = update_rating_curve(branch_dir, water_edge_median_df, htable_path, huc, + branch_id, catchments_poly_path, optional_outputs, + source_tag, merge_prev_adj, DOWNSTREAM_THRESHOLD) + ## Still testing: use code below to print out any exceptions. + ''' + try: + log_text = update_rating_curve(branch_dir, water_edge_median_df, htable_path, huc, catchments_poly_path, optional_outputs, source_tag, merge_prev_adj, DOWNSTREAM_THRESHOLD) + except Exception as e: + print(str(huc) + ' --> ' + str(e)) + log_text = 'ERROR!!!: HUC ' + str(huc) + ' --> ' + str(e) + ''' + return(log_text) + + +def find_points_in_huc(huc_id, conn): + # Point data in the database is already attributed with HUC8 id + ''' + The function queries the PostgreSQL database and returns all points attributed with the input huc id. + + Processing + - Query the PostgreSQL database for points attributed with huc id. + - Reads the filtered database result into a pandas geodataframe + + Inputs + - conn: connection to PostgreSQL db + - huc_id: HUC id to query the db + + Outputs + - water_edge_df: geodataframe with point data + ''' + + huc_pt_query = """SELECT ST_X(P.geom), ST_Y(P.geom), P.submitter, P.flow, P.coll_time, P.flow_unit, P.layer, P.geom + FROM points P + JOIN hucs H ON ST_Contains(H.geom, P.geom) + WHERE H.huc8 = %s """ + + # Use EPSG:5070 instead of the default ESRI:102039 (gdal pyproj throws an error with crs 102039) + # Appears that EPSG:5070 is functionally equivalent to ESRI:102039: https://gis.stackexchange.com/questions/329123/crs-interpretation-in-qgis + water_edge_df = gpd.GeoDataFrame.from_postgis(huc_pt_query, con=conn, + params=[huc_id], crs=DEFAULT_FIM_PROJECTION_CRS, + parse_dates=['coll_time']) + water_edge_df = water_edge_df.drop(columns=['st_x','st_y']) + + return water_edge_df + +def find_hucs_with_points(conn, fim_out_huc_list): + ''' + The function queries the PostgreSQL database and returns a list of all the HUCs that contain calb point data. + + Processing + - Query the PostgreSQL database for all unique huc ids + + Inputs + - conn: connection to PostgreSQL db + + Outputs + - hucs_wpoints: list with all unique huc ids + ''' + + cursor = conn.cursor() + ''' + cursor.execute(""" + SELECT DISTINCT H.huc8 + FROM points P JOIN hucs H ON ST_Contains(H.geom, P.geom); + """) + ''' + cursor.execute("SELECT DISTINCT H.huc8 FROM points P JOIN hucs H ON ST_Contains(H.geom, P.geom) WHERE H.huc8 = ANY(%s);", (fim_out_huc_list,)) + hucs_fetch = cursor.fetchall() # list with tuple with the attributes defined above (need to convert to df?) + hucs_wpoints = [] + for huc in hucs_fetch: + hucs_wpoints.append(huc[0]) + cursor.close() + return hucs_wpoints + +def ingest_points_layer(fim_directory, job_number, debug_outputs_option, log_file): + ''' + The function obtains all points within a given huc, locates the corresponding FIM output files for each huc (confirms all necessary files exist), and then passes a proc list of huc organized data to process_points function. 
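The proc-list pattern this function uses (build one argument list per HUC/branch, then fan out with a multiprocessing Pool sized by --job-number) looks like the stripped-down sketch below; the worker and its arguments are simplified stand-ins for process_points().

```python
# Fan a list of per-branch jobs out over a worker pool and collect log lines.
from multiprocessing import Pool

def process_branch(args):
    huc, branch_id = args
    return f"processed HUC {huc} branch {branch_id}"

if __name__ == "__main__":
    procs_list = [("12090301", "0"), ("12090301", "1946000003"), ("12040103", "0")]
    with Pool(processes=2) as pool:
        for log_line in pool.map(process_branch, procs_list):
            print(log_line)
```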
+ + Processing + - Query the PostgreSQL database for all unique huc ids that have calb points + - Loop through all HUCs with calb data and locate necessary fim output files to pass to calb workflow + + Inputs + - fim_directory: parent directory of fim ouputs (contains HUC directories) + - job_number: number of multiprocessing jobs to use for processing hucs + - debug_outputs_option: optional flag to output intermediate files + + Outputs + - procs_list: passes multiprocessing list of input args for process_points function input + ''' + + log_file.write('Connecting to database via host\n') + conn = connect() # Connect to the PostgreSQL db once + + if (conn is None): + msg = "unable to connect to calibration db\n" + print(msg) + log_file.write(msg) + return + + log_file.write('Connected to database via host\n') + print("Finding all fim_output hucs that contain calibration points...") + fim_out_huc_list = [ item for item in os.listdir(fim_directory) if os.path.isdir(os.path.join(fim_directory, item)) ] + + fim_out_huc_list.remove('logs') + ## Record run time and close log file + run_time_start = dt.datetime.now() + log_file.write('Finding all hucs that contain calibration points...' + '\n') + huc_list_db = find_hucs_with_points(conn, fim_out_huc_list) + run_time_end = dt.datetime.now() + task_run_time = run_time_end - run_time_start + log_file.write('HUC SEARCH TASK RUN TIME: ' + str(task_run_time) + '\n') + print(f"{len(huc_list_db)} hucs found in point database" + '\n') + log_file.write(f"{len(huc_list_db)} hucs found in point database" + '\n') + log_file.write('#########################################################\n') + + ## Ensure HUC id is either HUC8 + huc_list = [] + for huc in huc_list_db: + ## zfill to the appropriate scale to ensure leading zeros are present, if necessary. + if len(huc) == 7: + huc = huc.zfill(8) + if huc not in huc_list: + huc_list.append(huc) + log_file.write(str(huc) + '\n') + + procs_list = [] # Initialize proc list for mulitprocessing. + + #huc_list = ['12040103'] + ## Define paths to relevant HUC HAND data. + huc_list.sort() # sort huc_list for helping track progress in future print statments + for huc in huc_list: + huc_branches_dir = os.path.join(fim_directory, huc,'branches') + water_edge_df = find_points_in_huc(huc, conn).reset_index() + print(f"{len(water_edge_df)} points found in " + str(huc)) + log_file.write(f"{len(water_edge_df)} points found in " + str(huc) + '\n') + + ## Create X and Y location columns by extracting from geometry. + water_edge_df['X'] = water_edge_df['geom'].x + water_edge_df['Y'] = water_edge_df['geom'].y + + ## Check to make sure the HUC directory exists in the current fim_directory + if not os.path.exists(os.path.join(fim_directory, huc)): + log_file.write("FIM Directory for huc: " + str(huc) + " does not exist --> skipping SRC adjustments for this HUC (obs points found)\n") + + ## Intermediate output for debugging + if debug_outputs_option: + huc_debug_pts_out = os.path.join(fim_directory, huc, 'debug_water_edge_df_' + huc + '.csv') + water_edge_df.to_csv(huc_debug_pts_out) + huc_debug_pts_out_gpkg = os.path.join(fim_directory, huc, 'export_water_edge_df_' + huc + '.gpkg') + water_edge_df.to_file(huc_debug_pts_out_gpkg, driver='GPKG', index=False) + + for branch_id in os.listdir(huc_branches_dir): + branch_dir = os.path.join(huc_branches_dir,branch_id) + ## Define paths to HAND raster, catchments raster, and synthetic rating curve JSON. 
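The per-branch file gate defined just below can be summarized as: build the expected output paths for a branch and only queue it when every required FIM file exists. A compact sketch, with file-name patterns mirroring the ones used in this script:

```python
# Only queue a branch for calibration when its HAND, catchments, and
# hydroTable outputs are all present; warn about whatever is missing.
import os

REQUIRED = ("rem_zeroed_masked_{b}.tif",
            "gw_catchments_reaches_filtered_addedAttributes_{b}.tif",
            "hydroTable_{b}.csv")

def branch_ready(branch_dir, branch_id):
    missing = [name.format(b=branch_id) for name in REQUIRED
               if not os.path.exists(os.path.join(branch_dir, name.format(b=branch_id)))]
    for name in missing:
        print(f"WARNING: {name} does not exist (skipping branch {branch_id})")
    return not missing

# branch_ready("/outputs/run/12090301/branches/0", "0")
```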
+ hand_path = os.path.join(branch_dir, 'rem_zeroed_masked_' + branch_id + '.tif') + catchments_path = os.path.join(branch_dir, 'gw_catchments_reaches_filtered_addedAttributes_' + branch_id + '.tif') + htable_path = os.path.join(branch_dir, 'hydroTable_' + branch_id + '.csv') + catchments_poly_path = os.path.join(branch_dir, + 'gw_catchments_reaches_filtered_addedAttributes_crosswalked_' + branch_id + '.gpkg') + + # Check to make sure the fim output files exist. Continue to next iteration if not and warn user. + if not os.path.exists(hand_path): + print("WARNING: HAND grid does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id)) + log_file.write("WARNING: HAND grid does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id) + '\n') + elif not os.path.exists(catchments_path): + print("WARNING: Catchments grid does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id)) + log_file.write("WARNING: Catchments grid does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id) + '\n') + elif not os.path.exists(htable_path): + print("WARNING: hydroTable does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id)) + log_file.write("WARNING: hydroTable does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id) + '\n') + else: + procs_list.append([branch_dir, huc, branch_id, hand_path, catchments_path, catchments_poly_path, water_edge_df, htable_path, debug_outputs_option]) + + with Pool(processes=job_number) as pool: + log_output = pool.map(process_points, procs_list) + log_file.writelines(["%s\n" % item for item in log_output]) + + log_file.write('#########################################################\n') + disconnect(conn) # move this to happen at the end of the huc looping + +def connect(): + """ Connect to the PostgreSQL database server """ + + print('Connecting to the PostgreSQL database...') + conn = None + not_connected = True + fail_ctr = 0 + while not_connected and fail_ctr < 6: + try: + + # connect to the PostgreSQL server + conn = psycopg2.connect( + host=CALIBRATION_DB_HOST, + database=CALIBRATION_DB_NAME, + user=CALIBRATION_DB_USER_NAME, + password=CALIBRATION_DB_PASS) + + # create a cursor + cur = conn.cursor() + + # execute a statement + print('Host name: ' + CALIBRATION_DB_HOST) + print('PostgreSQL database version:') + cur.execute('SELECT version()') + + # display the PostgreSQL database server version + db_version = cur.fetchone() + print(db_version) + + # close the communication with the PostgreSQL + cur.close() + not_connected = False + print("Connected to database\n\n") + + except (Exception, psycopg2.DatabaseError) as error: + print("Waiting for database to come online") + fail_ctr += 1 + time.sleep(5) + + return conn + +def disconnect(conn): + """ Disconnect from the PostgreSQL database server """ + + if conn is not None: + conn.close() + print('Database connection closed.') + +def run_prep(fim_directory, debug_outputs_option, ds_thresh_override, DOWNSTREAM_THRESHOLD, job_number): + + assert os.path.isdir(fim_directory), 'ERROR: could not find the input fim_dir location: '\ + + str(fim_directory) + + available_cores = multiprocessing.cpu_count() + if job_number > available_cores: + job_number = available_cores - 1 + print("Provided job number exceeds the number of available cores. " \ + + str(job_number) + " max jobs will be used instead.") + + if ds_thresh_override != DOWNSTREAM_THRESHOLD: + print('ALERT!! 
- Using a downstream distance threshold value (' + + str(float(ds_thresh_override)) + 'km) different than the default ('\ + + str(DOWNSTREAM_THRESHOLD) + 'km) - interpret results accordingly') + DOWNSTREAM_THRESHOLD = float(ds_thresh_override) + + ## Create output dir for log file + output_dir = os.path.join(fim_directory, "logs", "src_optimization") + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + + ## Create a time var to log run time + begin_time = dt.datetime.now() + + ## Create log file for processing records + print('This may take a few minutes...') + sys.__stdout__ = sys.stdout + log_file = open(os.path.join(output_dir,'log_spatial_src_adjust.log'),"w") + log_file.write('#########################################################\n') + log_file.write('Parameter Values:\n' + 'DOWNSTREAM_THRESHOLD = ' + str(DOWNSTREAM_THRESHOLD)\ + + '\n' + 'ROUGHNESS_MIN_THRESH = ' + str( ROUGHNESS_MIN_THRESH) + '\n'\ + + 'ROUGHNESS_MAX_THRESH = ' + str(ROUGHNESS_MAX_THRESH) + '\n') + log_file.write('#########################################################\n\n') + log_file.write('START TIME: ' + str(begin_time) + '\n') + + ingest_points_layer(fim_directory, job_number, debug_outputs_option, log_file) + + ## Record run time and close log file + end_time = dt.datetime.now() + log_file.write('END TIME: ' + str(end_time) + '\n') + tot_run_time = end_time - begin_time + log_file.write('TOTAL RUN TIME: ' + str(tot_run_time)) + sys.stdout = sys.__stdout__ + log_file.close() + +if __name__ == '__main__': + ## Parse arguments. + parser = argparse.ArgumentParser(description='Adjusts rating curve given a shapefile containing points of known water boundary.') + #parser.add_argument('-db','--points-layer',help='Path to points layer containing known water boundary locations',required=True) + parser.add_argument('-fim_dir','--fim-directory', + help='Parent directory of FIM-required datasets.', required=True) + parser.add_argument('-debug','--extra-outputs', + help='OPTIONAL flag: Use this to keep intermediate output files for debugging/testing', + default=False, required=False, action='store_true') + parser.add_argument('-dthresh','--downstream-thresh', + help='OPTIONAL Override: distance in km to propogate modified roughness values downstream', + default=DOWNSTREAM_THRESHOLD, required=False) + parser.add_argument('-j','--job-number', + help='OPTIONAL: Number of jobs to use', required=False, + default=2) + + ## Assign variables from arguments. + args = vars(parser.parse_args()) + #points_layer = args['points_layer'] + fim_directory = args['fim_directory'] + debug_outputs_option = args['extra_outputs'] + ds_thresh_override = args['downstream_thresh'] + job_number = int(args['job_number']) + + run_prep(fim_directory, debug_outputs_option, ds_thresh_override, + DOWNSTREAM_THRESHOLD, job_number) diff --git a/src/src_adjust_usgs_rating.py b/src/src_adjust_usgs_rating.py new file mode 100644 index 000000000..1db6f3eec --- /dev/null +++ b/src/src_adjust_usgs_rating.py @@ -0,0 +1,269 @@ +import argparse +import os +import pandas as pd +import sys +import json +import datetime as dt +from pathlib import Path +from collections import deque +import multiprocessing +from multiprocessing import Pool +from utils.shared_functions import check_file_age, concat_huc_csv +from src_roughness_optimization import update_rating_curve +''' +The script ingests a USGS rating curve csv and a NWM flow recurrence interval database. 
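Units are the main bookkeeping in the steps described below: USGS water-surface elevations arrive in feet (NAVD88) and flows in cfs, while HAND stages and SRC discharges are metric. A small sketch of the conversions and of the HAND-stage calculation (WSE minus the gage cell's HAND datum); the gage numbers are illustrative.

```python
# Convert a USGS observation to the metric HAND stage / discharge pair used
# for calibration. Conversion factors match those applied in this script.
FT_TO_M = 1 / 3.28084
CFS_TO_CMS = 1 / 35.3147

def usgs_obs_to_hand(wse_navd88_ft, flow_cfs, hand_datum_m):
    wse_m = wse_navd88_ft * FT_TO_M
    return {
        "hand": wse_m - hand_datum_m,          # stage above the HAND datum (thalweg)
        "discharge_cms": flow_cfs * CFS_TO_CMS,
    }

# Gage reading 331.2 ft NAVD88 / 4200 cfs at a cell whose HAND datum is 99.5 m:
print(usgs_obs_to_hand(331.2, 4200.0, 99.5))   # hand ~1.45 m, flow ~118.9 cms
```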
The gage location will be associated to the corresponding hydroID and attributed with the HAND elevation value + +Processing +- Read in USGS rating curve from csv and convert WSE navd88 values to meters +- Read in the aggregate USGS elev table csv from the HUC fim directory (output from usgs_gage_crosswalk.py) +- Filter null entries and convert usgs flow from cfs to cms +- Calculate HAND elevation value for each gage location (NAVD88 elevation - NHD DEM thalweg elevation) +- Read in the NWM recurr csv file and convert flow to cfs +- Calculate the closest SRC discharge value to the NWM flow value +- Create dataframe with crosswalked USGS flow and NWM recurr flow and assign metadata attributes +- Calculate flow difference (variance) to check for large discrepancies btw NWM flow and USGS closest flow +- Log any signifant differences (or negative HAND values) btw the NWM flow value and closest USGS rating flow +- Produce log file +- Call update_rating_curve() to perform the rating curve calibration. + +Inputs +- branch_dir: fim directory containing individual HUC output dirs +- usgs_rc_filepath: USGS rating curve database (produced by rating_curve_get_usgs_curves.py) +- nwm_recurr_filepath: NWM flow recurrence interval dataset +- debug_outputs_option: optional flag to output intermediate files for reviewing/debugging +- job_number: number of multi-processing jobs to use + +Outputs +- water_edge_median_ds: dataframe containing 'location_id','hydroid','feature_id','huc','hand','discharge_cms','nwm_recur_flow_cms','nwm_recur','layer' +''' + +def create_usgs_rating_database(usgs_rc_filepath, usgs_elev_df, nwm_recurr_filepath, log_dir): + start_time = dt.datetime.now() + print('Reading USGS rating curve from csv...') + log_text = 'Processing database for USGS flow/WSE at NWM flow recur intervals...\n' + col_usgs = ["location_id", "flow", "stage", "elevation_navd88"] + usgs_rc_df = pd.read_csv(usgs_rc_filepath, dtype={'location_id': object}, usecols=col_usgs)#, nrows=30000) + print('Duration (read usgs_rc_csv): {}'.format(dt.datetime.now() - start_time)) + + # convert WSE navd88 values to meters + usgs_rc_df['elevation_navd88_m'] = usgs_rc_df['elevation_navd88'] / 3.28084 + + # read in the aggregate USGS elev table csv + start_time = dt.datetime.now() + cross_df = usgs_elev_df[["location_id", "HydroID", "feature_id", "levpa_id", "HUC8", "dem_adj_elevation"]].copy() + cross_df.rename(columns={'dem_adj_elevation':'hand_datum', 'HydroID':'hydroid', 'HUC8':'huc'}, inplace=True) + + # filter null location_id rows from cross_df (removes ahps lide entries that aren't associated with USGS gage) + cross_df = cross_df[cross_df.location_id.notnull()] + + # convert usgs flow from cfs to cms + usgs_rc_df['discharge_cms'] = usgs_rc_df.flow / 35.3147 + usgs_rc_df = usgs_rc_df.drop(columns=["flow"]) + + # merge usgs ratings with crosswalk attributes + usgs_rc_df = usgs_rc_df.merge(cross_df, how='left', on='location_id') + usgs_rc_df = usgs_rc_df[usgs_rc_df['hydroid'].notna()] + + # calculate hand elevation + usgs_rc_df['hand'] = usgs_rc_df['elevation_navd88_m'] - usgs_rc_df['hand_datum'] + usgs_rc_df = usgs_rc_df[['location_id','feature_id','hydroid','levpa_id','huc','hand','discharge_cms']] + usgs_rc_df['feature_id'] = usgs_rc_df['feature_id'].astype(int) + + # read in the NWM recurr csv file + nwm_recur_df = pd.read_csv(nwm_recurr_filepath, dtype={'feature_id': int}) + nwm_recur_df = nwm_recur_df.drop(columns=["Unnamed: 0"]) + 
nwm_recur_df.rename(columns={'2_0_year_recurrence_flow_17C':'2_0_year','5_0_year_recurrence_flow_17C':'5_0_year','10_0_year_recurrence_flow_17C':'10_0_year','25_0_year_recurrence_flow_17C':'25_0_year','50_0_year_recurrence_flow_17C':'50_0_year','100_0_year_recurrence_flow_17C':'100_0_year'}, inplace=True) + + #convert cfs to cms (x 0.028317) + nwm_recur_df.loc[:, ['2_0_year','5_0_year','10_0_year','25_0_year','50_0_year','100_0_year']] *= 0.028317 + + # merge nwm recurr with usgs_rc + merge_df = usgs_rc_df.merge(nwm_recur_df, how='left', on='feature_id') + + # NWM recurr intervals + recurr_intervals = ("2","5","10","25","50","100") + final_df = pd.DataFrame() # create empty dataframe to append flow interval dataframes + for interval in recurr_intervals: + log_text += ('\n\nProcessing: ' + str(interval) + '-year NWM recurr intervals\n') + print('Processing: ' + str(interval) + '-year NWM recurr intervals') + ## Calculate the closest SRC discharge value to the NWM flow value + merge_df['Q_find'] = (merge_df['discharge_cms'] - merge_df[interval+"_0_year"]).abs() + + ## Check for any missing/null entries in the input SRC + if merge_df['Q_find'].isnull().values.any(): # there may be null values for lake or coastal flow lines (need to set a value to do groupby idxmin below) + log_text += 'HUC: ' + str(merge_df['huc']) + ' : feature_id' + str(merge_df['feature_id']) + ' --> Null values found in "Q_find" calc. These will be filled with 999999 () \n' + ## Fill missing/nan nwm 'Discharge (m3s-1)' values with 999999 to handle later + merge_df['Q_find'] = merge_df['Q_find'].fillna(999999) + if merge_df['hydroid'].isnull().values.any(): + log_text += 'HUC: ' + str(merge_df['huc']) + ' --> Null values found in "hydroid"... \n' + + # Create dataframe with crosswalked USGS flow and NWM recurr flow + calc_df = merge_df.loc[merge_df.groupby(['location_id','levpa_id'])['Q_find'].idxmin()].reset_index(drop=True) # find the index of the Q_1_5_find (closest matching flow) + # Calculate flow difference (variance) to check for large discrepancies btw NWM flow and USGS closest flow + calc_df['check_variance'] = ((calc_df['discharge_cms'] - calc_df[interval+"_0_year"])/calc_df['discharge_cms']).abs() + # Assign new metadata attributes + calc_df['nwm_recur'] = interval+"_0_year" + calc_df['layer'] = '_usgs-gage____' + interval+"-year" + calc_df.rename(columns={interval+"_0_year":'nwm_recur_flow_cms'}, inplace=True) + # Subset calc_df for final output + calc_df = calc_df[['location_id','hydroid','feature_id','levpa_id','huc','hand','discharge_cms','check_variance','nwm_recur_flow_cms','nwm_recur','layer']] + final_df = final_df.append(calc_df, ignore_index=True) + # Log any negative HAND elev values and remove from database + log_text += ('Warning: Negative HAND stage values -->\n') + log_text += (calc_df[calc_df['hand']<0].to_string() +'\n') + final_df = final_df[final_df['hand']>0] + # Log any signifant differences btw the NWM flow value and closest USGS rating flow (this ensures that we consistently sample the USGS rating curves at known intervals - NWM recur flow) + log_text += ('Warning: Large variance (>10%) between NWM flow and closest USGS flow -->\n') + log_text += (calc_df[calc_df['check_variance']>0.1].to_string() +'\n') + final_df = final_df[final_df['check_variance']<0.1] + final_df['submitter'] = 'usgs_rating_wrds_api_' + final_df['location_id'] + # Get datestamp from usgs rating curve file to use as coll_time attribute in hydroTable.csv + datestamp = check_file_age(usgs_rc_filepath) + 
final_df['coll_time'] = str(datestamp)[:15] + + # Rename attributes (for ingest to update_rating_curve) and output csv with the USGS RC database + final_df.rename(columns={'discharge_cms':'flow'}, inplace=True) + final_df.to_csv(os.path.join(log_dir,"usgs_rc_nwm_recurr.csv"),index=False) + + # Output log text to log file + log_text += ('#########\nTotal entries per USGS gage location -->\n') + loc_id_df = final_df.groupby(['location_id']).size().reset_index(name='count') + log_text += (loc_id_df.to_string() +'\n') + log_text += ('#########\nTotal entries per NWM recur value -->\n') + recur_count_df = final_df.groupby(['nwm_recur']).size().reset_index(name='count') + log_text += (recur_count_df.to_string() +'\n') + log_usgs_db = open(os.path.join(log_dir,'log_usgs_rc_database.log'),"w") + log_usgs_db.write(log_text) + log_usgs_db.close() + return(final_df) + +def branch_proc_list(usgs_df,run_dir,debug_outputs_option,log_file): + procs_list = [] # Initialize list for mulitprocessing. + + # loop through all unique level paths that have a USGS gage + #branch_huc_dict = pd.Series(usgs_df.levpa_id.values,index=usgs_df.huc).to_dict('list') + #branch_huc_dict = usgs_df.set_index('huc').T.to_dict('list') + huc_branch_dict = usgs_df.groupby('huc')['levpa_id'].apply(set).to_dict() + + for huc in sorted(huc_branch_dict.keys()): # sort huc_list for helping track progress in future print statments + branch_set = huc_branch_dict[huc] + for branch_id in branch_set: + # Define paths to branch HAND data. + # Define paths to HAND raster, catchments raster, and synthetic rating curve JSON. + # Assumes outputs are for HUC8 (not HUC6) + branch_dir = os.path.join(run_dir,huc,'branches',branch_id) + hand_path = os.path.join(branch_dir, 'rem_zeroed_masked_' + branch_id + '.tif') + catchments_path = os.path.join(branch_dir, 'gw_catchments_reaches_filtered_addedAttributes_' + branch_id + '.tif') + catchments_poly_path = os.path.join(branch_dir, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked_' + branch_id + '.gpkg') + htable_path = os.path.join(branch_dir, 'hydroTable_' + branch_id + '.csv') + water_edge_median_ds = usgs_df[(usgs_df['huc']==huc) & (usgs_df['levpa_id']==branch_id)] + + # Check to make sure the fim output files exist. Continue to next iteration if not and warn user. 
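The closest-flow lookup performed in create_usgs_rating_database() above (absolute difference, groupby idxmin, then a 10% variance screen) reduces to a few pandas operations; the gage id and flow values below are illustrative only.

```python
# For each gage, pick the rating-curve sample whose discharge is closest to
# the NWM recurrence flow, then drop matches that differ by more than 10%.
import pandas as pd

rc = pd.DataFrame({                       # USGS rating-curve samples for one gage
    "location_id": "07374000",
    "discharge_cms": [80.0, 95.0, 110.0, 130.0],
    "2_0_year": 100.0,                    # NWM 2-year recurrence flow (cms)
})

rc["Q_find"] = (rc["discharge_cms"] - rc["2_0_year"]).abs()
closest = rc.loc[rc.groupby("location_id")["Q_find"].idxmin()].copy()
closest["check_variance"] = ((closest["discharge_cms"] - closest["2_0_year"])
                             / closest["discharge_cms"]).abs()
closest = closest[closest["check_variance"] < 0.1]   # drop poor matches (>10% off)
print(closest[["location_id", "discharge_cms", "2_0_year", "check_variance"]])
```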
+ if not os.path.exists(hand_path): + print("WARNING: HAND grid does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id)) + log_file.write("WARNING: HAND grid does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id) + '\n') + elif not os.path.exists(catchments_path): + print("WARNING: Catchments grid does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id)) + log_file.write("WARNING: Catchments grid does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id) + '\n') + elif not os.path.exists(htable_path): + print("WARNING: hydroTable does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id)) + log_file.write("WARNING: hydroTable does not exist (skipping): " + str(huc) + ' - branch-id: ' + str(branch_id) + '\n') + else: + ## Additional arguments for src_roughness_optimization + source_tag = 'usgs_rating' # tag to use in source attribute field + merge_prev_adj = False # merge in previous SRC adjustment calculations + + print('Will perform SRC adjustments for huc: ' + str(huc) + ' - branch-id: ' + str(branch_id)) + procs_list.append([branch_dir, water_edge_median_ds, htable_path, huc, branch_id, catchments_poly_path, debug_outputs_option, source_tag, merge_prev_adj]) + + # multiprocess all available branches + print(f"Calculating new SRCs for {len(procs_list)} branches using {job_number} jobs...") + with Pool(processes=job_number) as pool: + log_output = pool.starmap(update_rating_curve, procs_list) + log_file.writelines(["%s\n" % item for item in log_output]) + # try statement for debugging + # try: + # with Pool(processes=job_number) as pool: + # log_output = pool.starmap(update_rating_curve, procs_list) + # log_file.writelines(["%s\n" % item for item in log_output]) + # except Exception as e: + # print(str(huc) + ' --> ' + ' branch id: ' + str(branch_id) + str(e)) + # log_file.write('ERROR!!!: HUC ' + str(huc) + ' --> ' + ' branch id: ' + str(branch_id) + str(e) + '\n') + +def run_prep(run_dir,usgs_rc_filepath,nwm_recurr_filepath,debug_outputs_option,job_number): + ## Check input args are valid + assert os.path.isdir(run_dir), 'ERROR: could not find the input fim_dir location: ' + str(run_dir) + + ## Create an aggregate dataframe with all usgs_elev_table.csv entries for hucs in fim_dir + print('Reading USGS gage HAND elevation from usgs_elev_table.csv files...') + #usgs_elev_file = os.path.join(branch_dir,'usgs_elev_table.csv') + #usgs_elev_df = pd.read_csv(usgs_elev_file, dtype={'HUC8': object, 'location_id': object, 'feature_id': int}) + csv_name = 'usgs_elev_table.csv' + + available_cores = multiprocessing.cpu_count() + if job_number > available_cores: + job_number = available_cores - 1 + print("Provided job number exceeds the number of available cores. 
" + str(job_number) + " max jobs will be used instead.") + + ## Create output dir for log and usgs rc database + log_dir = os.path.join(run_dir,"logs","src_optimization") + print("Log file output here: " + str(log_dir)) + if not os.path.isdir(log_dir): + os.makedirs(log_dir) + + ## Create a time var to log run time + begin_time = dt.datetime.now() + # Create log file for processing records + log_file = open(os.path.join(log_dir,'log_usgs_rc_src_adjust.log'),"w") + log_file.write('START TIME: ' + str(begin_time) + '\n') + log_file.write('#########################################################\n\n') + + usgs_elev_df = concat_huc_csv(run_dir,csv_name) + + if usgs_elev_df is None: + warn_err = 'WARNING: usgs_elev_df not created - check that usgs_elev_table.csv files exist in fim_dir!' + print(warn_err) + log_file.write(warn_err) + + elif usgs_elev_df.empty: + warn_err = 'WARNING: usgs_elev_df is empty - check that usgs_elev_table.csv files exist in fim_dir!' + print(warn_err) + log_file.write(warn_err) + + else: + print('This may take a few minutes...') + log_file.write("starting create usgs rating db") + usgs_df = create_usgs_rating_database(usgs_rc_filepath, usgs_elev_df, nwm_recurr_filepath, log_dir) + + ## Create huc proc_list for multiprocessing and execute the update_rating_curve function + branch_proc_list(usgs_df,run_dir,debug_outputs_option,log_file) + + ## Record run time and close log file + log_file.write('#########################################################\n\n') + end_time = dt.datetime.now() + log_file.write('END TIME: ' + str(end_time) + '\n') + tot_run_time = end_time - begin_time + log_file.write('TOTAL RUN TIME: ' + str(tot_run_time)) + sys.stdout = sys.__stdout__ + log_file.close() + +if __name__ == '__main__': + ## Parse arguments. + parser = argparse.ArgumentParser(description='Adjusts rating curve with database of USGS rating curve (calculated WSE/flow).') + parser.add_argument('-run_dir','--run-dir',help='Parent directory of FIM run.',required=True) + parser.add_argument('-usgs_rc','--usgs-ratings',help='Path to USGS rating curve csv file',required=True) + parser.add_argument('-nwm_recur','--nwm_recur',help='Path to NWM recur file (multiple NWM flow intervals). NOTE: assumes flow units are cfs!!',required=True) + parser.add_argument('-debug','--extra-outputs',help='Optional flag: Use this to keep intermediate output files for debugging/testing',default=False,required=False, action='store_true') + parser.add_argument('-j','--job-number',help='Number of jobs to use',required=False,default=1) + + ## Assign variables from arguments. 
+    args = vars(parser.parse_args())
+    run_dir = args['run_dir']
+    usgs_rc_filepath = args['usgs_ratings']
+    nwm_recurr_filepath = args['nwm_recur']
+    debug_outputs_option = args['extra_outputs']
+    job_number = int(args['job_number'])
+
+    ## Prepare/check inputs, create log file, and spin up the proc list
+    run_prep(run_dir,usgs_rc_filepath,nwm_recurr_filepath,debug_outputs_option,job_number)
+
diff --git a/src/src_roughness_optimization.py b/src/src_roughness_optimization.py
new file mode 100644
index 000000000..9be4b397e
--- /dev/null
+++ b/src/src_roughness_optimization.py
@@ -0,0 +1,375 @@
+import argparse
+import geopandas as gpd
+from geopandas.tools import sjoin
+import os
+import rasterio
+import pandas as pd
+import numpy as np
+import sys
+import json
+import datetime as dt
+from collections import deque
+import multiprocessing
+from multiprocessing import Pool
+
+from utils.shared_variables import DOWNSTREAM_THRESHOLD, ROUGHNESS_MIN_THRESH, ROUGHNESS_MAX_THRESH
+
+
+def update_rating_curve(fim_directory, water_edge_median_df, htable_path, huc, branch_id, catchments_poly_path, debug_outputs_option, source_tag, merge_prev_adj=False, down_dist_thresh=DOWNSTREAM_THRESHOLD):
+    '''
+    This function ingests a dataframe containing observed data (HAND elevation and flow) and then calculates new SRC roughness values via Manning's equation. The new roughness values are averaged for each HydroID and then propagated downstream, and a new discharge value is calculated where applicable.
+
+    Processing Steps:
+    - Read in the hydroTable.csv and check whether it has previously been updated (rename default columns if needed)
+    - Loop through the user-provided point data (stage/flow dataframe) row by row and copy the corresponding htable values for the matching stage->HAND lookup
+    - Calculate new HydroID roughness values for the input obs data using Manning's equation
+    - Create a dataframe to check for erroneous Manning's n values (values set in tools_shared_variables.py: >0.6 or <0.001 --> see input args)
+    - Create magnitude and ahps columns by subsetting the "layer" attribute
+    - Create a df grouped by hydroid with ahps_lid and huc number, then pivot the magnitude column to display the n value for each magnitude at each hydroid
+    - Create a df with the most recent collection time entry and submitter attributes
+    - Calculate the median ManningN to handle cases with multiple hydroid entries and create a df with the median hydroid_ManningN value per feature_id
+    - Rename the original hydrotable variables to allow new calculations to use the primary var name
+    - Check for large variability in the calculated Manning's n values (for cases with multiple entries for a single hydroid)
+    - Create attributes to traverse the flow network between HydroIDs
+    - Calculate group_calb_coef (mean calb n for consecutive hydroids) and apply values downstream to non-calb hydroids (constrained to the first X km of hydroids - set the downstream distance var as an input arg)
+    - Create the adjust_ManningN column by combining the hydroid_ManningN with the featid_ManningN (use the feature_id value if the hydroid is in a feature_id that contains valid hydroid_ManningN value(s))
+    - Merge in previous SRC adjustments (where available) for hydroIDs that do not have a new adjusted roughness value
+    - Update the catchments polygon .gpkg with the joined attribute - "src_calibrated"
+    - Merge the final ManningN dataframe to the original hydroTable
+    - Create the ManningN column by combining the hydroid_ManningN with the default_ManningN (use modified where available)
+    - 
Calculate new discharge_cms with new adjusted ManningN + - Export a new hydroTable.csv and overwrite the previous version and output new src json (overwrite previous) + + Inputs: + - fim_directory: fim directory containing individual HUC output dirs + - water_edge_median_df: dataframe containing observation data (attributes: "hydroid", "flow", "submitter", "coll_time", "flow_unit", "layer", "HAND") + - htable_path: path to the current HUC hydroTable.csv + - huc: string variable for the HUC id # (huc8 or huc6) + - branch_id: string variable for the branch id + - catchments_poly_path: path to the current HUC catchments polygon layer .gpkg + - debug_outputs_option: optional input argument to output additional intermediate data files (csv files with SRC calculations) + - source_tag: input text tag used to specify the type/source of the input obs data used for the SRC adjustments (e.g. usgs_rating or point_obs) + - merge_prev_adj: boolean argument to specify when to merge previous SRC adjustments vs. overwrite (default=False) + - down_dist_thresh: optional input argument to override the env variable that controls the downstream distance new roughness values are applied downstream of locations with valid obs data + + Ouputs: + - output_catchments: same input "catchments_poly_path" .gpkg with appened attributes for SRC adjustments fields + - df_htable: same input "htable_path" --> updated hydroTable.csv with new/modified attributes + - output_src_json: src.json file with new SRC discharge values + + ''' + #print("Processing huc --> " + str(huc)) + log_text = "\nProcessing huc --> " + str(huc) + ' branch id: ' + str(branch_id) + '\n' + log_text += "DOWNSTREAM_THRESHOLD: " + str(down_dist_thresh) + 'km\n' + log_text += "Merge Previous Adj Values: " + str(merge_prev_adj) + '\n' + df_nvalues = water_edge_median_df.copy() + df_nvalues = df_nvalues[ (df_nvalues.hydroid.notnull()) & (df_nvalues.hydroid > 0) ] # remove null entries that do not have a valid hydroid + + ## Determine calibration data type for naming calb dataframe column + if source_tag == 'point_obs': + calb_type = 'calb_coef_spatial' + if source_tag == 'usgs_rating': + calb_type = 'calb_coef_usgs' + + ## Read in the hydroTable.csv and check wether it has previously been updated (rename default columns if needed) + df_htable = pd.read_csv(htable_path, dtype={'HUC': object, 'last_updated':object, 'submitter':object, 'obs_source':object}) + df_prev_adj = pd.DataFrame() # initialize empty df for populating/checking later + if 'precalb_discharge_cms' not in df_htable.columns: # need this column to exist before continuing + df_htable['calb_applied'] = False + df_htable['last_updated'] = pd.NA + df_htable['submitter'] = pd.NA + df_htable['obs_source'] = pd.NA + df_htable['precalb_discharge_cms'] = pd.NA + df_htable['calb_coef_usgs'] = pd.NA + df_htable['calb_coef_spatial'] = pd.NA + df_htable['calb_coef_final'] = pd.NA + if df_htable['precalb_discharge_cms'].isnull().values.any(): # check if there are not valid values in the column (True = no previous calibration outputs) + df_htable['precalb_discharge_cms'] = df_htable['discharge_cms'].values + + ## The section below allows for previous calibration modifications (i.e. 
usgs rating calbs) to be available in the final calibration outputs + if merge_prev_adj and not df_htable['calb_coef_final'].isnull().all(): # check if the merge_prev_adj setting is True and there are valid 'calb_coef_final' values from previous calibration outputs + # Create a subset of hydrotable with previous adjusted SRC attributes + df_prev_adj_htable = df_htable.copy()[['HydroID','submitter','last_updated','obs_source','calb_coef_final']] + df_prev_adj_htable.rename(columns={'submitter':'submitter_prev','last_updated':'last_updated_prev','calb_coef_final':'calb_coef_final_prev','obs_source':'obs_source_prev'}, inplace=True) + df_prev_adj_htable = df_prev_adj_htable.groupby(["HydroID"]).first() + # Only keep previous USGS rating curve adjustments (previous spatial obs adjustments are not retained) + df_prev_adj = df_prev_adj_htable[df_prev_adj_htable['obs_source_prev'].str.contains("usgs_rating", na=False)] + log_text += 'HUC: ' + str(huc) + ' Branch: ' + str(branch_id) + ': found previous hydroTable calibration attributes --> retaining previous calb attributes for blending...\n' + + # Delete previous adj columns to prevent duplicate variable issues (if src_roughness_optimization.py was previously applied) + df_htable.drop(['discharge_cms','submitter','last_updated',calb_type,'calb_coef_final','calb_applied','obs_source'], axis=1, inplace=True, errors='ignore') + df_htable.rename(columns={'precalb_discharge_cms':'discharge_cms'}, inplace=True) + + ## loop through the user provided point data --> stage/flow dataframe row by row + for index, row in df_nvalues.iterrows(): + if row.hydroid not in df_htable['HydroID'].values: + print('ERROR: HydroID for calb point was not found in the hydrotable (check hydrotable) for HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + ' hydroid: ' + str(row.hydroid)) + log_text += 'ERROR: HydroID for calb point was not found in the hydrotable (check hydrotable) for HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + ' hydroid: ' + str(row.hydroid) + '\n' + else: + df_htable_hydroid = df_htable[(df_htable.HydroID == row.hydroid) & (df_htable.stage > 0)] # filter htable for entries with matching hydroid and ignore stage 0 (first possible stage match at 1ft) + if df_htable_hydroid.empty: + print('ERROR: df_htable_hydroid is empty but expected data: ' + str(huc) + ' branch id: ' + str(branch_id) + ' hydroid: ' + str(row.hydroid)) + log_text += 'ERROR: df_htable_hydroid is empty but expected data: ' + str(huc) + ' branch id: ' + str(branch_id) + ' hydroid: ' + str(row.hydroid) + '\n' + + find_src_stage = df_htable_hydroid.loc[df_htable_hydroid['stage'].sub(row.hand).abs().idxmin()] # find closest matching stage to the user provided HAND value + ## copy the corresponding htable values for the matching stage->HAND lookup + df_nvalues.loc[index,'feature_id'] = find_src_stage.feature_id + df_nvalues.loc[index,'LakeID'] = find_src_stage.LakeID + df_nvalues.loc[index,'NextDownID'] = find_src_stage.NextDownID + df_nvalues.loc[index,'LENGTHKM'] = find_src_stage.LENGTHKM + df_nvalues.loc[index,'src_stage'] = find_src_stage.stage + df_nvalues.loc[index,'channel_n'] = find_src_stage.channel_n + df_nvalues.loc[index,'overbank_n'] = find_src_stage.overbank_n + df_nvalues.loc[index,'discharge_cms'] = find_src_stage.discharge_cms + + ## Calculate calibration coefficient + df_nvalues.rename(columns={'hydroid':'HydroID'}, inplace=True) # rename the previous ManningN column + df_nvalues['hydroid_calb_coef'] = df_nvalues['discharge_cms']/df_nvalues['flow'] # Qobs / Qsrc + + 
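+    ## Note on how this coefficient is used (illustrative numbers only): the blended calb_coef_final is
+    ## applied near the end of this function as discharge_cms = precalb_discharge_cms / calb_coef_final,
+    ## which, because Manning's equation gives Q proportional to 1/n, is equivalent to scaling the channel
+    ## and overbank n values by the same coefficient (e.g. a coefficient of 1.25 turns a pre-calibration
+    ## discharge of 100 cms into 80 cms).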
## Calcuate a "calibration adjusted" n value using channel and overbank n-values multiplied by calb_coef + df_nvalues['channel_n_calb'] = df_nvalues['hydroid_calb_coef']*df_nvalues['channel_n'] + df_nvalues['overbank_n_calb'] = df_nvalues['hydroid_calb_coef']*df_nvalues['overbank_n'] + + ## Create dataframe to check for unrealistic/egregious calibration adjustments by applying the calibration coefficient to the Manning's n values and setting an acceptable range (values set in tools_shared_variables.py --> >0.8 or <0.001) + df_nvalues['Mann_flag'] = np.where((df_nvalues['channel_n_calb'] >= ROUGHNESS_MAX_THRESH) | (df_nvalues['overbank_n_calb'] >= ROUGHNESS_MAX_THRESH) | (df_nvalues['channel_n_calb'] <= ROUGHNESS_MIN_THRESH) | (df_nvalues['overbank_n_calb'] <= ROUGHNESS_MIN_THRESH) | (df_nvalues['hydroid_calb_coef'].isnull()),'Fail','Pass') + df_mann_flag = df_nvalues[(df_nvalues['Mann_flag'] == 'Fail')][['HydroID','hydroid_calb_coef','channel_n_calb','overbank_n_calb']] + if not df_mann_flag.empty: + log_text += '!!! Flaged Mannings Roughness values below !!!' +'\n' + log_text += df_mann_flag.to_string() + '\n' + + ## Create magnitude and ahps column by subsetting the "layer" attribute + df_nvalues['magnitude'] = df_nvalues['layer'].str.split("_").str[5] + df_nvalues['ahps_lid'] = df_nvalues['layer'].str.split("_").str[1] + df_nvalues['huc'] = str(huc) + df_nvalues.drop(['layer'], axis=1, inplace=True) + + ## Create df grouped by hydroid with ahps_lid and huc number + df_huc_lid = df_nvalues.groupby(["HydroID"]).first()[['ahps_lid','huc']] + df_huc_lid.columns = pd.MultiIndex.from_product([['info'], df_huc_lid.columns]) + + ## pivot the magnitude column to display n value for each magnitude at each hydroid + df_nvalues_mag = df_nvalues.pivot_table(index='HydroID', columns='magnitude', values=['hydroid_calb_coef'], aggfunc='mean') # if there are multiple entries per hydroid and magnitude - aggregate using mean + + ## Optional: Export csv with the newly calculated Manning's N values + if debug_outputs_option: + output_calc_n_csv = os.path.join(fim_directory, calb_type + '_src_calcs_' + branch_id + '.csv') + df_nvalues.to_csv(output_calc_n_csv,index=False) + + ## filter the modified Manning's n dataframe for values out side allowable range + df_nvalues = df_nvalues[df_nvalues['Mann_flag'] == 'Pass'] + + ## Check that there are valid entries in the calculate roughness df after filtering + if not df_nvalues.empty: + ## Create df with the most recent collection time entry and submitter attribs + df_updated = df_nvalues[['HydroID','coll_time','submitter','ahps_lid']] # subset the dataframe + df_updated = df_updated.sort_values('coll_time').drop_duplicates(['HydroID'],keep='last') # sort by collection time and then drop duplicate HydroIDs (keep most recent coll_time per HydroID) + df_updated.rename(columns={'coll_time':'last_updated'}, inplace=True) + + ## cacluate median ManningN to handle cases with multiple hydroid entries + df_mann_hydroid = df_nvalues.groupby(["HydroID"])[['hydroid_calb_coef']].median() + + ## Create a df with the median hydroid_ManningN value per feature_id + #df_mann_featid = df_nvalues.groupby(["feature_id"])[['hydroid_ManningN']].mean() + #df_mann_featid.rename(columns={'hydroid_ManningN':'featid_ManningN'}, inplace=True) + + ## Rename the original hydrotable variables to allow new calculations to use the primary var name + df_htable.rename(columns={'discharge_cms':'precalb_discharge_cms'}, inplace=True) + + ## Check for large variabilty in the calculated Manning's N 
values (for cases with mutliple entries for a singel hydroid) + df_nrange = df_nvalues.groupby('HydroID').agg({'hydroid_calb_coef': ['median', 'min', 'max', 'std', 'count']}) + df_nrange['hydroid_calb_coef','range'] = df_nrange['hydroid_calb_coef','max'] - df_nrange['hydroid_calb_coef','min'] + df_nrange = df_nrange.join(df_nvalues_mag, how='outer') # join the df_nvalues_mag containing hydroid_manningn values per flood magnitude category + df_nrange = df_nrange.merge(df_huc_lid, how='outer', on='HydroID') # join the df_huc_lid df to add attributes for lid and huc# + log_text += 'Statistics for Modified Roughness Calcs -->' +'\n' + log_text += df_nrange.to_string() + '\n' + log_text += '----------------------------------------\n' + + ## Optional: Output csv with SRC calc stats + if debug_outputs_option: + output_stats_n_csv = os.path.join(fim_directory, calb_type + '_src_coef_vals_stats_' + branch_id + '.csv') + df_nrange.to_csv(output_stats_n_csv,index=True) + + ## subset the original hydrotable dataframe and subset to one row per HydroID + df_nmerge = df_htable[['HydroID','feature_id','NextDownID','LENGTHKM','LakeID','order_']].drop_duplicates(['HydroID'],keep='first') + + ## Need to check that there are non-lake hydroids in the branch hydrotable (prevents downstream error) + df_htable_check_lakes = df_nmerge.loc[df_nmerge['LakeID'] == -999] + if not df_htable_check_lakes.empty: + + ## Create attributes to traverse the flow network between HydroIDs + df_nmerge = branch_network_tracer(df_nmerge) + + ## Merge the newly caluclated ManningN dataframes + df_nmerge = df_nmerge.merge(df_mann_hydroid, how='left', on='HydroID') + df_nmerge = df_nmerge.merge(df_updated, how='left', on='HydroID') + + ## Calculate group_ManningN (mean calb n for consective hydroids) and apply values downsteam to non-calb hydroids (constrained to first Xkm of hydroids - set downstream diststance var as input arg) + df_nmerge = group_manningn_calc(df_nmerge, down_dist_thresh) + + ## Create a df with the median hydroid_calb_coef value per feature_id + df_mann_featid = df_nmerge.groupby(["feature_id"])[['hydroid_calb_coef']].mean() + df_mann_featid.rename(columns={'hydroid_calb_coef':'featid_calb_coef'}, inplace=True) + df_mann_featid_attrib = df_nmerge.groupby('feature_id').first() # create a seperate df with attributes to apply to other hydroids that share a featureid + df_mann_featid_attrib = df_mann_featid_attrib[df_mann_featid_attrib['submitter'].notna()][['last_updated','submitter']] + df_nmerge = df_nmerge.merge(df_mann_featid, how='left', on='feature_id').set_index('feature_id') + df_nmerge = df_nmerge.combine_first(df_mann_featid_attrib).reset_index() + + if not df_nmerge['hydroid_calb_coef'].isnull().all(): + + ## Create the calibration coefficient column by combining the hydroid_calb_coef with the featid_calb_coef (use feature_id value if the hydroid is in a feature_id that contains valid hydroid_calb_coef value(s)) + conditions = [ (df_nmerge['hydroid_calb_coef'].isnull()) & (df_nmerge['featid_calb_coef'].notnull()), (df_nmerge['hydroid_calb_coef'].isnull()) & (df_nmerge['featid_calb_coef'].isnull()) & (df_nmerge['group_calb_coef'].notnull()) ] + choices = [ df_nmerge['featid_calb_coef'], df_nmerge['group_calb_coef'] ] + df_nmerge[calb_type] = np.select(conditions, choices, default=df_nmerge['hydroid_calb_coef']) + df_nmerge['obs_source'] = np.where(df_nmerge[calb_type].notnull(), source_tag, pd.NA) + df_nmerge.drop(['feature_id','NextDownID','LENGTHKM','LakeID','order_'], axis=1, inplace=True, 
errors='ignore') # drop these columns to avoid duplicates where merging with the full hydroTable df + + ## Merge in previous SRC adjustments (where available) for hydroIDs that do not have a new adjusted roughness value + if not df_prev_adj.empty: + df_nmerge = pd.merge(df_nmerge,df_prev_adj, on='HydroID', how='outer') + df_nmerge['submitter'] = np.where((df_nmerge[calb_type].isnull() & df_nmerge['calb_coef_final_prev'].notnull()),df_nmerge['submitter_prev'],df_nmerge['submitter']) + df_nmerge['last_updated'] = np.where((df_nmerge[calb_type].isnull() & df_nmerge['calb_coef_final_prev'].notnull()),df_nmerge['last_updated_prev'],df_nmerge['last_updated']) + df_nmerge['obs_source'] = np.where((df_nmerge[calb_type].isnull() & df_nmerge['calb_coef_final_prev'].notnull()),df_nmerge['obs_source_prev'],df_nmerge['obs_source']) + df_nmerge['calb_coef_final'] = np.where((df_nmerge[calb_type].isnull() & df_nmerge['calb_coef_final_prev'].notnull()),df_nmerge['calb_coef_final_prev'],df_nmerge[calb_type]) + df_nmerge.drop(['submitter_prev','last_updated_prev','calb_coef_final_prev','obs_source_prev'], axis=1, inplace=True, errors='ignore') + else: + df_nmerge['calb_coef_final'] = df_nmerge[calb_type] + + + ## Update the catchments polygon .gpkg with joined attribute - "src_calibrated" + if os.path.isfile(catchments_poly_path): + input_catchments = gpd.read_file(catchments_poly_path) + ## Create new "src_calibrated" column for viz query + if 'src_calibrated' in input_catchments.columns: # check if this attribute already exists and drop if needed + input_catchments.drop(['src_calibrated'], axis=1, inplace=True, errors='ignore') + df_nmerge['src_calibrated'] = np.where(df_nmerge['calb_coef_final'].notnull(), 'True', 'False') + output_catchments = input_catchments.merge(df_nmerge[['HydroID','src_calibrated']], how='left', on='HydroID') + output_catchments['src_calibrated'].fillna('False', inplace=True) + output_catchments.to_file(catchments_poly_path,driver="GPKG",index=False) # overwrite the previous layer + df_nmerge.drop(['src_calibrated'], axis=1, inplace=True, errors='ignore') + ## Optional ouputs: 1) merge_n_csv csv with all of the calculated n values and 2) a catchments .gpkg with new joined attributes + if debug_outputs_option: + output_merge_n_csv = os.path.join(fim_directory, calb_type + '_merge_vals_' + branch_id + '.csv') + df_nmerge.to_csv(output_merge_n_csv,index=False) + ## output new catchments polygon layer with several new attributes appended + if os.path.isfile(catchments_poly_path): + input_catchments = gpd.read_file(catchments_poly_path) + output_catchments_fileName = os.path.join(os.path.split(catchments_poly_path)[0],"gw_catchments_src_adjust_" + str(branch_id) + ".gpkg") + output_catchments = input_catchments.merge(df_nmerge, how='left', on='HydroID') + output_catchments.to_file(output_catchments_fileName,driver="GPKG",index=False) + + ## Merge the final ManningN dataframe to the original hydroTable + df_nmerge.drop(['ahps_lid','start_catch','route_count','branch_id','hydroid_calb_coef','featid_calb_coef','group_calb_coef',], axis=1, inplace=True, errors='ignore') # drop these columns to avoid duplicates where merging with the full hydroTable df + df_htable = df_htable.merge(df_nmerge, how='left', on='HydroID') + df_htable['calb_applied'] = np.where(df_htable['calb_coef_final'].notnull(), 'True', 'False') # create true/false column to clearly identify where new roughness values are applied + + ## Calculate new discharge_cms with new adjusted ManningN + df_htable['discharge_cms'] = 
np.where(df_htable['calb_coef_final'].isnull(), df_htable['precalb_discharge_cms'], df_htable['precalb_discharge_cms']/df_htable['calb_coef_final']) + + ## Replace discharge_cms with 0 or -999 if present in the original discharge (carried over from thalweg notch workaround in SRC post-processing) + df_htable['discharge_cms'].mask(df_htable['precalb_discharge_cms']==0.0,0.0,inplace=True) + df_htable['discharge_cms'].mask(df_htable['precalb_discharge_cms']==-999,-999,inplace=True) + + ## Export a new hydroTable.csv and overwrite the previous version + out_htable = os.path.join(fim_directory, 'hydroTable_' + branch_id + '.csv') + df_htable.to_csv(out_htable,index=False) + + else: + print('ALERT!! HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + ' --> no valid hydroid roughness calculations after removing lakeid catchments from consideration') + log_text += 'ALERT!! HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + ' --> no valid hydroid roughness calculations after removing lakeid catchments from consideration\n' + else: + print('WARNING!! HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + ' --> hydrotable is empty after removing lake catchments (will ignore branch)') + log_text += 'ALERT!! HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + ' --> hydrotable is empty after removing lake catchments (will ignore branch)\n' + else: + print('ALERT!! HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + ' --> no valid roughness calculations - please check point data and src calculations to evaluate') + log_text += 'ALERT!! HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + ' --> no valid roughness calculations - please check point data and src calculations to evaluate\n' + + log_text += 'Completed: ' + str(huc) + ' --> branch: ' + str(branch_id) + '\n' + log_text += '#########################################################\n' + print("Completed huc: " + str(huc) + ' --> branch: ' + str(branch_id)) + return(log_text) + +def branch_network_tracer(df_input_htable): + df_input_htable = df_input_htable.astype({'NextDownID': 'int64'}) # ensure attribute has consistent format as int + df_input_htable = df_input_htable.loc[df_input_htable['LakeID'] == -999] # remove all hydroids associated with lake/water body (these often have disjoined artifacts in the network) + df_input_htable["start_catch"] = ~df_input_htable['HydroID'].isin(df_input_htable['NextDownID']) # define start catchments as hydroids that are not found in the "NextDownID" attribute for all other hydroids + + df_input_htable.set_index('HydroID',inplace=True,drop=False) # set index to the hydroid + branch_heads = deque(df_input_htable[df_input_htable['start_catch'] == True]['HydroID'].tolist()) # create deque of hydroids to define start points in the while loop + visited = set() # create set to keep track of all hydroids that have been accounted for + branch_count = 0 # start branch id + while branch_heads: + hid = branch_heads.popleft() # pull off left most hydroid from deque of start hydroids + Q = deque(df_input_htable[df_input_htable['HydroID'] == hid]['HydroID'].tolist()) # create a new deque that will be used to populate all relevant downstream hydroids + vert_count = 0; branch_count += 1 + while Q: + q = Q.popleft() + if q not in visited: + df_input_htable.loc[df_input_htable.HydroID==q,'route_count'] = vert_count # assign var with flow order ranking + df_input_htable.loc[df_input_htable.HydroID==q,'branch_id'] = branch_count # assign var with current branch id + vert_count += 1 + visited.add(q) + nextid = 
df_input_htable.loc[q,'NextDownID'] # find the id for the next downstream hydroid + order = df_input_htable.loc[q,'order_'] # find the streamorder for the current hydroid + + if nextid not in visited and nextid in df_input_htable.HydroID: + check_confluence = (df_input_htable.NextDownID == nextid).sum() > 1 # check if the NextDownID is referenced by more than one hydroid (>1 means this is a confluence) + nextorder = df_input_htable.loc[nextid,'order_'] # find the streamorder for the next downstream hydroid + if nextorder > order and check_confluence == True: # check if the nextdownid streamorder is greater than the current hydroid order and the nextdownid is a confluence (more than 1 upstream hydroid draining to it) + branch_heads.append(nextid) # found a terminal point in the network (append to branch_heads for second pass) + continue # if above conditions are True than stop traversing downstream and move on to next starting hydroid + Q.append(nextid) + df_input_htable.reset_index(drop=True, inplace=True) # reset index (previously using hydroid as index) + df_input_htable.sort_values(['branch_id','route_count'], inplace=True) # sort the dataframe by branch_id and then by route_count (need this ordered to ensure upstream to downstream ranking for each branch) + return(df_input_htable) + +def group_manningn_calc(df_nmerge, down_dist_thresh): + ## Calculate group_calb_coef (mean calb n for consective hydroids) and apply values downsteam to non-calb hydroids (constrained to first Xkm of hydroids - set downstream diststance var as input arg + #df_nmerge.sort_values(by=['NextDownID'], inplace=True) + dist_accum = 0; hyid_count = 0; hyid_accum_count = 0; + run_accum_mann = 0; group_calb_coef = 0; branch_start = 1 # initialize counter and accumulation variables + lid_count = 0; prev_lid = 'x' + for index, row in df_nmerge.iterrows(): # loop through the df (parse by hydroid) + if int(df_nmerge.loc[index,'branch_id']) != branch_start: # check if start of new branch + dist_accum = 0; hyid_count = 0; hyid_accum_count = 0; # initialize counter vars + run_accum_mann = 0; group_calb_coef = 0 # initialize counter vars + branch_start = int(df_nmerge.loc[index,'branch_id']) # reassign the branch_start var to evaluate on next iteration + # use the code below to withold downstream hydroid_calb_coef values (use this for downstream evaluation tests) + ''' + lid_count = 0 + if not pd.isna(df_nmerge.loc[index,'ahps_lid']): + if df_nmerge.loc[index,'ahps_lid'] == prev_lid: + lid_count += 1 + if lid_count > 3: # only keep the first 3 HydroID n values (everything else set to null for downstream application) + df_nmerge.loc[index,'hydroid_ManningN'] = np.nan + df_nmerge.loc[index,'featid_ManningN'] = np.nan + else: + lid_count = 1 + prev_lid = df_nmerge.loc[index,'ahps_lid'] + ''' + if np.isnan(df_nmerge.loc[index,'hydroid_calb_coef']): # check if the hydroid_calb_coef value is nan (indicates a non-calibrated hydroid) + df_nmerge.loc[index,'accum_dist'] = row['LENGTHKM'] + dist_accum # calculate accumulated river distance + dist_accum += row['LENGTHKM'] # add hydroid length to the dist_accum var + hyid_count = 0 # reset the hydroid counter to 0 + df_nmerge.loc[index,'hyid_accum_count'] = hyid_accum_count # output the hydroid accum counter + if dist_accum < down_dist_thresh: # check if the accum distance is less than Xkm downstream from valid hydroid_calb_coef group value + if hyid_accum_count > 1: # only apply the group_calb_coef if there are 2 or more valid hydorids that contributed to the upstream group_calb_coef 
+ df_nmerge.loc[index,'group_calb_coef'] = group_calb_coef # output the group_calb_coef var + else: + run_avg_mann = 0 # reset the running average manningn variable (greater than 10km downstream) + else: # performs the following for hydroids that have a valid hydroid_calb_coef value + dist_accum = 0; hyid_count += 1 # initialize vars + df_nmerge.loc[index,'accum_dist'] = 0 # output the accum_dist value (set to 0) + if hyid_count == 1: # checks if this the first in a series of valid hydroid_calb_coef values + run_accum_mann = 0; hyid_accum_count = 0 # initialize counter and running accumulated manningN value + group_calb_coef = (row['hydroid_calb_coef'] + run_accum_mann)/float(hyid_count) # calculate the group_calb_coef (NOTE: this will continue to change as more hydroid values are accumulated in the "group" moving downstream) + df_nmerge.loc[index,'group_calb_coef'] = group_calb_coef # output the group_calb_coef var + df_nmerge.loc[index,'hyid_count'] = hyid_count # output the hyid_count var + run_accum_mann += row['hydroid_calb_coef'] # add current hydroid manningn value to the running accum mann var + hyid_accum_count += 1 # increase the # of hydroid accum counter + df_nmerge.loc[index,'hyid_accum_count'] = hyid_accum_count # output the hyid_accum_count var + + ## Delete unnecessary intermediate outputs + if 'hyid_count' in df_nmerge.columns: + df_nmerge.drop(['hyid_count','accum_dist','hyid_accum_count'], axis=1, inplace=True, errors='ignore') # drop hydroid counter if it exists + #df_nmerge.drop(['accum_dist','hyid_accum_count'], axis=1, inplace=True) # drop accum vars from group calc + return(df_nmerge) \ No newline at end of file diff --git a/src/stream_branches.py b/src/stream_branches.py new file mode 100755 index 000000000..8a6ebf6cc --- /dev/null +++ b/src/stream_branches.py @@ -0,0 +1,1160 @@ +#!/usr/bin/env python3 + +import os +import geopandas as gpd +import pandas as pd +import rasterio +from rasterio.mask import mask +from rasterio.io import DatasetReader +from os.path import splitext, isfile +import fiona +from fiona.errors import DriverError +from collections import deque +import numpy as np +from tqdm import tqdm +from shapely.ops import linemerge, unary_union +from shapely.geometry import MultiLineString, LineString, MultiPoint, Point +from shapely.strtree import STRtree +from random import sample +from scipy.stats import mode +from utils.shared_variables import PREP_CRS + +class StreamNetwork(gpd.GeoDataFrame): + + ''' + Notes: + - Many of the methods support two attributes called branch_id_attribute and values_excluded. + This can be used to filter out records. + ie) When calling the nwm_subset_streams.gpkg, you can filter some records like this: + StreamNetwork.from_file(filename=outputs//nwm_subset_streams.gpkg, + branch_id_attribute="order_", + values_excluded=[1,2] + (which means drop all records that have an order_ of 1 or 2. + + - Note: from_file is using its branch_id_attribute and values_excluded as intended but other + methods may not be incomplete and not filtering as expected. 
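+        - Illustrative chained usage (the file name below is a placeholder):
+              branches = StreamNetwork.from_file('nwm_subset_streams.gpkg',
+                                                 branch_id_attribute='LevelPathI')
+              branches = branches.dissolve_by_branch(branch_id_attribute='LevelPathI',
+                                                     attribute_excluded='StreamOrde',
+                                                     values_excluded=[1,2])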
+ ''' + + geom_name = 'geometry' # geometry attribute name + branch_id_attribute = None # branch id attribute name + values_excluded = None + attribute_excluded = None + + def __init__(self,*args,**kwargs): + + if kwargs: + branch_id_attribute = kwargs.pop("branch_id_attribute",None) + values_excluded = kwargs.pop("values_excluded",None) + attribute_excluded = kwargs.pop("attribute_excluded",None) + + super().__init__(*args,**kwargs) + + self.branch_id_attribute = branch_id_attribute + self.values_excluded = values_excluded + self.attribute_excluded = attribute_excluded + + + @classmethod + def from_file(cls, filename, branch_id_attribute=None, values_excluded=None, + attribute_excluded=None, verbose=False, *args, **kwargs): + + """ loads stream network from file to streamnetwork geopandas """ + + if kwargs: + inputs = { 'branch_id_attribute' : kwargs.pop("branch_id_attribute",None) , + 'values_excluded' :kwargs.pop("values_excluded",None) , + 'attribute_excluded' : kwargs.pop("attribute_excluded",None) } + + verbose = kwargs.pop('verbose',None) + else: + inputs = { 'branch_id_attribute' : branch_id_attribute , + 'values_excluded' : values_excluded , + 'attribute_excluded' : attribute_excluded } + + if verbose: + print('Loading file') + + raw_df = gpd.read_file(filename,*args,**kwargs) + + # Reproject + if raw_df.crs.to_authority() != PREP_CRS.to_authority(): + raw_df.to_crs(PREP_CRS) + + filtered_df = gpd.GeoDataFrame() + + if (branch_id_attribute is not None) and (values_excluded is not None): + filtered_df = raw_df[~raw_df[branch_id_attribute].isin(values_excluded)] + else: + filtered_df = raw_df + + if verbose: + print("======" + filename) + print("Number of df rows = " + str(filtered_df.shape[0])) + + return(cls(filtered_df,**inputs)) + + + def write(self,fileName,layer=None,index=True,verbose=False): + + """ Gets driver Name from file extension for Geopandas writing """ + + if verbose: + print("Writing to {}".format(fileName)) + + # sets driver + driverDictionary = {'.gpkg' : 'GPKG','.geojson' : 'GeoJSON','.shp' : 'ESRI Shapefile'} + driver = driverDictionary[splitext(fileName)[1]] + + self.to_file(fileName, driver=driver, layer=layer, index=index) + + def apply(self,*args,**kwargs): + + branch_id_attribute = self.branch_id_attribute + attribute_excluded = self.attribute_excluded + values_excluded = self.values_excluded + crs = self.crs + + self = super().apply(*args,**kwargs) + self = self.set_crs(crs) + + self = StreamNetwork( self, + branch_id_attribute=branch_id_attribute, + attribute_excluded=attribute_excluded, + values_excluded=values_excluded + ) + + return(self) + + + def multilinestrings_to_linestrings(self): + + branch_id_attribute = self.branch_id_attribute + attribute_excluded = self.attribute_excluded + values_excluded = self.values_excluded + + def convert_to_linestring(row): + + geometry = row['geometry'] + + if isinstance(geometry,MultiLineString): + linestring = LineString(sum([list(item.coords) for item in list(geometry.geoms)],[])) + row['geometry'] = linestring + + return(row) + + + self = StreamNetwork( self.apply(convert_to_linestring,axis=1), + branch_id_attribute=branch_id_attribute, + attribute_excluded=attribute_excluded, + values_excluded=values_excluded + ) + + return(self) + + + def explode(self,**kwargs): + + branch_id_attribute = self.branch_id_attribute + attribute_excluded = self.attribute_excluded + values_excluded = self.values_excluded + + self = StreamNetwork( super().explode(**kwargs), + branch_id_attribute=branch_id_attribute, + 
attribute_excluded=attribute_excluded, + values_excluded=values_excluded + ) + + return(self) + + + def to_df(self,*args,**kwargs): + + """ Converts back to dataframe """ + + self = pd.DataFrame(self,*args,**kwargs) + + return(self) + + + def merge(self,*args,**kwargs): + branch_id_attribute = self.branch_id_attribute + attribute_excluded = self.attribute_excluded + values_excluded = self.values_excluded + + self = super().merge(*args,**kwargs) + + self = StreamNetwork(self,branch_id_attribute=branch_id_attribute, + attribute_excluded=attribute_excluded, + values_excluded=values_excluded) + + return(self) + + + def merge_stream_branches(self,stream_branch_dataset,on='ID',branch_id_attribute='LevelPathI',attributes='StreamOrde',stream_branch_layer_name=None): + + """ Merges stream branch id attribute from another vector file """ + + # load vaas + if isinstance(stream_branch_dataset,str): + stream_branch_dataset = gpd.read_file(stream_branch_dataset,layer=stream_branch_layer_name) + elif isinstance(stream_branch_dataset,gpd.GeoDataFrame): + pass + else: + raise TypeError('Pass stream_branch_dataset argument as filepath or GeoDataframe') + + # merge and drop duplicate columns + if isinstance(attributes,list): + what = [on] + [branch_id_attribute] + attributes + elif isinstance(attributes,str): + what = [on] + [branch_id_attribute] +[attributes] + + self = self.merge(stream_branch_dataset[what],on=on, how='inner') + + # make sure it's the correct object type + self = StreamNetwork(self,branch_id_attribute=branch_id_attribute) + + return(self) + + + @staticmethod + def flip_inlet_outlet_linestring_index(linestring_index): + + # returns -1 for 0 and 0 for -1 + inlet_outlet_linestring_index_dict = { 0 : -1, -1 : 0 } + + try: + return( inlet_outlet_linestring_index_dict[linestring_index] ) + except KeyError: + raise ValueError('Linestring index should be 0 or -1') + + + def derive_nodes(self,toNode_attribute='ToNode',fromNode_attribute='FromNode',reach_id_attribute='ID', + outlet_linestring_index=0,node_prefix=None,max_node_digits=8,verbose=False): + + if verbose: + print("Deriving nodes ...") + + inlet_linestring_index = StreamNetwork.flip_inlet_outlet_linestring_index(outlet_linestring_index) + + # set node prefix to string + if node_prefix is None: + node_prefix='' + + # handle digits and values for node ids + max_post_node_digits = max_node_digits - len(node_prefix) + max_node_value = int('9' * max_post_node_digits) + + # sets index of stream branches as reach id attribute + #if self.index.name != reach_id_attribute: + #self.set_index(reach_id_attribute,drop=True,inplace=True) + + inlet_coordinates, outlet_coordinates = dict(), dict() + node_coordinates = dict() + toNodes, fromNodes = [None] * len(self),[None] * len(self) + current_node_id = '1'.zfill(max_post_node_digits) + + for i,(_,row) in enumerate(self.iterrows()): + + reach_id = row[reach_id_attribute] + + # get foss id for node_prefix + if len(node_prefix) > 0: + current_node_prefix = node_prefix + else: + current_node_prefix = str(reach_id)[0:4] + + # makes list of coordinates. 
Merges multi-part geoms + reach_coordinates = list(row['geometry'].coords) + + inlet_coordinate = reach_coordinates[inlet_linestring_index] + outlet_coordinate = reach_coordinates[outlet_linestring_index] + + if inlet_coordinate not in node_coordinates: + current_node_id_with_prefix = current_node_prefix + current_node_id + node_coordinates[inlet_coordinate] = current_node_id_with_prefix + fromNodes[i] = current_node_id_with_prefix + + current_node_id = int(current_node_id.lstrip('0')) + 1 + if current_node_id > max_node_value: + raise ValueError('Current Node ID exceeding max. Look at source code to change.') + current_node_id = str(current_node_id).zfill(max_post_node_digits) + + else: + fromNodes[i] = node_coordinates[inlet_coordinate] + + if outlet_coordinate not in node_coordinates: + current_node_id_with_prefix = current_node_prefix + current_node_id + node_coordinates[outlet_coordinate] = current_node_id_with_prefix + toNodes[i] = current_node_id_with_prefix + + current_node_id = int(current_node_id.lstrip('0')) + 1 + if current_node_id > max_node_value: + raise ValueError('Current Node ID exceeding max. Look at source code to change.') + current_node_id = str(current_node_id).zfill(max_post_node_digits) + + else: + toNodes[i] = node_coordinates[outlet_coordinate] + + self.loc[:,fromNode_attribute] = fromNodes + self.loc[:,toNode_attribute] = toNodes + + return(self) + + + def derive_outlets(self,toNode_attribute='ToNode',fromNode_attribute='FromNode',outlets_attribute='outlet_id', + verbose=False): + + if verbose: + print("Deriving outlets ...") + + fromNodes = set(i for i in self[fromNode_attribute]) + + outlets = [-1] * len(self) + + for i,tn in enumerate(self[toNode_attribute]): + if tn not in fromNodes: + outlets[i] = i + 1 + + self[outlets_attribute] = outlets + + return(self) + + + def derive_inlets(self,toNode_attribute='ToNode',fromNode_attribute='FromNode', + inlets_attribute='inlet_id', + verbose=False + ): + + if verbose: + print("Deriving inlets ...") + + toNodes = set(i for i in self[toNode_attribute]) + + inlets = [-1] * len(self) + + for i,fn in enumerate(self[fromNode_attribute]): + if fn not in toNodes: + inlets[i] = i + 1 + + self[inlets_attribute] = inlets + + return(self) + + + def derive_inlet_points_by_feature( self,feature_attribute, outlet_linestring_index): + + """ Finds the upstream point of every feature in the stream network """ + + inlet_linestring_index = StreamNetwork.flip_inlet_outlet_linestring_index(outlet_linestring_index) + + feature_inlet_points_gdf = gpd.GeoDataFrame(self.copy()) + + for idx, row in self.iterrows(): + feature_inlet_point = Point(row.geometry.coords[inlet_linestring_index]) + + feature_inlet_points_gdf.loc[idx,'geometry'] = feature_inlet_point + + return(feature_inlet_points_gdf) + + + def derive_headwater_points_with_inlets( self,inlets_attribute='inlet_id', + fromNode_attribute='FromNode', + outlet_linestring_index=0 + ): + + """ Derives headwater points file given inlets """ + + # get inlet linestring index + inlet_linestring_index = StreamNetwork.flip_inlet_outlet_linestring_index(outlet_linestring_index) + + inlet_indices = self.loc[:,inlets_attribute] != -1 + + inlets = self.loc[inlet_indices,:].reset_index(drop=True) + + headwater_points_gdf = gpd.GeoDataFrame(inlets.copy()) + + for idx,row in inlets.iterrows(): + headwater_point = row.geometry.coords[inlet_linestring_index] + + headwater_point = Point(headwater_point) + + headwater_points_gdf.loc[idx,'geometry'] = headwater_point + + return(headwater_points_gdf) + + + 
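+    # Illustrative sequence for the node/inlet/outlet helpers above (the file name is a placeholder):
+    #   streams = StreamNetwork.from_file('nwm_subset_streams.gpkg', branch_id_attribute='order_')
+    #   streams = streams.derive_nodes(reach_id_attribute='ID')
+    #   streams = streams.derive_outlets().derive_inlets()
+    #   headwaters = streams.derive_headwater_points_with_inlets()
+    # An outlet is any reach whose ToNode never appears as another reach's FromNode; an inlet is the reverse.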
def exclude_attribute_values(self, branch_id_attribute=None, values_excluded=None, verbose=False): + + if (branch_id_attribute is not None) and (values_excluded is not None): + self = StreamNetwork(self[~self[branch_id_attribute].isin(values_excluded)], branch_id_attribute=branch_id_attribute) + + if verbose: + print("Number of df rows = " + str(self.shape[0])) + + return(self) + + + def remove_stream_segments_without_catchments( self, + catchments, + reach_id_attribute='ID', + reach_id_attribute_in_catchments='ID', + verbose=False + ): + + if verbose: + print("Removing stream segments without catchments ...") + + # load catchments + if isinstance(catchments,gpd.GeoDataFrame): + pass + elif isinstance(catchments,str): + catchments = gpd.read_file(catchments) + else: + raise TypeError("Catchments needs to be GeoDataFame or path to vector file") + + self = self.merge(catchments.loc[:,reach_id_attribute_in_catchments], + left_on=reach_id_attribute, + right_on=reach_id_attribute_in_catchments, + how='inner') + + return(self) + + + def remove_branches_without_catchments(self, + catchments, + reach_id_attribute='ID', + branch_id_attribute='branchID', + reach_id_attribute_in_catchments='ID', + verbose=False + ): + + if verbose: + print("Removing stream branches without catchments ...") + + # load catchments + if isinstance(catchments, gpd.GeoDataFrame): + pass + elif isinstance(catchments,str): + catchments = gpd.read_file(catchments) + else: + raise TypeError("Catchments needs to be GeoDataFame or path to vector file") + + unique_stream_branches = self.loc[:,branch_id_attribute].unique() + unique_catchments = set(catchments.loc[:,reach_id_attribute_in_catchments].unique()) + + current_index_name = self.index.name + self.set_index(branch_id_attribute, drop=False, inplace=True) + + for usb in unique_stream_branches: + + try: + reach_ids_in_branch = set(self.loc[usb,reach_id_attribute].unique()) + except AttributeError: + reach_ids_in_branch = set( [ self.loc[usb,reach_id_attribute] ] ) + + if len( reach_ids_in_branch & unique_catchments) == 0: + #print(f'Dropping {usb}') + self.drop(usb,inplace=True) + + if current_index_name is None: + self.reset_index(drop=True, inplace=True) + else: + self.set_index(current_index_name, drop=True, inplace=True) + + return(self) + + + def trim_branches_in_waterbodies(self, + branch_id_attribute, + verbose=False + ): + """ + Recursively trims the reaches from the ends of the branches if they are in a + waterbody (determined by the Lake attribute). 
+ """ + + def find_downstream_reaches_in_waterbodies(tmp_self, tmp_IDs=[]): + # Find lowest reach(es) + downstream_IDs = [int(x) for x in tmp_self.From_Node[~tmp_self.To_Node.isin(tmp_self.From_Node)]] # IDs of most downstream reach(es) + + for downstream_ID in downstream_IDs: + # Stop if lowest reach is not in a lake + if int(tmp_self.Lake[tmp_self.From_Node.astype(int)==downstream_ID]) == -9999: + continue + else: + # Remove reach from tmp_self + tmp_IDs.append(downstream_ID) + tmp_self.drop(tmp_self[tmp_self.From_Node.astype(int).isin([downstream_ID,])].index, inplace=True) + # Repeat for next lowest downstream reach + if downstream_ID in tmp_self.To_Node.astype(int).values: + return find_downstream_reaches_in_waterbodies(tmp_self, tmp_IDs) + return tmp_IDs + + def find_upstream_reaches_in_waterbodies(tmp_self, tmp_IDs=[]): + # Find highest reach(es) + upstream_IDs = [int(x) for x in tmp_self.From_Node[~tmp_self.From_Node.isin(tmp_self.To_Node)]] # IDs of most upstream reach(es) + nonlake_reaches = [int(x) for x in tmp_self.From_Node[tmp_self.Lake == -9999]] # IDs of most reach(es) that are not designated as lake reaches + + for upstream_ID in upstream_IDs: + # Stop if uppermost reach is not in a lake + if int(tmp_self.Lake[tmp_self.From_Node.astype(int)==upstream_ID]) == -9999: + continue + else: + if int(tmp_self.To_Node[tmp_self.From_Node.astype(int)==upstream_ID]) in nonlake_reaches: + continue + # Remove reach from tmp_self + tmp_IDs.append(upstream_ID) + tmp_self.drop(tmp_self[tmp_self.From_Node.astype(int).isin([upstream_ID,])].index, inplace=True) + # Repeat for next highest upstream reach + return find_upstream_reaches_in_waterbodies(tmp_self, tmp_IDs) + return tmp_IDs + + if verbose: + print("Trimming stream branches in waterbodies ...") + + for branch in self[branch_id_attribute].astype(int).unique(): + tmp_self = self[self[branch_id_attribute].astype(int)==branch] + + # If entire branch is in waterbody + if all(tmp_self.Lake.values != -9999): + tmp_IDs = tmp_self.From_Node.astype(int) + + else: + # Find bottom up + tmp_IDs = find_downstream_reaches_in_waterbodies(tmp_self) + + # Find top down + tmp_IDs = find_upstream_reaches_in_waterbodies(tmp_self, tmp_IDs) + + if len(tmp_IDs) > 0: + self.drop(self[self.From_Node.astype(int).isin(tmp_IDs)].index, inplace=True) + + return(self) + + + def remove_branches_in_waterbodies(self, + waterbodies, + out_vector_files=None, + verbose=False + ): + """ + Removes branches completely in waterbodies + """ + + if verbose: + print('Removing branches in waterbodies') + + # load waterbodies + if isinstance(waterbodies,str) and isfile(waterbodies): + waterbodies = gpd.read_file(waterbodies) + + if isinstance(waterbodies,gpd.GeoDataFrame): + # Find branches in waterbodies + sjoined = gpd.sjoin(self, waterbodies, op='within') + self.drop(sjoined.index, inplace=True) + + if out_vector_files is not None: + + if verbose: + print("Writing pruned branches ...") + + self.write(out_vector_files, index=False) + + return(self) + + + def derive_stream_branches(self,toNode_attribute='ToNode', + fromNode_attribute='FromNode', + upstreams=None, + outlet_attribute='outlet_id', + branch_id_attribute='branchID', + reach_id_attribute='ID', + comparison_attributes='StreamOrde', + comparison_function=max, + max_branch_id_digits=6, + verbose=False): + + """ Derives stream branches """ + + # checks inputs + allowed_comparison_function = {max,min} + if comparison_function not in allowed_comparison_function: + raise ValueError(f"Only {allowed_comparison_function} 
comparison functions allowed") + + # sets index of stream branches as reach id attribute + reset_index = False + if self.index.name != reach_id_attribute: + self.set_index(reach_id_attribute,drop=True,inplace=True) + reset_index = True + + # make upstream and downstream dictionaries if none are passed + if upstreams is None: + upstreams,_ = self.make_up_and_downstream_dictionaries(reach_id_attribute=reach_id_attribute, + toNode_attribute=toNode_attribute, + fromNode_attribute=fromNode_attribute, + verbose=verbose) + + # initialize empty queue, visited set, branch attribute column, and all toNodes set + Q = deque() + visited = set() + self[branch_id_attribute] = [-1] * len(self) + + # progress bar + progress = tqdm(total=len(self),disable=(not verbose),desc='Stream branches') + + outlet_boolean_mask = self[outlet_attribute] >= 0 + outlet_reach_ids = self.index[outlet_boolean_mask].tolist() + + branch_ids = [ str(h)[0:4] + str(b+1).zfill(max_branch_id_digits) for b,h in enumerate(outlet_reach_ids) ] + + self.loc[outlet_reach_ids,branch_id_attribute] = branch_ids + Q = deque(outlet_reach_ids) + visited = set(outlet_reach_ids) + bid = int(branch_ids[-1][-max_branch_id_digits:].lstrip('0')) + 1 + progress.update(bid-1) + + # breath-first traversal + # while queue contains reaches + while Q: + + # pop current reach id from queue + current_reach_id = Q.popleft() + + # update progress + progress.update(1) + + # get current reach stream order and branch id + # current_reach_comparison_value = self.at[current_reach_id,comparison_attributes] + current_reach_branch_id = self.at[current_reach_id,branch_id_attribute] + + # get upstream ids + upstream_ids = upstreams[current_reach_id] + + # identify upstreams by finding if fromNode exists in set of all toNodes + if upstream_ids: + + # determine if each upstream has been visited or not + not_visited_upstream_ids = [] # list to save not visited upstreams + for us in upstream_ids: + + # if upstream id has not been visited + if us not in visited: + + # add to visited set and to queue + visited.add(us) + Q.append(us) + not_visited_upstream_ids += [us] + + # if upstreams that are not visited exist + if not_visited_upstream_ids: + + # find + upstream_reaches_compare_values = self.loc[not_visited_upstream_ids,comparison_attributes] + # matching_value = comparison_function(upstream_reaches_compare_values) + + #================================================================================== + # If the two stream orders aren't the same, then follow the highest order, otherwise use arbolate sum + if upstream_reaches_compare_values.idxmax()['order_'] == upstream_reaches_compare_values.idxmin()['order_']: + decision_attribute = 'arbolate_sum' + else: + decision_attribute = 'order_' + # Continue the current branch up the larger stream + continue_id = upstream_reaches_compare_values.idxmax()[decision_attribute] + self.loc[continue_id,branch_id_attribute] = current_reach_branch_id + # Create a new level path for the smaller tributary(ies) + if len(not_visited_upstream_ids) == 1: continue # only create a new branch if there are 2 upstreams + new_upstream_branches = upstream_reaches_compare_values.loc[~upstream_reaches_compare_values.index.isin([continue_id,])] + for new_up_id in new_upstream_branches.index: + branch_id = str(current_reach_branch_id)[0:4] + str(bid).zfill(max_branch_id_digits) + self.loc[new_up_id,branch_id_attribute] = branch_id + bid += 1 + #================================================================================== + ''' NOTE: The above logic 
uses stream order to override arbolate sum. Use the commented section + below if this turns out to be a bad idea!''' + #matches = 0 # if upstream matches are more than 1, limits to only one match + #for usrcv,nvus in zip(upstream_reaches_compare_values,not_visited_upstream_ids): + # if (usrcv == matching_value) & (matches == 0): + # self.at[nvus,branch_id_attribute] = current_reach_branch_id + # matches += 1 + # else: + # branch_id = str(current_reach_branch_id)[0:4] + str(bid).zfill(max_branch_id_digits) + # self.at[nvus,branch_id_attribute] = branch_id + # bid += 1 + + progress.close() + + if reset_index: + self.reset_index(drop=False,inplace=True) + + return(self) + + + def make_up_and_downstream_dictionaries(self,reach_id_attribute='ID', + toNode_attribute='ToNode', + fromNode_attribute='FromNode', + verbose=False): + + # sets index of stream branches as reach id attribute + #if self.index.name != reach_id_attribute: + # self.set_index(reach_id_attribute,drop=True,inplace=True) + + # find upstream and downstream dictionaries + upstreams,downstreams = dict(),dict() + + for _, row in tqdm(self.iterrows(),disable=(not verbose), + total=len(self), desc='Upstream and downstream dictionaries'): + + reach_id = row[reach_id_attribute] + downstreams[reach_id] = self.loc[ self[fromNode_attribute] == row[toNode_attribute] , reach_id_attribute].tolist() + upstreams[reach_id] = self.loc[ self[toNode_attribute] == row[fromNode_attribute] , reach_id_attribute].tolist() + + return(upstreams,downstreams) + + + def get_arbolate_sum(self,arbolate_sum_attribute='arbolate_sum',inlets_attribute='inlet_id', + reach_id_attribute='ID',length_conversion_factor_to_km = 0.001, + upstreams=None, downstreams=None, + toNode_attribute='ToNode', + fromNode_attribute='FromNode', + verbose=False + ): + + # sets index of stream branches as reach id attribute + reset_index = False + if self.index.name != reach_id_attribute: + self.set_index(reach_id_attribute,drop=True,inplace=True) + reset_index = True + + # make upstream and downstream dictionaries if none are passed + if (upstreams is None) | (downstreams is None): + upstreams, downstreams = self.make_up_and_downstream_dictionaries(reach_id_attribute=reach_id_attribute, + toNode_attribute=toNode_attribute, + fromNode_attribute=fromNode_attribute, + verbose=verbose) + + # initialize queue, visited set, with inlet reach ids + inlet_reach_ids = self.index[self[inlets_attribute] >= 0].tolist() + S = deque(inlet_reach_ids) + visited = set() + + # initialize arbolate sum, make length km column, make all from nodes set + self[arbolate_sum_attribute] = self.geometry.length * length_conversion_factor_to_km + + # progress bar + progress = tqdm(total=len(self),disable=(not verbose), desc= "Arbolate sums") + + # depth-first traversal + # while stack contains reaches + while S: + + # pop current reach id from queue + current_reach_id = S.pop() + + # current arbolate sum + current_reach_arbolate_sum = self.at[current_reach_id,arbolate_sum_attribute] + + # if current reach id is not visited mark as visited + if current_reach_id not in visited: + visited.add(current_reach_id) + progress.update(n=1) + + # get downstream ids + downstream_ids = downstreams[current_reach_id] + + if downstream_ids: + + for ds in downstream_ids: + + # figure out of all upstream reaches of ds have been visited + upstream_of_ds_ids = set(upstreams[ds]) + all_upstream_ids_of_ds_are_visited = upstream_of_ds_ids.issubset(visited) + + # append downstream to stack + if all_upstream_ids_of_ds_are_visited: + 
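+                    # (Each pass adds the current reach's accumulated length to every reach immediately
+                    #  downstream, so a reach's arbolate sum ends up as its own length in km plus the total
+                    #  length of everything upstream of it; a downstream reach is only pushed onto the stack
+                    #  once all of its upstream reaches have been visited, so its sum is complete before it
+                    #  propagates any further.)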
S.append(ds) + + self.loc[ds,arbolate_sum_attribute] += current_reach_arbolate_sum + + progress.close() + + if reset_index: + self.reset_index(drop=False,inplace=True) + + return(self) + + + def dissolve_by_branch(self, branch_id_attribute='LevelPathI', attribute_excluded='StreamOrde', + values_excluded=[1,2], out_vector_files=None, verbose=False): + + if verbose: + print("Dissolving by branch ...") + + # exclude attributes and their values + if (attribute_excluded is not None) & (values_excluded is not None): + values_excluded = set(values_excluded) + exclude_indices = [False if i in values_excluded else True for i in self[attribute_excluded]] + self = self.loc[exclude_indices,:] + + # dissolve lines + self['bids_temp'] = self.loc[:,branch_id_attribute].copy() + + # ensure the new stream order has the order from it's highest child + max_stream_order = self.groupby(branch_id_attribute).max()['order_'].copy() + + self = self.dissolve(by=branch_id_attribute) + self.rename(columns={'bids_temp' : branch_id_attribute},inplace=True) + + self["order_"] = max_stream_order.values + + # merges each multi-line string to a singular linestring + for lpid,row in tqdm(self.iterrows(),total=len(self),disable=(not verbose),desc="Merging mult-part geoms"): + if isinstance(row.geometry,MultiLineString): + merged_line = linemerge(row.geometry) + #self.loc[lpid,'geometry'] = merged_line + try: + self.loc[lpid,'geometry'] = merged_line + except ValueError: + merged_line = list(merged_line.geoms)[0] + self.loc[lpid,'geometry'] = merged_line + + #self[branch_id_attribute] = bids + self = StreamNetwork(self, branch_id_attribute=branch_id_attribute, + attribute_excluded=attribute_excluded, + values_excluded=values_excluded) + + if out_vector_files is not None: + + # base_file_path,extension = splitext(out_vector_files) + + if verbose: + print("Writing dissolved branches ...") + + #for bid in tqdm(self.loc[:,branch_id_attribute],total=len(self),disable=(not verbose)): + #out_vector_file = "{}_{}{}".format(base_file_path,bid,extension) + + #bid_indices = self.loc[:,branch_id_attribute] == bid + #current_stream_network = StreamNetwork(self.loc[bid_indices,:]) + + #current_stream_network.write(out_vector_file,index=False) + self.write(out_vector_files, index=False) + + return(self) + + + def derive_segments(self,inlets_attribute='inlet_id', reach_id_attribute='ID'): + pass + + + def conflate_branches(self,target_stream_network,branch_id_attribute_left='branch_id', + branch_id_attribute_right='branch_id', left_order_attribute='order_', + right_order_attribute='order_', + crosswalk_attribute='crosswalk_id', verbose=False): + + # get unique stream orders + orders = self.loc[:,right_order_attribute].unique() + + # make a dictionary of STR trees for every stream order + trees = { o:STRtree(target_stream_network.geometry.tolist()) for o in orders } + + # make the crosswalk id attribute and set index + self.loc[:,crosswalk_attribute] = [None] * len(self) + self.set_index(branch_id_attribute_left,inplace=True) + + # loop through rows of self + for idx,row in tqdm(self.iterrows(),total=len(self),disable=(not verbose),desc="Conflating branches"): + + g = row['geometry'] + o = row[left_order_attribute] + + tree = trees[o] + + # find nearest geom in target and its index + matching_geom = tree.nearest(g) + match_idx = target_stream_network.geometry == matching_geom + + # get the branch ids + right_branch_id = int(target_stream_network.loc[match_idx,branch_id_attribute_left]) + left_branch_id = idx + + # save the target matching 
branch id + self.loc[left_branch_id,crosswalk_attribute] = right_branch_id + + # reset indices + self.reset_index(inplace=True,drop=False) + + return(self) + + + def explode_to_points(self,reach_id_attribute='ID', sampling_size=None, + verbose=False): + + points_gdf = self.copy() + points_gdf.reset_index(inplace=True,drop=True) + + all_exploded_points = [None] * len(points_gdf) + for idx,row in tqdm(self.iterrows(),total=len(self),disable=(not verbose),desc='Exploding Points'): + + geom = row['geometry'] + + exploded_points = [p for p in iter(geom.coords)] + + if sampling_size is None: + exploded_points = MultiPoint(exploded_points) + else: + try: + exploded_points = MultiPoint( sample(exploded_points,sampling_size) ) + except ValueError: + exploded_points = MultiPoint( exploded_points ) + + all_exploded_points[idx] = exploded_points + + points_gdf['geometry'] = all_exploded_points + + points_gdf = points_gdf.explode() + points_gdf.reset_index(inplace=True,drop=True) + + return(points_gdf) + + @staticmethod + def conflate_points(source_points,target_points,source_reach_id_attribute,target_reach_id_attribute,verbose=False): + + tree = STRtree(target_points.geometry.tolist()) + + # find matching geometry + matches_dict = dict.fromkeys(source_points.loc[:,source_reach_id_attribute].astype(int).tolist(),[]) + for idx,row in tqdm(source_points.iterrows(),total=len(source_points),disable=(not verbose),desc="Conflating points"): + + geom = row['geometry'] + nearest_target_point = tree.nearest(geom) + match_idx = target_points.index[target_points.geometry == nearest_target_point].tolist() + + if len(match_idx) > 1: + match_idx = match_idx[0] + else: + match_idx = match_idx[0] + + matched_id = int(target_points.loc[match_idx,target_reach_id_attribute]) + source_id = int(row[source_reach_id_attribute]) + matches_dict[source_id] = matches_dict[source_id] + [matched_id] + + #if len(matches_dict[source_id])>1: + # print(matches_dict[source_id]) + + # get mode of matches + if verbose: + print("Finding mode of matches ...") + + for source_id,matches in matches_dict.items(): + majority = mode(matches).mode + matches_dict[source_id] = majority[0] + + + # make dataframe + if verbose: + print("Generating crosswalk table ...") + + crosswalk_table = pd.DataFrame.from_dict(matches_dict,orient='index', + columns=[target_reach_id_attribute]) + crosswalk_table.index.name = source_reach_id_attribute + + + return(crosswalk_table) + + + def clip(self,mask,keep_geom_type=False,verbose=False): + + if verbose: + print("Clipping streams to mask ...") + + # load mask + if isinstance(mask,gpd.GeoDataFrame): + pass + elif isinstance(mask,str): + mask = gpd.read_file(mask) + else: + raise TypeError("mask needs to be GeoDataFame or path to vector file") + + + branch_id_attribute = self.branch_id_attribute + attribute_excluded = self.attribute_excluded + values_excluded = self.values_excluded + + self = StreamNetwork( + gpd.clip(self,mask,keep_geom_type).reset_index(drop=True), + branch_id_attribute=branch_id_attribute, + attribute_excluded=attribute_excluded, + values_excluded=values_excluded) + + return(self) + + +class StreamBranchPolygons(StreamNetwork): + + #branch_id_attribute = None + #values_excluded = None + #attribute_excluded = None + + def __init__(self,*args,**kwargs): + super().__init__(*args,**kwargs) + + + @classmethod + def buffer_stream_branches(cls,stream_network,buffer_distance,verbose=True): + + """ Buffers stream branches by distance """ + + if verbose: + print("Buffering stream branches to polygons") + + 
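A note on the point-based conflation defined above: `conflate_points` matches every sampled point of a source reach to its nearest target point and then takes the mode of the matched IDs to build the crosswalk table. The sketch below illustrates that majority-vote idea with plain shapely distance checks and `collections.Counter` rather than the `STRtree`/`scipy.stats.mode` calls used in the class; the point sets and reach IDs are invented for illustration only.

```python
# Minimal sketch of the "mode of nearest matches" crosswalk idea used by
# StreamNetwork.conflate_points. Hypothetical data; brute-force nearest
# neighbor instead of an STRtree for clarity.
from collections import Counter

import pandas as pd
from shapely.geometry import Point

# points sampled along two source reaches (source reach id, point)
source_points = [
    (101, Point(0.0, 0.0)), (101, Point(1.0, 0.1)), (101, Point(2.0, 0.0)),
    (202, Point(0.0, 5.0)), (202, Point(1.0, 5.1)),
]

# points sampled along two target reaches (target reach id, point)
target_points = [
    (7, Point(0.1, 0.0)), (7, Point(1.1, 0.0)),
    (8, Point(0.0, 4.9)), (8, Point(1.0, 5.0)),
]

# collect every nearest-target match per source reach id
matches = {}
for source_id, pt in source_points:
    nearest_id = min(target_points, key=lambda t: pt.distance(t[1]))[0]
    matches.setdefault(source_id, []).append(nearest_id)

# majority vote (mode) per source reach, then build the crosswalk table
crosswalk = {sid: Counter(ids).most_common(1)[0][0] for sid, ids in matches.items()}
crosswalk_table = pd.DataFrame.from_dict(crosswalk, orient='index', columns=['target_ID'])
crosswalk_table.index.name = 'source_ID'
print(crosswalk_table)  # 101 -> 7, 202 -> 8
```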
buffer_distance = int(buffer_distance) + + # buffer lines + new_bids = [None] *len(stream_network) ; new_geoms = new_bids.copy() + i=0 + + for _,row in tqdm(stream_network.iterrows(),disable=(not verbose),total=len(stream_network)): + new_geoms[i] = row[stream_network.geometry.name].buffer(buffer_distance) + new_bids[i] = i + 1 + i += 1 + + # create polys gpd + polys = stream_network.copy() + + # assign to StreamBranchPolys + #polys[stream_network.branch_id_attribute] = new_bids + polys[stream_network.geom_name] = new_geoms + polys.set_geometry(stream_network.geom_name) + + # assign class and attributes + polys = cls(polys,branch_id_attribute=stream_network.branch_id_attribute, + attribute_excluded=stream_network.attribute_excluded, + values_excluded=stream_network.values_excluded) + + return(polys) + + @staticmethod + def query_vectors_by_branch(vector,branch_ids,branch_id_attribute,out_filename_template=None,vector_layer=None): + + # load vaas + if isinstance(vector,str): + vector_filename = vector + #vector = gpd.read_file(vector_filename,layer=vector_layer) + vector = fiona.open(vector_filename,'r',layer=vector_layer) + elif isinstance(vector,fiona.Collection): + pass + else: + raise TypeError('Pass vector argument as filepath or fiona collection') + + + def __find_matching_record(vector,attribute,value,matching='first'): + + if matching not in ('first','all'): + raise ValueError("matching needs to be \'first\' or \'all\'") + + matches = [] + for rec in vector: + if rec['properties'][attribute] == value: + if matching == 'first': + matches = [ rec ] + break + elif matching == 'all': + matches += [ rec ] + + return(matches) + + + # get source information + source_meta = vector.meta + + # out records + out_records = [] + + for bid in branch_ids: + out_records += __find_matching_record(vector,branch_id_attribute,bid,matching='all') + + if (out_filename_template is not None) & ( len(out_records) != 0): + base, ext = os.path.splitext(out_filename_template) + out_filename = base + "_{}".format(bid) + ext + + with fiona.open(out_filename,'w',**source_meta) as out_file: + out_file.writerecords(out_records) + + # close + vector.close() + + return(out_records) + + + def clip(self,to_clip,out_filename_template=None,branch_id=None,branch_id_attribute=None): + + """ Clips a raster or vector to the stream branch polygons """ + + fileType = "raster" #default + + # load raster or vector file + if isinstance(to_clip,DatasetReader): #if already rasterio dataset + pass + elif isinstance(to_clip,str): # if a string + + try: # tries to open as rasterio first then geopanda + to_clip = rasterio.open(to_clip,'r') + except rasterio.errors.RasterioIOError: + try: + to_clip = gpd.read_file(to_clip) + fileType = "vector" + except DriverError: + raise IOError("{} file not found".format(to_clip)) + + elif isinstance(to_clip,gpd.GeoDataFrame): # if a geopanda dataframe + fileType = "vector" + else: + raise TypeError("Pass rasterio dataset,geopandas GeoDataFrame, or filepath to raster or vector file") + + # generator to iterate + if branch_id is not None: + #print(iter(tuple([0,self.loc[self.loc[:,branch_id_attribute]==branch_id,:].squeeze()]))) + generator_to_iterate = enumerate( [self.loc[self.loc[:,branch_id_attribute]==branch_id,:].squeeze()] ) + else: + generator_to_iterate = self.iterrows() + + return_list = [] # list to return rasterio objects or gdf's + + if fileType == "raster": + buffered_meta = to_clip.meta.copy() + buffered_meta.update(blockxsize=256, blockysize=256, tiled=True) + + for i,row in 
generator_to_iterate: + buffered_array,buffered_transform = mask(to_clip,[row[self.geom_name]],crop=True) + + buffered_meta.update(height = buffered_array.shape[1], + width = buffered_array.shape[2], + transform = buffered_transform + ) + + # write out files + if out_filename_template is not None: + branch_id = row[self.branch_id_attribute] + + base, ext = os.path.splitext(out_filename_template) + out_filename = base + "_{}".format(branch_id) + ext + + with rasterio.open(out_filename,'w',**buffered_meta) as out: + out.write(buffered_array) + + # return files in list + return_list += [out] + + out.close() + + if fileType == "vector": + for i,row in generator_to_iterate: + branch_id = row[self.branch_id_attribute] + out = gpd.clip(to_clip,row[self.geom_name],keep_geom_type=True) + return_list += [out] + + if (out_filename_template is not None) & (not out.empty): + base, ext = os.path.splitext(out_filename_template) + out_filename = base + "_{}".format(branch_id) + ext + StreamNetwork.write(out,out_filename) + + return(return_list) diff --git a/src/subdiv_chan_obank_src.py b/src/subdiv_chan_obank_src.py new file mode 100755 index 000000000..ae09508df --- /dev/null +++ b/src/subdiv_chan_obank_src.py @@ -0,0 +1,277 @@ +#!/usr/bin/env python3 +import os +import sys +import argparse +import datetime as dt +import matplotlib.pyplot as plt +import multiprocessing +import numpy as np +import pandas as pd +import re +import seaborn as sns +import shutil +import traceback +import warnings +from functools import reduce +from multiprocessing import Pool +from os.path import isfile, join, dirname, isdir +from pathlib import Path +from tqdm import tqdm + +sns.set_theme(style="whitegrid") +warnings.simplefilter(action='ignore', category=FutureWarning) + +""" + Compute channel geomety and Manning's equation using subdivision method (separate in-channel vs. overbank). + Also apply unique Manning's n-values for channel and overbank using a user provided feature_id csv + + Parameters + ---------- + fim_dir : str + Directory containing FIM output folders. + mann_n_table : str + Path to a csv file containing Manning's n values by feature_id (must contain variables "feature_id", "channel_n", "overbank_n") + file_suffix : str + Optional: Suffix to append to the output log file + number_of_jobs : str + Number of jobs. 
+ src_plot_option : str + Optional (True or False): use this flag to crate src plots for all hydroids (long run time) +""" + +def variable_mannings_calc(args): + + in_src_bankfull_filename = args[0] + df_mann = args[1] + huc = args[2] + branch_id = args[3] + htable_filename = args[4] + output_suffix = args[5] + src_plot_option = args[6] + huc_output_dir = args[7] + + ## Read the src_full_crosswalked.csv + log_text = 'Calculating modified SRC: ' + str(huc) + ' branch id: ' + str(branch_id) + '\n' + try: + df_src_orig = pd.read_csv(in_src_bankfull_filename,dtype={'feature_id': 'int64'}) + + ## Check that the channel ratio column the user specified exists in the def + if 'Stage_bankfull' not in df_src_orig.columns: + print('WARNING --> ' + str(huc) + ' branch id: ' + str(branch_id) + in_src_bankfull_filename + ' does not contain the specified bankfull column: ' + 'Stage_bankfull') + print('Skipping --> ' + str(huc) + ' branch id: ' + str(branch_id)) + log_text += 'WARNING --> ' + str(huc) + ' branch id: ' + str(branch_id) + in_src_bankfull_filename + ' does not contain the specified bankfull column: ' + 'Stage_bankfull' + '\n' + else: + df_src_orig.drop(['channel_n','overbank_n','subdiv_applied','Discharge (m3s-1)_subdiv','Volume_chan (m3)','Volume_obank (m3)','BedArea_chan (m2)','BedArea_obank (m2)','WettedPerimeter_chan (m)','WettedPerimeter_obank (m)'], axis=1, inplace=True, errors='ignore') # drop these cols (in case vmann was previously performed) + + ## Calculate subdiv geometry variables + print('Calculating subdiv variables for SRC: ' + str(huc) + ' branch id: ' + str(branch_id)) + log_text = 'Calculating subdiv variables for SRC: ' + str(huc) + ' branch id: ' + str(branch_id) + '\n' + df_src = subdiv_geometry(df_src_orig) + + ## Merge (crosswalk) the df of Manning's n with the SRC df (using the channel/fplain delination in the 'Stage_bankfull') + df_src = df_src.merge(df_mann, how='left', on='feature_id') + check_null = df_src['channel_n'].isnull().sum() + df_src['overbank_n'].isnull().sum() + if check_null > 0: + log_text += str(huc) + ' branch id: ' + str(branch_id) + ' --> ' + 'Null feature_ids found in crosswalk btw roughness dataframe and src dataframe' + ' --> missing entries= ' + str(check_null/84) + '\n' + + ## Check if there are any missing data in the 'Stage_bankfull' column (these are locations where subdiv will not be applied) + df_src['subdiv_applied'] = np.where(df_src['Stage_bankfull'].isnull(), False, True) # create field to identify where vmann is applied (True=yes; False=no) + + ## Calculate Manning's equation discharge for channel, overbank, and total + df_src = subdiv_mannings_eq(df_src) + + ## Use the default discharge column when vmann is not being applied + df_src['Discharge (m3s-1)_subdiv'] = np.where(df_src['subdiv_applied']==False, df_src['Discharge (m3s-1)'], df_src['Discharge (m3s-1)_subdiv']) # reset the discharge value back to the original if vmann=false + + ## Output new SRC with bankfull column + df_src.to_csv(in_src_bankfull_filename,index=False) + + ## Output new hydroTable with updated discharge and ManningN column + df_src_trim = df_src[['HydroID','Stage','subdiv_applied','channel_n','overbank_n','Discharge (m3s-1)_subdiv']] + df_src_trim = df_src_trim.rename(columns={'Stage':'stage','Discharge (m3s-1)_subdiv': 'subdiv_discharge_cms'}) + df_src_trim['discharge_cms'] = df_src_trim['subdiv_discharge_cms'] # create a copy of vmann modified discharge (used to track future changes) + df_htable = pd.read_csv(htable_filename,dtype={'HUC': str}) + + ## 
drop the previously modified discharge column to be replaced with updated version + df_htable.drop(['subdiv_applied','discharge_cms','overbank_n','channel_n','subdiv_discharge_cms'], axis=1, errors='ignore', inplace=True) + df_htable = df_htable.merge(df_src_trim, how='left', left_on=['HydroID','stage'], right_on=['HydroID','stage']) + + ## Output new hydroTable csv + if output_suffix != "": + htable_filename = os.path.splitext(htable_filename)[0] + output_suffix + '.csv' + df_htable.to_csv(htable_filename,index=False) + + log_text += 'Completed: ' + str(huc) + + ## plot rating curves + if src_plot_option: + if isdir(huc_output_dir) == False: + os.mkdir(huc_output_dir) + generate_src_plot(df_src, huc_output_dir) + except Exception as ex: + summary = traceback.StackSummary.extract( + traceback.walk_stack(None)) + print('WARNING: ' + str(huc) + ' branch id: ' + str(branch_id) + " subdivision failed for some reason") + #print(f"*** {ex}") + #print(''.join(summary.format())) + log_text += 'ERROR --> ' + str(huc) + ' branch id: ' + str(branch_id) + " subdivision failed (details: " + (f"*** {ex}") + (''.join(summary.format())) + '\n' + + return(log_text) + +def subdiv_geometry(df_src): + + ## Calculate in-channel volume & bed area + df_src['Volume_chan (m3)'] = np.where(df_src['Stage']<=df_src['Stage_bankfull'], df_src['Volume (m3)'], (df_src['Volume_bankfull'] + ((df_src['Stage'] - df_src['Stage_bankfull']) * df_src['SurfArea_bankfull']))) + df_src['BedArea_chan (m2)'] = np.where(df_src['Stage']<=df_src['Stage_bankfull'], df_src['BedArea (m2)'], df_src['BedArea_bankfull']) + df_src['WettedPerimeter_chan (m)'] = np.where(df_src['Stage']<=df_src['Stage_bankfull'], (df_src['BedArea_chan (m2)']/df_src['LENGTHKM']/1000), (df_src['BedArea_chan (m2)']/df_src['LENGTHKM']/1000) + ((df_src['Stage'] - df_src['Stage_bankfull'])*2)) + + ## Calculate overbank volume & bed area + df_src['Volume_obank (m3)'] = np.where(df_src['Stage']>df_src['Stage_bankfull'], (df_src['Volume (m3)'] - df_src['Volume_chan (m3)']), 0.0) + df_src['BedArea_obank (m2)'] = np.where(df_src['Stage']>df_src['Stage_bankfull'], (df_src['BedArea (m2)'] - df_src['BedArea_chan (m2)']), 0.0) + df_src['WettedPerimeter_obank (m)'] = df_src['BedArea_obank (m2)']/df_src['LENGTHKM']/1000 + return(df_src) + +def subdiv_mannings_eq(df_src): + ## Calculate discharge (channel) using Manning's equation + df_src.drop(['WetArea_chan (m2)','HydraulicRadius_chan (m)','Discharge_chan (m3s-1)','Velocity_chan (m/s)'], axis=1, inplace=True, errors='ignore') # drop these cols (in case subdiv was previously performed) + df_src['WetArea_chan (m2)'] = df_src['Volume_chan (m3)']/df_src['LENGTHKM']/1000 + df_src['HydraulicRadius_chan (m)'] = df_src['WetArea_chan (m2)']/df_src['WettedPerimeter_chan (m)'] + df_src['HydraulicRadius_chan (m)'].fillna(0, inplace=True) + df_src['Discharge_chan (m3s-1)'] = df_src['WetArea_chan (m2)']* \ + pow(df_src['HydraulicRadius_chan (m)'],2.0/3)* \ + pow(df_src['SLOPE'],0.5)/df_src['channel_n'] + df_src['Velocity_chan (m/s)'] = df_src['Discharge_chan (m3s-1)']/df_src['WetArea_chan (m2)'] + df_src['Velocity_chan (m/s)'].fillna(0, inplace=True) + + ## Calculate discharge (overbank) using Manning's equation + df_src.drop(['WetArea_obank (m2)','HydraulicRadius_obank (m)','Discharge_obank (m3s-1)','Velocity_obank (m/s)'], axis=1, inplace=True, errors='ignore') # drop these cols (in case subdiv was previously performed) + df_src['WetArea_obank (m2)'] = df_src['Volume_obank (m3)']/df_src['LENGTHKM']/1000 + df_src['HydraulicRadius_obank 
(m)'] = df_src['WetArea_obank (m2)']/df_src['WettedPerimeter_obank (m)'] + df_src.replace([np.inf, -np.inf], np.nan, inplace=True) # need to replace inf instances (divide by 0) + df_src['HydraulicRadius_obank (m)'].fillna(0, inplace=True) + df_src['Discharge_obank (m3s-1)'] = df_src['WetArea_obank (m2)']* \ + pow(df_src['HydraulicRadius_obank (m)'],2.0/3)* \ + pow(df_src['SLOPE'],0.5)/df_src['overbank_n'] + df_src['Velocity_obank (m/s)'] = df_src['Discharge_obank (m3s-1)']/df_src['WetArea_obank (m2)'] + df_src['Velocity_obank (m/s)'].fillna(0, inplace=True) + + ## Calcuate the total of the subdivided discharge (channel + overbank) + df_src.drop(['Discharge (m3s-1)_subdiv'], axis=1, inplace=True, errors='ignore') # drop these cols (in case subdiv was previously performed) + df_src['Discharge (m3s-1)_subdiv'] = df_src['Discharge_chan (m3s-1)'] + df_src['Discharge_obank (m3s-1)'] + return(df_src) + +def generate_src_plot(df_src, plt_out_dir): + + ## create list of unique hydroids + hydroids = df_src.HydroID.unique().tolist() + + ## plot each hydroid SRC in the huc + for hydroid in hydroids: + print("Creating SRC plot: " + str(hydroid)) + plot_df = df_src.loc[df_src['HydroID'] == hydroid] + + f, ax = plt.subplots(figsize=(6.5, 6.5)) + ax.set_title(str(hydroid)) + sns.despine(f, left=True, bottom=True) + sns.scatterplot(x='Discharge (m3s-1)', y='Stage', data=plot_df, label="Orig SRC", ax=ax, color='blue') + sns.scatterplot(x='Discharge (m3s-1)_subdiv', y='Stage', data=plot_df, label="SRC w/ Subdiv", ax=ax, color='orange') + sns.scatterplot(x='Discharge_chan (m3s-1)', y='Stage', data=plot_df, label="SRC Channel", ax=ax, color='green', s=8) + sns.scatterplot(x='Discharge_obank (m3s-1)', y='Stage', data=plot_df, label="SRC Overbank", ax=ax, color='purple', s=8) + sns.lineplot(x='Discharge (m3s-1)', y='Stage_bankfull', data=plot_df, color='green', ax=ax) + plt.fill_between(plot_df['Discharge (m3s-1)'], plot_df['Stage_bankfull'],alpha=0.5) + plt.text(plot_df['Discharge (m3s-1)'].median(), plot_df['Stage_bankfull'].median(), "NWM Bankfull Approx: " + str(plot_df['Stage_bankfull'].median())) + ax.legend() + plt.savefig(plt_out_dir + os.sep + str(hydroid) + '_vmann.png',dpi=175, bbox_inches='tight') + plt.close() + +def multi_process(variable_mannings_calc, procs_list, log_file, number_of_jobs, verbose): + ## Initiate multiprocessing + available_cores = multiprocessing.cpu_count() + if number_of_jobs > available_cores: + number_of_jobs = available_cores - 2 + print("Provided job number exceeds the number of available cores. 
" + str(number_of_jobs) + " max jobs will be used instead.") + + print(f"Computing subdivided SRC and applying variable Manning's n to channel/overbank for {len(procs_list)} hucs using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + if verbose: + map_output = tqdm(pool.imap(variable_mannings_calc, procs_list), total=len(procs_list)) + tuple(map_output) # fetch the lazy results + else: + map_output = pool.map(variable_mannings_calc, procs_list) + log_file.writelines(["%s\n" % item for item in map_output]) + +def run_prep(fim_dir,mann_n_table,output_suffix,number_of_jobs,verbose,src_plot_option): + procs_list = [] + + print('Writing progress to log file here: ' + str(join(fim_dir,'logs','subdiv_src_' + output_suffix + '.log'))) + print('This may take a few minutes...') + ## Create a time var to log run time + begin_time = dt.datetime.now() + + ## initiate log file + log_file = open(join(fim_dir,'logs','subdiv_src_' + output_suffix + '.log'),"w") + log_file.write('START TIME: ' + str(begin_time) + '\n') + log_file.write('#########################################################\n\n') + + ## Check that the input fim_dir exists + assert os.path.isdir(fim_dir), 'ERROR: could not find the input fim_dir location: ' + str(fim_dir) + ## Check that the manning's roughness input filepath exists and then read to dataframe + assert os.path.isfile(mann_n_table), 'Can not find the input roughness/feature_id file: ' + str(mann_n_table) + + ## Read the Manning's n csv (ensure that it contains feature_id, channel mannings, floodplain mannings) + print('Importing the Manning roughness data file: ' + mann_n_table) + df_mann = pd.read_csv(mann_n_table,dtype={'feature_id': 'int64'}) + if 'channel_n' not in df_mann.columns or 'overbank_n' not in df_mann.columns or 'feature_id' not in df_mann.columns: + print('Missing required data column ("feature_id","channel_n", and/or "overbank_n")!!! 
--> ' + str(mann_n_table)) + else: + print('Running the variable_mannings_calc function...') + + ## Loop through hucs in the fim_dir and create list of variables to feed to multiprocessing + huc_list = os.listdir(fim_dir) + huc_list.sort() # sort huc_list for helping track progress in future print statements + for huc in huc_list: + #if huc != 'logs' and huc[-3:] != 'log' and huc[-4:] != '.csv': + if re.match(r'\d{8}', huc): + huc_branches_dir = os.path.join(fim_dir, huc,'branches') + for branch_id in os.listdir(huc_branches_dir): + branch_dir = os.path.join(huc_branches_dir,branch_id) + in_src_bankfull_filename = join(branch_dir,'src_full_crosswalked_' + branch_id + '.csv') + htable_filename = join(branch_dir,'hydroTable_' + branch_id + '.csv') + huc_plot_output_dir = join(branch_dir,'src_plots') + + if isfile(in_src_bankfull_filename) and isfile(htable_filename): + procs_list.append([in_src_bankfull_filename, df_mann, huc, branch_id, htable_filename, output_suffix, src_plot_option, huc_plot_output_dir]) + else: + print('HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + '\nWARNING --> cannot find required file (src_full_crosswalked_bankfull_*.csv or hydroTable_*.csv) in the fim output dir: ' + str(branch_dir) + ' - skipping this branch!!!\n') + log_file.write('HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + '\nWARNING --> cannot find required file (src_full_crosswalked_bankfull_*.csv or hydroTable_*.csv) in the fim output dir: ' + str(branch_dir) + ' - skipping this branch!!!\n') + + ## Pass huc procs_list to multiprocessing function + multi_process(variable_mannings_calc, procs_list, log_file, number_of_jobs, verbose) + + ## Record run time and close log file + end_time = dt.datetime.now() + log_file.write('END TIME: ' + str(end_time) + '\n') + tot_run_time = end_time - begin_time + log_file.write('TOTAL RUN TIME: ' + str(tot_run_time)) + log_file.close() + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description="Subdivide the default SRC to compute a separate channel component and overbank component. Implement user-provided Manning's n values for in-channel vs. overbank flow. Recalculate Manning's equation for discharge") + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) + parser.add_argument('-mann','--mann-n-table',help="Path to a csv file containing Manning's n values by feature_id",required=True,type=str) + parser.add_argument('-suff','--output-suffix',help="Suffix to append to the output log file (e.g. '_global_06_011')",default="",required=False,type=str) + parser.add_argument('-j','--number-of-jobs',help='OPTIONAL: number of workers (default=8)',required=False,default=8,type=int) + parser.add_argument('-vb','--verbose',help='OPTIONAL: verbose progress bar',required=False,default=None,action='store_true') + parser.add_argument('-plots','--src-plot-option',help='OPTIONAL flag: use this flag to create src plots for all hydroids. 
WARNING - long runtime',default=False,required=False, action='store_true') + + args = vars(parser.parse_args()) + + fim_dir = args['fim_dir'] + mann_n_table = args['mann_n_table'] + output_suffix = args['output_suffix'] + number_of_jobs = args['number_of_jobs'] + verbose = bool(args['verbose']) + src_plot_option = args['src_plot_option'] + + run_prep(fim_dir,mann_n_table,output_suffix,number_of_jobs,verbose,src_plot_option) \ No newline at end of file diff --git a/src/subset_catch_list_by_branch_id.py b/src/subset_catch_list_by_branch_id.py new file mode 100755 index 000000000..606d4e99e --- /dev/null +++ b/src/subset_catch_list_by_branch_id.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 + +import pandas as pd +import geopandas as gpd +from stream_branches import StreamNetwork +from os.path import splitext +import argparse + +def Subset_catch_list(catch_list,stream_network,branch_id_attribute,branch_id_list=None,out_catch_list=None,verbose=False): + + if verbose: + print("Loading files ....") + + # loading files + catch_list = pd.read_csv(catch_list,sep=" ",header=None,skiprows=1) + catch_list.rename(columns={0:"HydroID",1:"slopes",2:"lengthKM",3:"areasqkm"},inplace=True) + stream_network = StreamNetwork.from_file(stream_network,branch_id_attribute=branch_id_attribute) + stream_network = StreamNetwork(stream_network.astype({'HydroID':int}),branch_id_attribute=branch_id_attribute) + + if verbose: + print("Merging HydroIDs ... ") + catch_list = catch_list.merge(stream_network.loc[:,["HydroID",branch_id_attribute]],on='HydroID',how='inner') + + unique_branch_ids = catch_list.loc[:,branch_id_attribute].sort_values().unique() + base_file_path,extension = splitext(out_catch_list) + + if branch_id_list: + # write unique branch ids to file + if verbose: + print("Writing branch id list ...") + + unique_branch_ids.tofile(branch_id_list,sep="\n") + + if verbose: + print("Writing catch list subsets ...") + for bid in unique_branch_ids: + + # subsetting to branch id and getting number of hydroids + branch_catch_list = catch_list.loc[ catch_list.loc[:,branch_id_attribute] == bid, : ] + num_of_hydroIDs = len(branch_catch_list) + + # dropping branch id attribute + branch_catch_list = branch_catch_list.drop(columns=branch_id_attribute) + + # out file name management + out_branch_catch_list = "{}_{}{}".format(base_file_path,bid,extension) + + # write number of hydroids + with open(out_branch_catch_list,'w') as f: + f.write("{}\n".format(num_of_hydroIDs)) + + # write out catch list in append mode + branch_catch_list.to_csv(out_branch_catch_list, mode = 'a',header=False,sep=" ",index=False) + + return(catch_list) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Subsets catch list to branch scale') + parser.add_argument('-c','--catch-list', help='Input catchment list', required=True) + parser.add_argument('-s','--stream-network', help='Stream Network with HydroIDs and Branch IDs', required=True) + parser.add_argument('-b','--branch-id-attribute', help='Name of the branch attribute desired', required=True) + parser.add_argument('-l','--branch-id-list', help='Output the branch id list file desired', required=False,default=None) + parser.add_argument('-o','--out-catch-list', help='Output catchment list', required=False,default=None) + parser.add_argument('-v','--verbose', help='Verbose output', required=False,default=False,action='store_true') + + args = vars(parser.parse_args()) + + Subset_catch_list(**args) diff --git a/src/time_and_tee_run_by_unit.sh 
b/src/time_and_tee_run_by_unit.sh deleted file mode 100755 index 431beef97..000000000 --- a/src/time_and_tee_run_by_unit.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -e - -if [[ "$mem" == "1" ]] ; then - mprof run -o $1.dat --include-children /usr/bin/time -v $srcDir/run_by_unit.sh $1 |& tee $outputRunDataDir/logs/$1.log - mprof plot -o $outputRunDataDir/logs/$1_memory $1.dat -else - /usr/bin/time -v $srcDir/run_by_unit.sh $1 |& tee $outputRunDataDir/logs/$1.log -fi - -exit ${PIPESTATUS[0]} diff --git a/src/toDo.md b/src/toDo.md new file mode 100755 index 000000000..53bd35845 --- /dev/null +++ b/src/toDo.md @@ -0,0 +1,24 @@ +# To Do List + +## Eval +- *Testing Architecture & Modularity:* The test case functionality requires more modularity. There should be a clear set of tools that abstract away the model or test cases used. + - User tools should allow any predicted raster and any benchmark raster to be compared given proper encoding. + - This function should be command-line oriented and allow a list or file to be evaluated using multiple CPUs. + - Eval tools for FIM 3, Lisflood, GMS, etc. can then wrap around these core eval tools. + - Aggregation of metrics can be improved. The function create_metrics_metrics.py is spaghetti (15 indentation levels). See consolidate_metrics for modular tooling. Needs support for other test case formats. + - Try statements should only include very specific lines. Having very large try blocks (see run_test_case.py) creates a lot of issues when debugging nested modules. + - Any code placed in the `if __name__ == '__main__'` block of a module is not exposed to the user when the module is imported elsewhere in Python. These code segments should be modularized and grouped into a main function. +- *NoForecastFound Error:* inundation.py throws a NoForecastFound exception when no matches are found between the forecast and the hydrotable. In levelpaths with no forecast this avoids creating an inundation raster for that area. This changes the spatial extent of GMS to exclude some areas that FR/MS would write as FNs (see 13020102 for an example). +- *Evaluation extents:* The evaluation extents between FR, MS, and GMS are not consistent. While this does have a small effect on CSI, MCC should be immune to it and generally shows a similar trend. + + +## GMS +- *NWM Divergences:* Levelpath derivation doesn't handle divergences (e.g. 12020002). + - It shortens the effective length of levelpaths, thus reducing the rating curve height of the most upstream catchment. + - It also creates more levelpaths and likely increases computational time. +- *Unique Level Path and Hydro IDs:* Unique identifiers are needed for HydroIDs with GMS. Maybe FIMID, then branch ID, then HydroID. +- *Convenience wrapper for gms_run...sh:* Make a convenience wrapper for gms_run_unit.sh and gms_run_branch.sh. Be mindful of the two different processing loads and expose two different job numbers to the user. + - *Deny Listing for Units:* The deny list for units is applied at the end of gms/run_by_unit.sh. Because run_by_branch.sh still needs some of those files, they must be left in place even though they are not ultimately necessary. This cleanup should be moved later in the process, possibly once the convenience wrapper is made. +- *Update clip_vectors_to_wbd.py:* Far too many vectors are clipped for GMS purposes. This creates extra processing and storage requirements. +- *Stream order filtering:* You would likely gain computational time if you filtered out lower stream orders in the NWM for GMS input (creating a nwm_streams_gms.gpkg). 
By doing this you would need to mosaic with FR streams at the lower removed stream orders. GMS likely does very little at stream orders 1s and 2s and maybe even 3s and 4s.* +- *Levees to run_by_unit*: burning in levees at the branch scale is likely less efficient than at the unit scale. Consider moving those two modules to the run_by_unit script for gms diff --git a/src/unique_pixel_and_allocation.py b/src/unique_pixel_and_allocation.py index 0df89560c..b100d4c86 100755 --- a/src/unique_pixel_and_allocation.py +++ b/src/unique_pixel_and_allocation.py @@ -4,15 +4,16 @@ @author: trevor.grout """ +import os import rasterio import numpy as np import argparse -from r_grow_distance import r_grow_distance +import whitebox from utils.shared_functions import mem_profile @mem_profile -def stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace): +def stream_pixel_zones(stream_pixels, unique_stream_pixels): ''' This function will assign a unique ID for each stream pixel and writes to file. It then uses this raster to run GRASS r.grow.distance tool to create the allocation and proximity rasters required to complete the lateral thalweg conditioning. @@ -22,8 +23,6 @@ def stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace): Path to stream raster with value of 1. For example, demDerived_streamPixels.tif. unique_stream_pixels : STR Output path of raster containing unique ids for each stream pixel. - grass_workspace : STR - Path to temporary GRASS directory which is deleted. Returns ------- @@ -33,7 +32,15 @@ def stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace): Path to output allocation raster. ''' - + # Set wbt envs + wbt = whitebox.WhiteboxTools() + wbt.set_verbose_mode(False) + + workspace = os.path.dirname(unique_stream_pixels) + base = os.path.basename(unique_stream_pixels) + distance_grid = os.path.join(workspace,os.path.splitext(base)[0]+'_dist.tif') + allocation_grid = os.path.join(workspace,os.path.splitext(base)[0]+'_allo.tif') + # Import stream pixel raster with rasterio.open(stream_pixels) as temp: streams_profile = temp.profile @@ -42,8 +49,8 @@ def stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace): # Create array that matches shape of streams raster with unique values for each cell. Dataype is float64. unique_vals = np.arange(streams.size, dtype = 'float64').reshape(*streams.shape) - # At streams return the unique array value otherwise return NODATA value from input streams layer. NODATA value for demDerived_streamPixels.tif is -32768. - stream_pixel_values = np.where(streams == 1, unique_vals, streams_profile['nodata']) + # At streams return the unique array value otherwise return 0 values + stream_pixel_values = np.where(streams == 1, unique_vals, 0) # Reassign dtype to be float64 (needs to be float64) streams_profile.update(dtype = 'float64') @@ -53,8 +60,20 @@ def stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace): with rasterio.open(unique_stream_pixels, 'w', **streams_profile) as raster: raster.write(stream_pixel_values,1) - # Compute allocation and proximity grid using r.grow.distance. Output distance grid in meters. Set datatype for output allocation (needs to be float64) and proximity grids (float32). - distance_grid, allocation_grid = r_grow_distance(unique_stream_pixels, grass_workspace, 'Float32', 'Float64') + # Compute allocation and proximity grids. 
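For context on the distance and allocation grids being computed at this point in the patch: the proximity raster holds each cell's distance to the nearest stream pixel, and the allocation raster assigns each cell the unique ID of that nearest stream pixel, which is what the lateral thalweg conditioning needs downstream. The patch produces these with WhiteboxTools; the sketch below shows the same concept on a tiny NumPy array using `scipy.ndimage.distance_transform_edt`, purely as an illustration of what the two outputs mean, not the code path used here.

```python
# Conceptual illustration of the distance (proximity) and allocation grids.
# Toy array and scipy only; the actual patch calls WhiteboxTools
# euclidean_distance / euclidean_allocation on rasters.
import numpy as np
from scipy import ndimage

# 1 marks stream pixels, 0 marks everything else (toy 4x5 grid)
streams = np.array([
    [0, 0, 0, 0, 0],
    [1, 1, 0, 0, 0],
    [0, 0, 0, 1, 1],
    [0, 0, 0, 0, 0],
])

# unique id per cell, mirroring the unique_vals raster written above
unique_ids = np.arange(streams.size, dtype='float64').reshape(streams.shape)

# distance_transform_edt measures distance to the nearest zero cell, so the
# stream pixels must be the zeros of the input array
distance, (row_idx, col_idx) = ndimage.distance_transform_edt(
    streams == 0, return_indices=True
)

# allocation: every cell takes the unique id of its nearest stream pixel
allocation = unique_ids[row_idx, col_idx]

print(distance.round(2))   # proximity grid (0.0 on the stream pixels)
print(allocation)          # allocation grid of nearest stream-pixel ids
```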
+ wbt.euclidean_distance(stream_pixels,distance_grid) + wbt.euclidean_allocation(unique_stream_pixels,allocation_grid) + + with rasterio.open(allocation_grid) as allocation_ds: + allocation = allocation_ds.read(1) + allocation_profile = allocation_ds.profile + + # Add stream channel ids + allocation = np.where(allocation > 0, allocation, stream_pixel_values) + + with rasterio.open(allocation_grid, 'w', **allocation_profile) as allocation_ds: + allocation_ds.write(allocation, 1) + return distance_grid, allocation_grid @@ -65,7 +84,6 @@ def stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace): parser = argparse.ArgumentParser(description = 'Produce unique stream pixel values and allocation/proximity grids') parser.add_argument('-s', '--stream', help = 'raster to perform r.grow.distance', required = True) parser.add_argument('-o', '--out', help = 'output raster of unique ids for each stream pixel', required = True) - parser.add_argument('-g', '--grass_workspace', help = 'Temporary GRASS workspace', required = True) # Extract to dictionary and assign to variables. args = vars(parser.parse_args()) @@ -73,7 +91,6 @@ def stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace): # Rename variable inputs stream_pixels = args['stream'] unique_stream_pixels = args['out'] - grass_workspace = args['grass_workspace'] # Run stream_pixel_zones - stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace) + stream_pixel_zones(stream_pixels, unique_stream_pixels) diff --git a/src/usgs_gage_aggregate.py b/src/usgs_gage_aggregate.py new file mode 100644 index 000000000..1a8993e83 --- /dev/null +++ b/src/usgs_gage_aggregate.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 + +import os +from os.path import join +import pandas as pd +import re +import argparse + +class HucDirectory(object): + + def __init__(self, path, limit_branches=[]): + + self.dir = path + self.name = os.path.basename(path) + self.limit_branches = limit_branches + + self.usgs_dtypes = {'location_id':str, + 'nws_lid':str, + 'feature_id':int, + 'HydroID':int, + 'levpa_id':str, + 'dem_elevation':float, + 'dem_adj_elevation':float, + 'order_':str, + 'LakeID':object, + 'HUC8':str, + 'snap_distance':float} + self.agg_usgs_elev_table = pd.DataFrame(columns=list(self.usgs_dtypes.keys())) + + def iter_branches(self): + + if self.limit_branches: + for branch in self.limit_branches: + yield (branch, join(self.dir, 'branches', branch)) + + else: + for branch in os.listdir(join(self.dir, 'branches')): + yield (branch, join(self.dir, 'branches', branch)) + + def usgs_elev_table(self, branch_path): + + usgs_elev_filename = join(branch_path, 'usgs_elev_table.csv') + if not os.path.isfile(usgs_elev_filename): + return + + usgs_elev_table = pd.read_csv(usgs_elev_filename, dtype=self.usgs_dtypes) + self.agg_usgs_elev_table = self.agg_usgs_elev_table.append(usgs_elev_table) + + + def agg_function(self): + + for branch_id, branch_path in self.iter_branches(): + + self.usgs_elev_table(branch_path) + + ## Other aggregate funtions can go here + + ## After all of the branches are visited, the code below will write the aggregates + if os.path.isfile(join(self.dir, 'usgs_elev_table.csv')): + os.remove(join(self.dir, 'usgs_elev_table.csv')) + + if not self.agg_usgs_elev_table.empty: + self.agg_usgs_elev_table.to_csv(join(self.dir, 'usgs_elev_table.csv'), index=False) + + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Aggregates usgs_elev_table.csv at the HUC level') + 
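One portability note on the `HucDirectory` class above: `DataFrame.append` works with the pandas version pinned in the Pipfile but was deprecated in later pandas releases. A concat-based version of the same per-branch accumulation is sketched below under that assumption; the file name mirrors the patch, while the helper function and directory argument are hypothetical.

```python
# Sketch of the same aggregation using pd.concat instead of DataFrame.append,
# assuming the <huc_dir>/branches/<branch_id>/usgs_elev_table.csv layout used above.
import os
from os.path import join

import pandas as pd


def aggregate_usgs_elev_tables(huc_dir):
    per_branch_tables = []
    branches_dir = join(huc_dir, 'branches')
    for branch_id in os.listdir(branches_dir):
        table_file = join(branches_dir, branch_id, 'usgs_elev_table.csv')
        if os.path.isfile(table_file):
            per_branch_tables.append(pd.read_csv(table_file, dtype={'location_id': str}))

    if not per_branch_tables:
        return None

    # one HUC-level table with all branch rows stacked
    agg_table = pd.concat(per_branch_tables, ignore_index=True)
    agg_table.to_csv(join(huc_dir, 'usgs_elev_table.csv'), index=False)
    return agg_table
```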
parser.add_argument('-fim','--fim_directory', help='Input FIM Directory', required=True) + parser.add_argument('-i','--fim_inputs', help='Input fim_inputs CSV file', required=False) + + args = vars(parser.parse_args()) + + fim_directory = args['fim_directory'] + fim_inputs = args['fim_inputs'] + assert os.path.isdir(fim_directory), f'{fim_directory} is not a valid directory' + + if fim_inputs: + fim_inputs = pd.read_csv(fim_inputs, header=None, names=['huc', 'levpa_id'],dtype=str) + + for huc in fim_inputs.huc.unique(): + + branches = fim_inputs.loc[fim_inputs.huc == huc, 'levpa_id'].tolist() + huc = HucDirectory(join(fim_directory, huc), limit_branches=branches) + huc.agg_function() + + else: + for huc_dir in [d for d in os.listdir(fim_directory) if re.match('\d{8}', d)]: + + huc = HucDirectory(join(fim_directory, huc_dir)) + huc.agg_function() + + + + + + diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py index 3b7b54256..05ccfa07e 100755 --- a/src/usgs_gage_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -1,11 +1,9 @@ #!/usr/bin/env python3 +import os import geopandas as gpd -import pandas as pd import rasterio import argparse -import pygeos -from shapely.wkb import dumps, loads import warnings from utils.shared_functions import mem_profile warnings.simplefilter("ignore") @@ -16,118 +14,155 @@ Parameters ---------- usgs_gages_filename : str - File name of USGS stations layer. - dem_filename : str - File name of original DEM. + File path of USGS stations subset layer. i.e. '/data/path/usgs_subset_gages.gpkg' input_flows_filename : str - File name of FIM streams layer. + File path of FIM streams layer. i.e. '/data/path/demDerived_reaches_split_filtered_3246000257.gpkg' input_catchment_filename : str - File name of FIM catchment layer. - wbd_buffer_filename : str - File name of buffered wbd. + File path of FIM catchment layer. i.e. '/data/path/gw_catchments_reaches_filtered_addedAttributes_3246000257.gpkg' + dem_filename : str + File path of original DEM. i.e. '/data/path/dem_meters_3246000257.tif' dem_adj_filename : str - File name of thalweg adjusted DEM. + File path of thalweg adjusted DEM. i.e. '/data/path/dem_thalwegCond_3246000257.tif' output_table_filename : str - File name of output table. + File path of output table. i.e. '/data/path/usgs_elev_table.csv' + branch_id: str + ID of the current branch i.e. 
'3246000257' ''' - -@mem_profile -def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_adj_filename,output_table_filename,extent): - - wbd_buffer = gpd.read_file(wbd_buffer_filename) - usgs_gages = gpd.read_file(usgs_gages_filename, mask=wbd_buffer) - dem_m = rasterio.open(dem_filename,'r') - input_flows = gpd.read_file(input_flows_filename) - input_catchment = gpd.read_file(input_catchment_filename) - dem_adj = rasterio.open(dem_adj_filename,'r') - - #MS extent use gages that are mainstem - if extent == "MS": - usgs_gages = usgs_gages.query('curve == "yes" & mainstem == "yes"') - #FR extent use gages that are not mainstem - if extent == "FR": - usgs_gages = usgs_gages.query('curve == "yes" & mainstem == "no"') - - if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) - - # Identify closest HydroID - closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) - closest_hydro_id = closest_catchment.filter(items=['location_id','HydroID','min_thal_elev','med_thal_elev','max_thal_elev', 'order_']) - closest_hydro_id = closest_hydro_id.dropna() - - # Get USGS gages that are within catchment boundaries - usgs_gages = usgs_gages.loc[usgs_gages.location_id.isin(list(closest_hydro_id.location_id))] - - columns = ['location_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev','str_order'] - gage_data = [] - - # Move USGS gage to stream - for index, gage in usgs_gages.iterrows(): - - # Get stream attributes - hydro_id = closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].HydroID.item() - str_order = str(int(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].order_.item())) - min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].min_thal_elev.item(),2) - med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].med_thal_elev.item(),2) - max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].max_thal_elev.item(),2) - - # Convert headwater point geometries to WKB representation - wkb_gages = dumps(gage.geometry) - - # Create pygeos headwater point geometries from WKB representation - gage_bin_geom = pygeos.io.from_wkb(wkb_gages) - - # Closest segment to headwater - closest_stream = input_flows.loc[input_flows.HydroID==hydro_id] - wkb_closest_stream = dumps(closest_stream.geometry.item()) - stream_bin_geom = pygeos.io.from_wkb(wkb_closest_stream) - - # Linear reference headwater to closest stream segment - gage_distance_to_line = pygeos.linear.line_locate_point(stream_bin_geom, gage_bin_geom) - referenced_gage = pygeos.linear.line_interpolate_point(stream_bin_geom, gage_distance_to_line) - - # Convert geometries to wkb representation - bin_referenced_gage = pygeos.io.to_wkb(referenced_gage) - - # Convert to shapely geometries - shply_referenced_gage = loads(bin_referenced_gage) - - # Sample rasters at adjusted gage - dem_m_elev = round(list(rasterio.sample.sample_gen(dem_m,shply_referenced_gage.coords))[0].item(),2) - dem_adj_elev = round(list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item(),2) - - # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table - site_elevations = [str(gage.location_id), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] - 
gage_data.append(site_elevations) - - elev_table = pd.DataFrame(gage_data, columns=columns) - - if not elev_table.empty: - elev_table.to_csv(output_table_filename,index=False) +class GageCrosswalk(object): + + def __init__(self, usgs_subset_gages_filename, branch_id): + + self.branch_id = branch_id + self.gages = self._load_gages(usgs_subset_gages_filename) + + def run_crosswalk(self, input_catchment_filename, input_flows_filename, dem_filename, dem_adj_filename, output_table_filename): + '''Run the gage crosswalk steps: 1) spatial join to branch catchments layer 2) snap sites to + the dem-derived flows 3) sample both dems at the snapped points 4) write the crosswalked points + to usgs_elev_table.csv + ''' + + if self.gages.empty: + print(f'There are no gages for branch {branch_id}') + os._exit(0) + # Spatial join to fim catchments + self.catchment_sjoin(input_catchment_filename) + if self.gages.empty: + print(f'There are no gages for branch {branch_id}') + os._exit(0) + # Snap to dem derived flow lines + self.snap_to_dem_derived_flows(input_flows_filename) + # Sample DEM and thalweg adjusted DEM + self.sample_dem(dem_filename, 'dem_elevation') + self.sample_dem(dem_adj_filename, 'dem_adj_elevation') + # Write to csv + num_gages = len(self.gages) + print(f"{num_gages} gage{'' if num_gages == 1 else 's'} in branch {self.branch_id}") + self.write(output_table_filename) + + + def _load_gages(self, gages_filename): + '''Reads gage geopackage from huc level and filters based on current branch id''' + + usgs_gages = gpd.read_file(gages_filename) + return usgs_gages[usgs_gages.levpa_id == self.branch_id] + + def catchment_sjoin(self, input_catchment_filename): + '''Spatial joins gages to FIM catchments''' + + input_catchments = gpd.read_file(input_catchment_filename, dtype={'HydroID':int}) + self.gages = gpd.sjoin(self.gages, input_catchments[['HydroID', 'LakeID', 'geometry']], how='inner') + + def snap_to_dem_derived_flows(self, input_flows_filename): + '''Joins to dem derived flow line and produces snap_distance and geometry_snapped for sampling DEMs on the thalweg''' + + input_flows = gpd.read_file(input_flows_filename) + input_flows['geometry_ln'] = input_flows.geometry + self.gages = self.gages.merge(input_flows[['HydroID', 'geometry_ln']], on='HydroID') + + # Snap each point to its feature_id line + self.gages['geometry_snapped'], self.gages['snap_distance'] = self.gages.apply(self.snap_to_line, axis=1,result_type='expand').T.values + self.gages.geometry_snapped = self.gages.geometry_snapped.astype('geometry') + + def sample_dem(self, dem_filename, column_name): + '''Sample an input DEM at snapped points. 
Make sure to run self.gages.set_geometry("geometry_snapped") before runnig + this method, otherwise the DEM will be sampled at the actual gage locations.''' + + coord_list = [(x,y) for x,y in zip(self.gages['geometry_snapped'].x , self.gages['geometry_snapped'].y)] + + with rasterio.open(dem_filename) as dem: + self.gages[column_name] = [x[0] for x in dem.sample(coord_list)] + + def write(self, output_table_filename): + '''Write to csv file''' + + # Prep and write out file + elev_table = self.gages.copy() + elev_table.loc[elev_table['location_id'] == elev_table['nws_lid'], 'location_id'] = None # set location_id to None where there isn't a gage + elev_table = elev_table[elev_table['location_id'].notna()] + + if not elev_table.empty: + elev_table.to_csv(output_table_filename, index=False) + + @staticmethod + def snap_to_line(row): + if not row.geometry_ln: + return (None, None) + snap_geom = row.geometry_ln.interpolate(row.geometry_ln.project(row.geometry)) + return (snap_geom, snap_geom.distance(row.geometry)) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Crosswalk USGS sites to HydroID and get elevations') - parser.add_argument('-gages','--usgs-gages-filename', help='USGS gages', required=True) - parser.add_argument('-dem','--dem-filename',help='DEM',required=True) + parser.add_argument('-gages','--usgs-gages-filename', help='USGS gage subset at the huc level', required=True) parser.add_argument('-flows','--input-flows-filename', help='DEM derived streams', required=True) parser.add_argument('-cat','--input-catchment-filename', help='DEM derived catchments', required=True) - parser.add_argument('-wbd','--wbd-buffer-filename', help='WBD buffer', required=True) + parser.add_argument('-dem','--dem-filename',help='DEM',required=True) parser.add_argument('-dem_adj','--dem-adj-filename', help='Thalweg adjusted DEM', required=True) parser.add_argument('-outtable','--output-table-filename', help='Table to append data', required=True) - parser.add_argument('-e', '--extent', help="extent configuration entered by user when running fim_run.sh", required = True) + parser.add_argument('-b','--branch-id', help='Branch ID used to filter the gages', type=str, required=True) args = vars(parser.parse_args()) usgs_gages_filename = args['usgs_gages_filename'] - dem_filename = args['dem_filename'] input_flows_filename = args['input_flows_filename'] input_catchment_filename = args['input_catchment_filename'] - wbd_buffer_filename = args['wbd_buffer_filename'] + dem_filename = args['dem_filename'] dem_adj_filename = args['dem_adj_filename'] output_table_filename = args['output_table_filename'] - extent = args['extent'] - - crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_adj_filename,output_table_filename, extent) + branch_id = args['branch_id'] + + assert os.path.isfile(usgs_gages_filename), f"The input file {usgs_gages_filename} does not exist." 
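The snapping and sampling pattern used by `GageCrosswalk` above is compact enough to miss on a first read: `line.project(point)` gives the distance along the line to the position closest to the gage, `line.interpolate()` turns that distance back into a point on the line, and the snapped coordinates are then fed to rasterio's `sample()`. A standalone sketch with a made-up line, gage point, and DEM path:

```python
# Minimal sketch of snapping a gage to a stream line and sampling a DEM at the
# snapped location. The coordinates and the 'dem.tif' path are placeholders.
import rasterio
from shapely.geometry import LineString, Point

stream = LineString([(0, 0), (100, 0), (200, 50)])
gage = Point(120, 30)

# project() returns the distance along the line nearest to the gage;
# interpolate() converts that distance back into a point on the line
distance_along = stream.project(gage)
snapped = stream.interpolate(distance_along)
snap_distance = snapped.distance(gage)
print(snapped.x, snapped.y, round(snap_distance, 2))

# sample the DEM at the snapped coordinate ((x, y) pairs in the raster's CRS)
with rasterio.open('dem.tif') as dem:
    elevation = next(dem.sample([(snapped.x, snapped.y)]))[0]
    print(elevation)
```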
+ + # Instantiate class + gage_crosswalk = GageCrosswalk(usgs_gages_filename, branch_id) + gage_crosswalk.run_crosswalk(input_catchment_filename, input_flows_filename, dem_filename, dem_adj_filename, output_table_filename) + +""" +Examples: + +python /foss_fim/src/usgs_gage_crosswalk.py -gages /outputs/carson_gms_bogus/02020005/usgs_subset_gages.gpkg \ + -flows /outputs/carson_gms_bogus/02020005/branches/3246000305/demDerived_reaches_split_filtered_3246000305.gpkg \ + -cat /outputs/carson_gms_bogus/02020005/branches/3246000305/gw_catchments_reaches_filtered_addedAttributes_3246000305.gpkg \ + -dem /outputs/carson_gms_bogus/02020005/branches/3246000305/dem_meters_3246000305.tif \ + -dem_adj /outputs/carson_gms_bogus/02020005/branches/3246000305/dem_thalwegCond_3246000305.tif \ + -outtable /outputs/carson_gms_bogus/02020005/branches/3246000305/usgs_elev_table.csv \ + -b 32460003 05 + +python /foss_fim/src/usgs_gage_crosswalk.py -gages /outputs/carson_gms_bogus/02020005/usgs_subset_gages.gpkg \ + -flows /outputs/carson_gms_bogus/02020005/branches/3246000257/demDerived_reaches_split_filtered_3246000257.gpkg \ + -cat /outputs/carson_gms_bogus/02020005/branches/3246000257/gw_catchments_reaches_filtered_addedAttributes_3246000257.gpkg \ + -dem /outputs/carson_gms_bogus/02020005/branches/3246000257/dem_meters_3246000257.tif \ + -dem_adj /outputs/carson_gms_bogus/02020005/branches/3246000257/dem_thalwegCond_3246000257.tif \ + -outtable /outputs/carson_gms_bogus/02020005/branches/3246000257/usgs_elev_table.csv \ + -b 32460002 57 + +python /foss_fim/src/usgs_gage_crosswalk.py -gages /outputs/carson_gage_test/04130001/usgs_subset_gages.gpkg \ + -flows /outputs/carson_gage_test/04130001/branches/9041000030/demDerived_reaches_split_filtered_9041000030.gpkg \ + -cat /outputs/carson_gage_test/04130001/branches/9041000030/gw_catchments_reaches_filtered_addedAttributes_9041000030.gpkg \ + -dem /outputs/carson_gage_test/04130001/branches/9041000030/dem_meters_9041000030.tif \ + -dem_adj /outputs/carson_gage_test/04130001/branches/904100030/dem_thalwegCond_0941000030.tif \ + -outtable /outputs/carson_gage_test/04130001/branches/9041000030/usgs_elev_table.csv \ + -b 90410000 30 +""" diff --git a/src/usgs_gage_unit_setup.py b/src/usgs_gage_unit_setup.py new file mode 100755 index 000000000..075ff961d --- /dev/null +++ b/src/usgs_gage_unit_setup.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 + +import os +from posixpath import dirname +import re +import geopandas as gpd +import pandas as pd +import argparse +import warnings +from utils.shared_functions import mem_profile +warnings.simplefilter("ignore") + +class Gage2Branch(object): + + def __init__(self, usgs_gage_filename, ahps_filename, huc8): + + self.usgs_gage_filename = usgs_gage_filename + self.ahps_filename = ahps_filename + self.huc8 = huc8 + self.load_gages() + + def load_gages(self): + + # Filter USGS gages to huc + usgs_gages = gpd.read_file(self.usgs_gage_filename) + self.gages = usgs_gages[(usgs_gages.HUC8 == self.huc8)] + + # Get AHPS sites within the HUC and add them to the USGS dataset + if self.ahps_filename: + ahps_sites = gpd.read_file(self.ahps_filename) + ahps_sites = ahps_sites[ahps_sites.HUC8 == self.huc8] # filter to HUC8 + ahps_sites.rename(columns={'nwm_feature_id':'feature_id', + 'usgs_site_code':'location_id'}, inplace=True) + ahps_sites = ahps_sites[ahps_sites.location_id.isna()] # Filter sites that are already in the USGS dataset + self.gages = self.gages.append(ahps_sites[['feature_id', 'nws_lid', 'location_id', 'HUC8', 'name', 
'states','geometry']]) + + # Create gages attribute + self.gages.location_id.fillna(usgs_gages.nws_lid, inplace=True) + self.gages.loc[self.gages['nws_lid'] == 'Bogus_ID', 'nws_lid'] = None + + def sort_into_branch(self, nwm_subset_streams_levelPaths): + + nwm_reaches = gpd.read_file(nwm_subset_streams_levelPaths) + nwm_reaches.rename(columns={'ID':'feature_id'}, inplace=True) + + if not self.gages[self.gages.feature_id.isnull()].empty: + missing_feature_id = self.gages.loc[self.gages.feature_id.isnull()].copy() + nwm_reaches_union = nwm_reaches.geometry.unary_union + missing_feature_id['feature_id'] = missing_feature_id.apply(lambda row: self.sjoin_nearest_to_nwm(row.geometry, nwm_reaches, nwm_reaches_union), axis=1) + + self.gages.update(missing_feature_id) + + del nwm_reaches_union + + # Left join gages with NWM streams to get the level path + self.gages.feature_id = self.gages.feature_id.astype(int) + self.gages = self.gages.merge(nwm_reaches[['feature_id','levpa_id','order_']], on='feature_id', how='left') + return self.gages + + def branch_zero(self, bzero_id): + + # note that some gages will not have a valid "order_" attribute (not attributed to a level path in the step before - likely a gage on dropped stream order) + self.gages.levpa_id = bzero_id + return self.gages + + def write(self, out_name): + + self.gages.to_file(out_name,driver='GPKG',index=False) + + @staticmethod + def sjoin_nearest_to_nwm(pnt, lines, union): + + snap_geom = union.interpolate(union.project(pnt)) + queried_index = lines.geometry.sindex.query(snap_geom) + if len(queried_index): + return int(lines.iloc[queried_index[0]].feature_id.item()) + + @staticmethod + def filter_gage_branches(fim_inputs_filename): + + fim_dir = os.path.dirname(fim_inputs_filename) + fim_inputs = pd.read_csv(fim_inputs_filename, header=None, names=['huc', 'levpa_id'], + dtype={'huc':str, 'levpa_id':str}) + + for huc_dir in [d for d in os.listdir(fim_dir) if re.search('^\d{8}$', d)]: + + gage_file = os.path.join(fim_dir, huc_dir, 'usgs_subset_gages.gpkg') + if not os.path.isfile(gage_file): + fim_inputs.drop(fim_inputs.loc[fim_inputs.huc == huc_dir].index, inplace=True) + continue + + gages = gpd.read_file(gage_file) + level_paths = gages.levpa_id + fim_inputs.drop(fim_inputs.loc[(fim_inputs.huc == huc_dir) & (~fim_inputs.levpa_id.isin(level_paths))].index, inplace=True) + + fim_inputs.to_csv(fim_inputs_filename, index=False, header=False) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Assign HUC gages to branch and stage for usgs_gage_crosswalk.py') + parser.add_argument('-gages','--usgs-gages-filename', help='USGS gages', required=True) + parser.add_argument('-ahps','--nws-lid-filename', help='AHPS gages', required=False) + parser.add_argument('-nwm','--input-nwm-filename', help='NWM stream subset', required=True) + parser.add_argument('-o','--output-filename', help='Table to append data', required=True) + parser.add_argument('-huc','--huc8-id', help='HUC8 ID (to verify gage location huc)', type=str, required=True) + parser.add_argument('-bzero_id','--branch-zero-id', help='Branch zero ID value', type=str, required=True) + parser.add_argument('-ff','--filter-fim-inputs', help='WARNING: only run this parameter if you know exactly what you are doing', required=False) + + args = vars(parser.parse_args()) + + usgs_gages_filename = args['usgs_gages_filename'] + nws_lid_filename = args['nws_lid_filename'] + input_nwm_filename = args['input_nwm_filename'] + output_filename = args['output_filename'] + huc8 
= args['huc8_id'] + bzero_id = args['branch_zero_id'] + filter_fim_inputs = args['filter_fim_inputs'] + + if not filter_fim_inputs: + + usgs_gage_subset = Gage2Branch(usgs_gages_filename, nws_lid_filename, huc8) + if usgs_gage_subset.gages.empty: + print(f'There are no gages identified for {huc8}') + os._exit(0) + usgs_gage_subset.sort_into_branch(input_nwm_filename) + usgs_gage_subset.write(output_filename) + + # Create seperate output for branch zero + output_filename_zero = os.path.splitext(output_filename)[0] + '_' + bzero_id + os.path.splitext(output_filename)[-1] + usgs_gage_subset.branch_zero(bzero_id) + usgs_gage_subset.write(output_filename_zero) + + else: + ''' + This is an easy way to filter fim_inputs so that only branches with gages will run during fim_process_unit_wb.sh. + + example: + python3 src/usgs_gage_unit_setup.py -gages x -ahps x -nwm x -o x -huc x -ff /outputs/test_output/fim_inputs.csv + ''' + assert os.path.isfile(filter_fim_inputs) + Gage2Branch.filter_gage_branches(filter_fim_inputs) + + diff --git a/src/utils/archive_cleanup.py b/src/utils/archive_cleanup.py deleted file mode 100755 index b0a08aafa..000000000 --- a/src/utils/archive_cleanup.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 -import os -import argparse - -def archive_cleanup(archive_cleanup_path, additional_whitelist): - ''' - Processes all archived job outputs from a given path to keep only necessary files - - Parameters - ---------- - archive_cleanup_path : STR - Path to the archived outputs - additional_whitelist : STR - Additional list of files to keep - ''' - - # List of files that will be saved by default - whitelist = [ - 'rem_zeroed_masked.tif', - 'rem_clipped_zeroed_masked.tif', - 'gw_catchments_reaches_filtered_addedAttributes.tif', - 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg', - 'gw_catchments_reaches_clipped_addedAttributes.tif', - 'gw_catchments_reaches_clipped_addedAttributes_crosswalked.gpkg', - 'hydroTable.csv', - 'gw_catchments_pixels.tif', - 'dem_burned_filled.tif', - 'demDerived_reaches.dbf', - 'demDerived_reaches.prj', - 'demDerived_reaches.shp', - 'demDerived_reaches.shx' - ] - - # Add any additional files to the whitelist that the user wanted to keep - if additional_whitelist: - whitelist = whitelist + [filename for filename in additional_whitelist.split(',')] - - # Delete any non-whitelisted files - directory = os.fsencode(archive_cleanup_path) - for subdir in os.listdir(directory): - subdirname = os.fsdecode(subdir) - if subdirname != 'logs' and subdirname != 'aggregate_fim_outputs': - for file in os.listdir(os.path.join(archive_cleanup_path, subdirname)): - filename = os.fsdecode(file) - if filename not in whitelist: - os.remove(os.path.join(archive_cleanup_path, subdirname, filename)) - -if __name__ == '__main__': - #Parse arguments - parser = argparse.ArgumentParser(description = 'Cleanup archived output files') - parser.add_argument('archive_cleanup_path', type=str, help='Path to the archived job outputs') - parser.add_argument('-w', '--additional_whitelist', type=str, help='List of additional files to keep') - - # Extract to dictionary and assign to variables. 
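The `sjoin_nearest_to_nwm` helper above back-fills a missing `feature_id` by projecting the gage point onto the unioned NWM stream geometry, interpolating the snapped location, and then querying the spatial index with that snapped point. Below is a minimal sketch of the same pattern; the two-reach GeoDataFrame and the gage coordinates are made up for illustration and are not real NWM inputs.

import geopandas as gpd
from shapely.geometry import Point, LineString

# Hypothetical reaches; real inputs come from nwm_subset_streams_levelPaths
nwm_reaches = gpd.GeoDataFrame(
    {'feature_id': [101, 102]},
    geometry=[LineString([(0, 0), (10, 0)]), LineString([(0, 5), (10, 5)])]
)

gage_pnt = Point(3, 1)   # a gage with no feature_id yet

# Project the gage onto the unioned stream network, then interpolate the snapped location
union = nwm_reaches.geometry.unary_union
snap_geom = union.interpolate(union.project(gage_pnt))

# Query the spatial index with the snapped point to find the owning reach
queried_index = nwm_reaches.geometry.sindex.query(snap_geom)
if len(queried_index):
    print(int(nwm_reaches.iloc[queried_index[0]].feature_id))   # -> 101

Because the snapped point lies exactly on the chosen reach, the bounding-box query returns that reach first, which is what the class relies on when filling `feature_id` for unmatched gages.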
- args = vars(parser.parse_args()) - - # Rename variable inputs - archive_cleanup_path = args['archive_cleanup_path'] - additional_whitelist = args['additional_whitelist'] - - # Run archive_cleanup - archive_cleanup(archive_cleanup_path, additional_whitelist) - \ No newline at end of file diff --git a/src/utils/fim_enums.py b/src/utils/fim_enums.py new file mode 100644 index 000000000..5af4d8451 --- /dev/null +++ b/src/utils/fim_enums.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 + +from enum import Enum + +class FIM_exit_codes(Enum): + ''' + This particular enum class allows for special system exit codes to be issued to tell different + code layers about what has happened. Currently, most of our code uses just sys.exit(0) and + sys.exit(1). + The list of enums lower (which can grow as needed) allows for more status to be return. + - Notes about system exit codes: + - Numerics of 0 to 255 are available. + - Some are already reserved can we can not use them for custom exit codes. Reserved codes are: + - 0: Success + - 1: Fail + - 2, 126, 127, 128, 130 and 255. + - see: https://tldp.org/LDP/abs/html/exitcodes.html + - More advanced combinations of codes can be used and we will keep it simple for now. + - Sample usage: + import utils/fim_enums + print(FIM_exit_codes.UNIT_NO_BRANCHES.value) -> 60 [this is used in derive_level_paths.py] + + - For more information : https://docs.python.org/3.11/howto/enum.html and https://docs.python.org/3/library/enum.html + ''' + + UNIT_NO_BRANCHES = 60 + NO_FLOWLINES_EXIST = 61 + EXCESS_UNIT_ERRORS = 62 + \ No newline at end of file diff --git a/src/utils/reproject_dem.py b/src/utils/reproject_dem.py index dba8f65de..056602b53 100755 --- a/src/utils/reproject_dem.py +++ b/src/utils/reproject_dem.py @@ -16,9 +16,15 @@ def reproject_dem(args): elev_cm = args[1] elev_cm_proj = args[2] reprojection = args[3] + overwrite = args[4] - if os.path.exists(elev_cm_proj): + if os.path.exists(elev_cm_proj) & overwrite: os.remove(elev_cm_proj) + elif not os.path.exists(elev_cm_proj): + pass + else: + print(f"Skipping {elev_cm_proj}. Raster already exists. Use overwrite option.") + return shutil.copy(elev_cm, elev_cm_proj) diff --git a/src/utils/shared_functions.py b/src/utils/shared_functions.py index b01533c7d..df4ce8224 100644 --- a/src/utils/shared_functions.py +++ b/src/utils/shared_functions.py @@ -1,11 +1,27 @@ #!/usr/bin/env python3 import os +import glob +import inspect +import re +import sys + +from concurrent.futures import as_completed +from datetime import datetime, timezone +from pathlib import Path from os.path import splitext -import rasterio + +import fiona +import geopandas as gp import numpy as np -from rasterio.warp import calculate_default_transform, reproject, Resampling +import pandas as pd +import rasterio +import utils.shared_variables as sv + from pyproj.crs import CRS +from rasterio.warp import calculate_default_transform, reproject, Resampling +from tqdm import tqdm + def getDriver(fileName): @@ -60,8 +76,6 @@ def run_system_command(args): def subset_wbd_gpkg(wbd_gpkg, multilayer_wbd_geopackage): - import geopandas as gp - from utils.shared_variables import CONUS_STATE_LIST, PREP_PROJECTION print("Subsetting " + wbd_gpkg + "...") # Read geopackage into dataframe. @@ -72,12 +86,12 @@ def subset_wbd_gpkg(wbd_gpkg, multilayer_wbd_geopackage): state = row["STATES"] if state != None: # Some polygons are empty in the STATES field. keep_flag = False # Default to Fault, i.e. to delete the polygon. 
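The `FIM_exit_codes` enum added above in `src/utils/fim_enums.py` lets a unit- or branch-level script report a specific, non-fatal condition (such as a unit with no branches) back to whatever layer launched it. The following self-contained sketch shows that idea; the enum values are copied from the file above, but the `process_unit` function and the surrounding check are purely illustrative and are not part of the repository.

import sys
from enum import Enum

# Values mirror src/utils/fim_enums.py (duplicated here only so the sketch runs standalone)
class FIM_exit_codes(Enum):
    UNIT_NO_BRANCHES = 60
    NO_FLOWLINES_EXIST = 61
    EXCESS_UNIT_ERRORS = 62

def process_unit(branch_ids):
    # A unit with no branches is not a hard failure; exit with the dedicated code
    if not branch_ids:
        sys.exit(FIM_exit_codes.UNIT_NO_BRANCHES.value)   # process exits with code 60
    return len(branch_ids)

if __name__ == '__main__':
    try:
        process_unit([])
    except SystemExit as e:
        # A calling layer (e.g. a bash wrapper checking $?) could treat 60 as
        # "skip this unit" rather than a generic failure (exit code 1)
        print(f'Exited with code {e.code}')   # -> 60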
- if state in CONUS_STATE_LIST: + if state in sv.CONUS_STATE_LIST: keep_flag = True # Only split if multiple states present. More efficient this way. elif len(state) > 2: for wbd_state in state.split(","): # Some polygons have multiple states, separated by a comma. - if wbd_state in CONUS_STATE_LIST: # Check each polygon to make sure it's state abbrev name is allowed. + if wbd_state in sv.CONUS_STATE_LIST: # Check each polygon to make sure it's state abbrev name is allowed. keep_flag = True break if not keep_flag: @@ -85,10 +99,21 @@ def subset_wbd_gpkg(wbd_gpkg, multilayer_wbd_geopackage): # Overwrite geopackage. layer_name = os.path.split(wbd_gpkg)[1].strip('.gpkg') - gdf.crs = PREP_PROJECTION + gdf.crs = sv.PREP_PROJECTION gdf.to_file(multilayer_wbd_geopackage, layer=layer_name,driver='GPKG',index=False) +def get_fossid_from_huc8(huc8_id,foss_id_attribute='fossid', + hucs=os.path.join(os.environ['inputDataDir'],'wbd','WBD_National.gpkg'), + hucs_layerName=None): + + hucs = fiona.open(hucs,'r',layer=hucs_layerName) + + for huc in hucs: + if huc['properties']['HUC8'] == huc8_id: + return(huc['properties'][foss_id_attribute]) + + def update_raster_profile(args): elev_cm_filename = args[0] @@ -97,6 +122,15 @@ def update_raster_profile(args): nodata_val = args[3] blocksize = args[4] keep_intermediate = args[5] + overwrite = args[6] + + if os.path.exists(elev_m_filename) & overwrite: + os.remove(elev_m_filename) + elif not os.path.exists(elev_m_filename): + pass + else: + print(f"Skipping {elev_m_filename}. Use overwrite option.") + return if isinstance(blocksize, int): pass @@ -111,29 +145,38 @@ def update_raster_profile(args): # Update nodata value and convert from cm to meters dem_cm = rasterio.open(elev_cm_filename) - + no_data = dem_cm.nodata - data = dem_cm.read(1) - - dem_m = np.where(data == int(no_data), nodata_val, (data/100).astype(rasterio.float32)) - - del data - + dem_m_profile = dem_cm.profile.copy() - dem_m_profile.update(driver='GTiff',tiled=True,nodata=nodata_val, blockxsize=blocksize, blockysize=blocksize, dtype='float32',crs=projection,compress='lzw',interleave='band') - with rasterio.open(elev_m_filename, "w", **dem_m_profile, BIGTIFF='YES') as dest: - dest.write(dem_m, indexes = 1) + dest = rasterio.open(elev_m_filename, "w", **dem_m_profile, BIGTIFF='YES') - if keep_intermediate == False: - os.remove(elev_cm_filename) + for idx,window in dem_cm.block_windows(1): + data = dem_cm.read(1,window=window) - del dem_m - dem_cm.close() + # wrote out output of this line as the same variable. + data = np.where(data == int(no_data), nodata_val, (data/100).astype(rasterio.float32)) + + # removed this line to avoid having two array copies of data. 
Kills memory usage + #del data + dest.write(data, indexes = 1, window=window) + + + # not necessary + #del dem_m + + dem_cm.close() + dest.close() + + if keep_intermediate == False: + os.remove(elev_cm_filename) + + return(elev_m_filename) ''' This function isn't currently used but is the preferred method for @@ -149,7 +192,12 @@ def update_raster_profile(args): ''' -def reproject_raster(input_raster_name,reprojection,blocksize=None,reprojected_raster_name=None): +def reproject_raster(input_raster_name, + reprojection, + blocksize=None, + reprojected_raster_name=None): + + # TODO: Might be removed (Nov 8, 2022), not in use if blocksize is not None: if isinstance(blocksize, int): @@ -217,3 +265,331 @@ def wrapper(*args, **kwargs): else: func(*args, **kwargs) return wrapper + + + +######################################################################## +#Function to check the age of a file (use for flagging potentially outdated input) +######################################################################## +def check_file_age(file): + ''' + Checks if file exists, determines the file age + Returns + ------- + None. + ''' + file = Path(file) + if file.is_file(): + modified_date = datetime.fromtimestamp(file.stat().st_mtime, tz=timezone.utc) + + return modified_date + + +######################################################################## +#Function to concatenate huc csv files to a single dataframe/csv +######################################################################## +def concat_huc_csv(fim_dir,csv_name): + ''' + Checks if huc csv file exist, concatenates contents of csv + Returns + ------- + None. + ''' + + merged_csv = [] + huc_list = os.listdir(fim_dir) + for huc in huc_list: + if huc != 'logs': + csv_file = os.path.join(fim_dir,huc,str(csv_name)) + if Path(csv_file).is_file(): + # Aggregate all of the individual huc elev_tables into one aggregate for accessing all data in one csv + read_csv = pd.read_csv(csv_file, dtype={'HUC8': object, 'location_id': object, 'feature_id': int, 'levpa_id': object}) + # Add huc field to dataframe + read_csv['HUC8'] = huc + merged_csv.append(read_csv) + + # Create and return a concatenated pd dataframe + if merged_csv: + print(f"Creating aggregate csv") + concat_df = pd.concat(merged_csv) + return concat_df + + +# ----------------------------------------------------------- +def progress_bar_handler(executor_dict, desc): + + for future in tqdm(as_completed(executor_dict), + total=len(executor_dict), + desc=desc + ): + try: + future.result() + except Exception as exc: + print('{}, {}, {}'.format(executor_dict[future],exc.__class__.__name__,exc)) + + +# ##################################### +class FIM_Helpers: + + # ----------------------------------------------------------- + @staticmethod + def append_id_to_file_name(file_name, identifier): + ''' + Processing: + Takes an incoming file name and inserts an identifier into the name + just ahead of the extension, with an underscore added. 
+ ie) filename = "/output/myfolder/a_raster.tif" + indentifer = "13090001" + Becomes: "/output/myfolder/a_raster_13090001.tif" + Note: + - Can handle a single identifier or a list of identifier + ie) identifier = ["13090001", "123000001"] + Becomes: "/output/myfolder/a_raster_13090001_123000001.tif" + - This allows for file name to not be submitted and will return None + ------- + + Inputs: + file_name: a single file name + identifier: a value or list of values to be inserted with an underscore + added ahead of the extention + + ------- + Output: + out_file_name: A single name with each identifer added at the end before + the extension, each with an underscore in front of the identifier. + + ------- + Usage: + from utils.shared_functions import FIM_Helpers as fh + composite_file_output = fh.append_id_to_file_name(composite_file_output, huc) + ''' + + if file_name is not None: + + root,extension = os.path.splitext(file_name) + + if isinstance(identifier, list): + + out_file_name = root + for i in identifier: + out_file_name += "_{}".format(i) + out_file_name += extension + else: + out_file_name = root + "_{}".format(identifier) + extension + else: + out_file_name = None + + return(out_file_name) + + # ----------------------------------------------------------- + @staticmethod + def vprint (message, is_verbose, show_caller = False): + ''' + Processing: Will print a standard output message only when the + verbose flag is set to True + ------- + + Parameters: + message : str + The message for output + Note: this method puts a '...' in front of the message + is_verbose : bool + This exists so the call to vprint always exists and does not + need a "if verbose: test for inline code + If this value is False, this method will simply return + show_caller : bool + Sometimes, it is desired to see the calling function, method or class + + ------- + Returns: + str : the message starting with "... " and optionallly ending with + the calling function, method or class name + + ------- + Usage: + from utils.shared_functions import FIM_Helpers as fh + fh.vprint(f"Starting alpha test for {self.dir}", verbose) + ''' + if not is_verbose: + return + + msg = f"... {message}" + if (show_caller): + caller_name = inspect.stack()[1][3] + if (caller_name == " 0: - log_text += str(huc) + ' --> ' + 'Null feature_ids found in crosswalk btw roughness dataframe and src dataframe' + ' --> missing entries= ' + str(check_null/84) + '\n' - - ## Calculate composite Manning's n using the channel geometry ratio attribute given by user (e.g. 
chann_hradius_ratio or chann_vol_ratio) - df_src['comp_ManningN'] = (df_src[channel_ratio_src_column]*df_src['channel_n']) + ((1.0 - df_src[channel_ratio_src_column])*df_src['overbank_n']) - #print('Done calculating composite Manning n (' + channel_ratio_src_column + '): ' + str(huc)) - - ## Check if there are any missing data in the composite ManningN column - check_null_comp = df_src['comp_ManningN'].isnull().sum() - if check_null_comp > 0: - log_text += str(huc) + ' --> ' + 'Missing values in the comp_ManningN calculation' + ' --> missing entries= ' + str(check_null_comp/84) + '\n' - df_src['vmann_on'] = np.where(df_src['comp_ManningN'].isnull(), False, True) # create field to identify where vmann is applied (True=yes; False=no) - - ## Define the channel geometry variable names to use from the src - hydr_radius = 'HydraulicRadius (m)' - wet_area = 'WetArea (m2)' - - ## Calculate Q using Manning's equation - #df_src.rename(columns={'Discharge (m3s-1)'}, inplace=True) # rename the previous Discharge column - df_src['Discharge (m3s-1)_varMann'] = df_src[wet_area]* \ - pow(df_src[hydr_radius],2.0/3)* \ - pow(df_src['SLOPE'],0.5)/df_src['comp_ManningN'] - - ## Set Q values to 0 and -999 for specified criteria - df_src['Discharge (m3s-1)_varMann'].mask(df_src['Stage'] == 0,0,inplace=True) - if 'Thalweg_burn_elev' in df_src: - df_src['Discharge (m3s-1)_varMann'].mask(df_src['Stage'] == df_src['Thalweg_burn_elev'],0,inplace=True) - df_src['Discharge (m3s-1)_varMann'].mask(df_src['Stage'] < df_src['Thalweg_burn_elev'],-999,inplace=True) - - ## Use the default discharge column when vmann is not being applied - df_src['Discharge (m3s-1)_varMann'] = np.where(df_src['vmann_on']==False, df_src['default_Discharge (m3s-1)'], df_src['Discharge (m3s-1)_varMann']) # reset the discharge value back to the original if vmann=false - df_src['comp_ManningN'] = np.where(df_src['vmann_on']==False, df_src['default_ManningN'], df_src['comp_ManningN']) # reset the ManningN value back to the original if vmann=false - - ## Output new SRC with bankfull column - df_src.to_csv(out_src_vmann_filename,index=False) - - ## Output new hydroTable with updated discharge and ManningN column - df_src_trim = df_src[['HydroID','Stage','vmann_on',channel_ratio_src_column,'Discharge (m3s-1)_varMann','comp_ManningN']] - df_src_trim = df_src_trim.rename(columns={'Stage':'stage','Discharge (m3s-1)_varMann': 'discharge_cms','comp_ManningN':'ManningN'}) - df_htable = pd.read_csv(htable_filename,dtype={'HUC': str}) - df_htable.rename(columns={'ManningN':'orig_ManningN'},inplace=True) - df_htable.drop(['vmann_on'], axis=1, inplace=True) # drop the default "vmann_on" variable from add_crosswalk.py - if not set(['orig_discharge_cms']).issubset(df_htable.columns): - df_htable.rename(columns={'discharge_cms':'orig_discharge_cms'},inplace=True) - else: - df_htable.drop(['discharge_cms'], axis=1, inplace=True) # drop the previously modified discharge column to be replaced with updated version - df_htable = df_htable.merge(df_src_trim, how='left', left_on=['HydroID','stage'], right_on=['HydroID','stage']) - - # Delete intermediate CSVs outputs. Todo delete this block later. - htable_parent_dir = os.path.split(htable_filename)[0] - # List all CSVs. 
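The removed block above composites the in-channel and overbank roughness values using the channel-geometry ratio, then re-solves Manning's equation, Q = A * R^(2/3) * S^(1/2) / n, for each stage. A small pandas sketch of those two steps on a toy SRC table follows; the column names match the code above, but the values are made up.

import pandas as pd

# Toy SRC rows; real values come from src_full_crosswalked_bankfull.csv
df_src = pd.DataFrame({
    'chann_hradius_ratio': [1.0, 0.6],      # share of wetted geometry that is in-channel
    'channel_n':           [0.06, 0.06],
    'overbank_n':          [0.12, 0.12],
    'WetArea (m2)':        [25.0, 80.0],
    'HydraulicRadius (m)': [1.2, 2.1],
    'SLOPE':               [0.001, 0.001],
})

# Composite Manning's n weighted by the channel geometry ratio
ratio = df_src['chann_hradius_ratio']
df_src['comp_ManningN'] = ratio * df_src['channel_n'] + (1.0 - ratio) * df_src['overbank_n']

# Manning's equation: Q = A * R^(2/3) * S^(1/2) / n
df_src['Discharge (m3s-1)_varMann'] = (
    df_src['WetArea (m2)']
    * df_src['HydraulicRadius (m)'] ** (2.0 / 3)
    * df_src['SLOPE'] ** 0.5
    / df_src['comp_ManningN']
)

print(df_src[['comp_ManningN', 'Discharge (m3s-1)_varMann']])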
- file_list = os.listdir(htable_parent_dir) - for f in file_list: - if viz_clean_flag == 1: # if using the viz flag then delete all intermediate csv files - if '.csv' in f: - if f != 'hydroTable.csv': - os.remove(os.path.join(htable_parent_dir, f)) - else: - keep_files = ['usgs_elev_table.csv', 'src_base.csv', 'small_segments.csv'] - if '.csv' in f: - if f not in keep_files: - os.remove(os.path.join(htable_parent_dir, f)) - - df_htable.to_csv(htable_filename,index=False) - - log_text += 'Completed: ' + str(huc) - - ## plot rating curves - if src_plot_option == 'True': - if isdir(huc_output_dir) == False: - os.mkdir(huc_output_dir) - generate_src_plot(df_src, huc_output_dir) - - return(log_text) - -def generate_src_plot(df_src, plt_out_dir): - - ## create list of unique hydroids - hydroids = df_src.HydroID.unique().tolist() - - ## plot each hydroid SRC in the huc - for hydroid in hydroids: - print("Creating SRC plot: " + str(hydroid)) - plot_df = df_src.loc[df_src['HydroID'] == hydroid] - - f, ax = plt.subplots(figsize=(6.5, 6.5)) - ax.set_title(str(hydroid)) - sns.despine(f, left=True, bottom=True) - sns.scatterplot(x='Discharge (m3s-1)', y='Stage', data=plot_df, label="Orig SRC", ax=ax, color='blue') - sns.scatterplot(x='Discharge (m3s-1)_varMann', y='Stage', data=plot_df, label="SRC w/ vMann", ax=ax, color='orange') - sns.lineplot(x='Discharge (m3s-1)', y='Stage_1_5', data=plot_df, color='green', ax=ax) - plt.fill_between(plot_df['Discharge (m3s-1)'], plot_df['Stage_1_5'],alpha=0.5) - plt.text(plot_df['Discharge (m3s-1)'].median(), plot_df['Stage_1_5'].median(), "NWM 1.5yr: " + str(plot_df['Stage_1_5'].median())) - ax.legend() - plt.savefig(plt_out_dir + os.sep + str(hydroid) + '_vmann.png',dpi=175, bbox_inches='tight') - plt.close() - -# for hydroid in hydroids: -# print("Creating SRC plot: " + str(hydroid)) -# plot_df = df_src.loc[df_src['HydroID'] == hydroid] -# -# f, ax = plt.subplots(figsize=(6.5, 6.5)) -# ax.set_title(str(hydroid)) -# sns.despine(f, left=True, bottom=True) -# sns.scatterplot(x='comp_ManningN', y='Stage', data=plot_df, label="Orig SRC", ax=ax, color='blue') -# #sns.scatterplot(x='Discharge (m3s-1)_varMann', y='Stage', data=plot_df, label="SRC w/ vMann", ax=ax, color='orange') -# sns.lineplot(x='comp_ManningN', y='Stage_1_5', data=plot_df, color='green', ax=ax) -# plt.fill_between(plot_df['comp_ManningN'], plot_df['Stage_1_5'],alpha=0.5) -# plt.text(plot_df['comp_ManningN'].median(), plot_df['Stage_1_5'].median(), "NWM 1.5yr: " + str(plot_df['Stage_1_5'].median())) -# ax.legend() -# plt.savefig(plt_out_dir + os.sep + str(hydroid) + '.png',dpi=175, bbox_inches='tight') -# plt.close() - -def multi_process(variable_mannings_calc, procs_list): - ## Initiate multiprocessing - print(f"Applying variable Manning's n to SRC calcs for {len(procs_list)} hucs using {number_of_jobs} jobs") - with Pool(processes=number_of_jobs) as pool: - map_output = pool.map(variable_mannings_calc, procs_list) - log_file.writelines(["%s\n" % item for item in map_output]) - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="Vary the Manning's n values for in-channel vs. floodplain (recalculate Manning's eq for Discharge)") - parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) - parser.add_argument('-bc','--channel-ratio-src-column',help='SRC attribute containing the channel vs. 
overbank geometry ratio (for composite calc)',required=False,type=str,default='chann_hradius_ratio') - parser.add_argument('-mann','--mann-n-table',help="Path to a csv file containing Manning's n values by featureid",required=True,type=str) - parser.add_argument('-suff','--output-suffix',help="Suffix to append to the output log file (e.g. '_global_06_011')",required=True,type=str) - parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) - parser.add_argument('-plots','--src-plot-option',help='Optional (True or False): use this flag to create src plots for all hydroids. WARNING - long runtime',required=False,default='False',type=str) - parser.add_argument('-viz_clean','--viz-clean',help='Optional (Viz flag): pass the viz flag (0 or 1) to delete intermediate csv files',required=False,default=0,type=int) - - args = vars(parser.parse_args()) - - fim_dir = args['fim_dir'] - channel_ratio_src_column = args['channel_ratio_src_column'] - mann_n_table = args['mann_n_table'] - output_suffix = args['output_suffix'] - number_of_jobs = args['number_of_jobs'] - src_plot_option = args['src_plot_option'] - viz_clean_flag = args['viz_clean'] - procs_list = [] - - print('Writing progress to log file here: ' + str(join(fim_dir,'log_composite_n' + output_suffix + '.log'))) - print('This may take a few minutes...') - ## Create a time var to log run time - begin_time = dt.datetime.now() - - ## Check that the bankfull flow filepath exists and read to dataframe - if not isfile(mann_n_table): - print('!!! Can not find the input roughness/feature_id file: ' + str(mann_n_table)) - else: - ## Read the Manning's n csv (ensure that it contains feature_id, channel mannings, floodplain mannings) - print('Importing the Manning roughness data file: ' + mann_n_table) - df_mann = pd.read_csv(mann_n_table,dtype={'feature_id': 'int64'}) - if 'channel_n' not in df_mann.columns or 'overbank_n' not in df_mann.columns or 'feature_id' not in df_mann.columns: - print('Missing required data column ("feature_id","channel_n", and/or "overbank_n")!!! 
--> ' + df_mann) - else: - print('Running the variable_mannings_calc function...') - - ## Loop through hucs in the fim_dir and create list of variables to feed to multiprocessing - huc_list = os.listdir(fim_dir) - skip_hucs_log = "" - for huc in huc_list: - if huc != 'logs' and huc[-3:] != 'log' and huc[-4:] != '.csv': - in_src_bankfull_filename = join(fim_dir,huc,'src_full_crosswalked_bankfull.csv') - out_src_vmann_filename = join(fim_dir,huc,'src_full_crosswalked_vmann.csv') - htable_filename = join(fim_dir,huc,'hydroTable.csv') - huc_plot_output_dir = join(fim_dir,huc,'src_plots') - - if isfile(in_src_bankfull_filename): - print(str(huc)) - procs_list.append([in_src_bankfull_filename, channel_ratio_src_column, df_mann, huc, out_src_vmann_filename, htable_filename, src_plot_option, huc_plot_output_dir,viz_clean_flag]) - else: - print(str(huc) + '\nWARNING --> can not find the src_full_crosswalked_bankfull.csv in the fim output dir: ' + str(join(fim_dir,huc)) + ' - skipping this HUC!!!\n') - - ## initiate log file - print(f"Applying variable Manning's n to SRC calcs for {len(procs_list)} hucs using {number_of_jobs} jobs") - sys.__stdout__ = sys.stdout - log_file = open(join(fim_dir,'logs','log_composite_n' + output_suffix + '.log'),"w") - sys.stdout = log_file - log_file.write('START TIME: ' + str(begin_time) + '\n') - log_file.write('#########################################################\n\n') - - ## Pass huc procs_list to multiprocessing function - multi_process(variable_mannings_calc, procs_list) - - ## Record run time and close log file - end_time = dt.datetime.now() - log_file.write('END TIME: ' + str(end_time) + '\n') - tot_run_time = end_time - begin_time - log_file.write('TOTAL RUN TIME: ' + str(tot_run_time)) - sys.stdout = sys.__stdout__ - log_file.close() diff --git a/tools/aggregate_csv_files.py b/tools/aggregate_csv_files.py new file mode 100644 index 000000000..cfd531eba --- /dev/null +++ b/tools/aggregate_csv_files.py @@ -0,0 +1,63 @@ +import os, fnmatch +import pandas as pd +import argparse + +def locate(pattern, root_path): + for path, dirs, files in os.walk(os.path.abspath(root_path)): + for filename in fnmatch.filter(files, pattern): + yield os.path.join(path, filename) + +def read_csvs_to_df(files_to_merge,head_row): + #df = pd.concat((pd.read_csv(f,usecols=["HUC", "feature_id", "HydroID", "last_updated", "submitter", "adjust_ManningN"],dtype={'feature_id': 'int64','HUC': str}) for f in files_to_merge), ignore_index=True) + li = [] + for file_in in files_to_merge: + print(file_in) + ## Use below for merging hydroTables for calib n value data + #df = pd.read_csv(file_in,usecols=["HUC", "feature_id", "HydroID", "last_updated", "submitter", "adjust_ManningN"],dtype={'feature_id': 'int64','HUC': str}, index_col=None, header=0) + #df = df[df['adjust_ManningN'].notna()] + #df.drop_duplicates(subset=['HydroID'],inplace=True) + df = pd.read_csv(file_in, index_col=None, header=head_row) #dtype={'feature_id': 'int64'} + df = df[df['Unnamed: 0'] != 'HydroID'] + li.append(df) + + frame = pd.concat(li, axis=0, ignore_index=True) + return(frame) + +def write_aggregate(frame,output_file): + print('Writing new csv file: ' + output_file) + frame.to_csv(output_file,index=False) + +def concat_files(files_to_merge): + # joining files with concat and read_csv + print('Concatenating all matching csv files...') + df_concat = pd.concat(map(pd.read_csv, files_to_merge), ignore_index=True) + +def run_prep(fim_dir,file_search_str,head_row,output_file): + assert os.path.isdir(fim_dir), 'ERROR: 
could not find the input fim_dir location: ' + str(fim_dir) + + files_to_merge = [js for js in locate('*' + file_search_str + '*.csv', fim_dir)] + if len(files_to_merge) > 0: + print('Found files: ' + str(len(files_to_merge))) + + aggreg_df = read_csvs_to_df(files_to_merge,head_row) + write_aggregate(aggreg_df,output_file) + + else: + print('Did not find any files using tag: ' + '*' + file_search_str + '*.csv') + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description="Simple tool to search for csv files (using wildcard text) within a fim output directory and then aggregate all files into a single csv") + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir (e.g. data/outputs/xxxx/', required=True,type=str) + parser.add_argument('-search_str','--file-search-str', help='File search string', required=True,type=str) + parser.add_argument('-head_row','--header-row', help='Optional: header row to parse (default=0)',default=0, required=False,type=int) + parser.add_argument('-out_csv','--output-csv', help='full filepath to write new csv', required=True,type=str) + + args = vars(parser.parse_args()) + fim_dir = args['fim_dir'] + file_search_str = args['file_search_str'] + head_row = args['header_row'] + output_file = args['output_csv'] + + run_prep(fim_dir,file_search_str,head_row,output_file) + diff --git a/tools/aggregate_metrics.py b/tools/aggregate_metrics.py index 7cc5951b5..bd5a99fdd 100755 --- a/tools/aggregate_metrics.py +++ b/tools/aggregate_metrics.py @@ -204,7 +204,7 @@ def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfo for test_case in test_cases_dir_list: - if test_case not in ['other', 'validation_data_ble', 'validation_data_legacy', 'validation_data_ahps']: + if test_case not in ['validation_data_ble', 'validation_data_legacy', 'validation_data_ahps']: branch_results_dir = os.path.join(TEST_CASES_DIR, test_case, 'performance_archive', config_version, branch) huc = test_case.split('_')[0] diff --git a/tools/cache_metrics.py b/tools/cache_metrics.py index 0d02fb217..825b8f4fb 100755 --- a/tools/cache_metrics.py +++ b/tools/cache_metrics.py @@ -9,7 +9,7 @@ TEST_CASES_DIR = r'/data/test_cases_new/' #TODO remove "_new" PREVIOUS_FIM_DIR = r'/data/previous_fim' -OUTPUTS_DIR = r'/data/outputs' +OUTPUTS_DIR = r'/outputs' def process_alpha_test(args): @@ -81,7 +81,7 @@ def process_alpha_test(args): bench_cat_test_case_list = os.listdir(bench_cat_test_case_dir) for test_id in bench_cat_test_case_list: - if 'validation' and 'other' not in test_id: + if 'validation' not in test_id: current_huc = test_id.split('_')[0] if test_id.split('_')[1] in bench_cat: diff --git a/tools/calibration-db/README.md b/tools/calibration-db/README.md new file mode 100755 index 000000000..6235d877c --- /dev/null +++ b/tools/calibration-db/README.md @@ -0,0 +1,83 @@ +## The calibration database service + +The **calibration-db tool** is an optional use tool. When started, it creates a service, via a docker container, that creates a postgres database. The calibration-db tool will make minor updates to hydrotables.csv's, but only to applicable HUCs. The applicable HUCs generally have AHPS library data or good USGS guage data. + +------------------------------- +To setup the system, it needs to be done in two parts. + +**A)** Update the `/config/params_template.env` file and set `src_adjust_spatial` to "True". If you are not using the tool, set the flag to "False". + +**B)** Setup the calibration database service. See steps below. 
+ +------------------------------- +## Creating the calibration database service + +When you start the service, it will start up a new docker container named "fim_calibration_db", if enabled in the params_template.env. + +To run commands, use a bash terminal window. + +Steps: +1. Copy `/config/calb_db_keys_template.env` and rename it to a name of your choice (ie: `calb_db_keys.env`). We recommend saving the file outside the fim source code folder for security purposes. + +2. Update the values in the new .env file. + Note: The `CALIBRATION_DB_HOST` is the computer/server where the docker container is physically running. This may be your server name, but can also be a different server if you like. One calibration database service can be shared by multiple other servers, each with their own FIM source code folders (a "client" if you will). Each "client" server can have its own calb_db_keys.env file and/or can share the .env with other servers. ie) dev-1 and prod-1 can share a .env file if they have a shared drive. + +3. Start the service from the /tools/calibration-db directory of the server that will run the calibration database service. + Using a bash terminal window, run + `docker-compose --env-file {path to .env file}/{your key file name} up --build -d` + ie) docker-compose --env-file /my_server/config/calb_db_keys.env up --build -d + + - If you get a permissions denied error, you might need to upgrade the permissions on your calibration-db folder. From your /tools directory, run `chmod -R 777 calibration-db` (yes.. 777). You may need to add the word "sudo" at the front of the command, depending on your system configuration. + - If you get an error saying `command not found: docker compose`, you may need to install it via `sudo apt install docker-compose` or check your system configuration. + +4. You should be able to see a container named `fim_calibration_db` via a bash terminal command of `docker stats --no-stream`. + +------------------------------- +## Destroying the database (from the /tools/calibration-db directory) + +Using a bash terminal window, run + `docker-compose down` + `rm -rf pgdata` + +If that command errors out, use `docker-compose down --rmi local -v --remove-orphans`. + +------------------------------- +## Debugging Postgres DB + +Use the following command to connect to the postgres DB (using values from your .env file): + +`docker run -it --rm postgis/postgis psql -h CALIBRATION_DB_HOST -U CALIBRATION_DB_USER_NAME` (and follow prompt to enter password (CALIBRATION_DB_PASS)) +ie) `docker run -it --rm postgis/postgis psql -h my_server_name -U fim_postgres` (and follow prompt to enter password (CALIBRATION_DB_PASS)) + +### View names of databases (you can have more than one database in a postgres database server. ie) calibration and calibration-test.) + +`\l` + + #### Note: FIM will create a database as it is being used. If you have not yet processed any HUCs, there will not be a database yet. + +### Connect to calibration DB + +`\c calibration` (or the name of the database you gave in the calib_db_keys.env (CALIBRATION_DB_NAME)) + +### View tables in DB (now connected) + +`\dt` + +### View columns in tables + +`\d+ hucs` +`\d+ points` + +### View number of rows in a table + +`select count(*) from table_name;` (you need the ";" on the end) + +Postgres uses psql. See [https://www.postgresql.org/docs/current/sql-commands.html](https://www.postgresql.org/docs/current/sql-commands.html) for details on commands.
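The same checks can also be scripted against the calibration database with `psycopg2-binary`, which is already pinned in the Pipfile. The sketch below is illustrative only: it assumes the connection values from your calb_db_keys.env are exported as environment variables, and that FIM has already created the `hucs` and `points` tables by processing at least one HUC.

import os
import psycopg2

# Connection values as defined in your calb_db_keys.env (assumed to be exported)
conn = psycopg2.connect(
    host=os.environ['CALIBRATION_DB_HOST'],
    dbname=os.environ.get('CALIBRATION_DB_NAME', 'calibration'),
    user=os.environ['CALIBRATION_DB_USER_NAME'],
    password=os.environ['CALIBRATION_DB_PASS'],
)

with conn, conn.cursor() as cur:
    # Equivalent of \dt: list tables in the public schema
    cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public';")
    print([row[0] for row in cur.fetchall()])

    # Equivalent of: select count(*) from points;
    cur.execute('SELECT count(*) FROM points;')
    print(cur.fetchone()[0])

conn.close()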
+ + +## System Notes + +The `docker-entrypoint-initdb.d` scripts only run once, when the DB is initially created. To rerun these scripts, you will need to destroy the database. +The `docker-entrypoint-initdb.d` scripts do not create the tables for the database, but rather simply enables the PostGIS extension. +If you are trying to connect to a database that is on the same server that you are working on, you have to use the full servername and not `localhost`. + diff --git a/tools/calibration-db/docker-compose.yml b/tools/calibration-db/docker-compose.yml new file mode 100755 index 000000000..36c962140 --- /dev/null +++ b/tools/calibration-db/docker-compose.yml @@ -0,0 +1,18 @@ +version: '3.5' +services: + calibration-db: + image: postgis/postgis + container_name: fim_calibration_db + restart: always + environment: + - POSTGRES_USER=${CALIBRATION_DB_USER_NAME} + - POSTGRES_PASSWORD=${CALIBRATION_DB_PASS} + - POSTGRES_DB=${CALIBRATION_DB_NAME} + - PGDATA=/var/lib/postgresql/data/fim/pgdata + ports: + - "5432:5432" + volumes: + - calibration_db_data:/var/lib/postgresql/data/fim + - ./docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d +volumes: + calibration_db_data: diff --git a/tools/calibration-db/docker-entrypoint-initdb.d/init-db.sh b/tools/calibration-db/docker-entrypoint-initdb.d/init-db.sh new file mode 100755 index 000000000..c60ee4d18 --- /dev/null +++ b/tools/calibration-db/docker-entrypoint-initdb.d/init-db.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL + CREATE EXTENSION postgis; +EOSQL diff --git a/tools/combine_crosswalk_tables.py b/tools/combine_crosswalk_tables.py new file mode 100644 index 000000000..df2aabcf4 --- /dev/null +++ b/tools/combine_crosswalk_tables.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 + +import os +import glob +import argparse +import pandas as pd + +def combine_crosswalk_tables(data_directory, output_filename): + """ + Combines all hydrotables from a run into a single crosswalk table with HUC, BranchID, HydroID, feature_id, and LakeID + """ + + file_list = sorted(glob.glob(os.path.join(data_directory, '*', 'branches', '*', 'hydroTable_*.csv'))) + + dfs = list() + for filename in file_list: + if os.path.exists(filename): + file_df = pd.read_csv(filename, usecols=['HUC', 'HydroID', 'feature_id', 'LakeID'], dtype={'HUC':str}) + file_df.drop_duplicates(inplace=True) + file_df.rename(columns={'HUC':'huc8'}, inplace=True) + file_df['BranchID'] = os.path.split(os.path.dirname(filename))[1] + + dfs.append(file_df) + else: + print(f'{filename} is missing.') + + if len(dfs) > 1: + df = pd.concat(dfs) + + df.to_csv(output_filename, index=False) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Combines hydrotables from HUC/branch into a single crosswalk table') + parser.add_argument('-d', '--data-directory', help='Data directory (name of run)', type=str, required=True) + parser.add_argument('-o', '--output-filename', help='Filename for output', type=str, required=True) + + args = vars(parser.parse_args()) + + combine_crosswalk_tables(**args) \ No newline at end of file diff --git a/tools/compare_ms_and_non_ms_metrics.py b/tools/compare_ms_and_non_ms_metrics.py new file mode 100644 index 000000000..8edabc0e2 --- /dev/null +++ b/tools/compare_ms_and_non_ms_metrics.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +import pandas as pd +import numpy as np +import os +from tqdm import tqdm +import argparse +from concurrent.futures import ProcessPoolExecutor, 
as_completed + csi, mcc, tpr, far +from tools.shared_variables import TEST_CASES_DIR, OUTPUTS_DIR +from glob import glob +from itertools import product + + +def Compare_ms_and_non_ms_areas(): + + return(None) + + + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') + #parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=True) + + + #args = vars(parser.parse_args()) + + Compare_ms_and_non_ms_areas() diff --git a/tools/compile_comp_stats.py b/tools/compile_comp_stats.py new file mode 100755 index 000000000..b2bb0fe08 --- /dev/null +++ b/tools/compile_comp_stats.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 + +import pandas as pd +import numpy as np +from glob import glob, iglob +import argparse +import os + +######################################################## +''' +Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. +''' + +######################################################## + + +def Compile_comp_stats(hydrofabric_dirs): + + all_minutes = [] + all_models = [] + all_hucs = [] + + file_iterator = get_log_files(hydrofabric_dirs) + + for entry in file_iterator: + model, log = entry + log_file = open(log,'r') + + huc8code = log.split('/')[-1][0:8] + + for line in log_file: + if 'wall clock' in line: + time_string = line.strip().split(' ')[-1] + + time_string_split = time_string.split(':') + + if len(time_string_split) == 2: + minutes, seconds = time_string_split + + total_minutes = float(minutes)+float(seconds)/60 + + if len(time_string_split) == 3: + hours, minutes, seconds = time_string_split + + total_minutes = float(hours)*60+float(minutes)+float(seconds)/60 + + all_minutes.append(total_minutes) + all_models.append(model) + all_hucs.append(huc8code) + + df = pd.DataFrame({'Minutes':all_minutes,'Model':all_models, 'HUC': all_hucs}) + + total_per_huc = df.pivot_table(values='Minutes',index=['Model','HUC'],aggfunc=sum) + + print(total_per_huc.pivot_table(values='Minutes',index='Model',aggfunc=[np.mean,np.median,np.sum])) + + + +def get_log_files(hydrofabric_dirs): + + for hydrofabric_dir in hydrofabric_dirs: + log_dir = os.path.join(hydrofabric_dir,'logs') + + if os.path.join(log_dir,'branch'): + model = 'GMS' + if '_MS' in log_dir: + model = 'MS' + if '_FR' in log_dir: + model = 'FR' + + for fn in iglob(os.path.join(log_dir,'**','[0-9]*.log'),recursive=True): + yield(model,fn) + + +if __name__ == '__main__': + + ######################################################## + ''' + Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. 
+ ''' + + ######################################################## + + + # parse arguments + parser = argparse.ArgumentParser(description='Get Comp Stats') + parser.add_argument('-y','--hydrofabric_dirs', help='Directory path to FIM hydrofabric by processing unit', required=True,nargs='+') + + + Compile_comp_stats( **vars(parser.parse_args()) ) diff --git a/tools/compile_computational_stats.py b/tools/compile_computational_stats.py new file mode 100755 index 000000000..e9f10cda4 --- /dev/null +++ b/tools/compile_computational_stats.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 + +import argparse +import pandas as pd +import numpy as np +from glob import iglob +from os.path import join + +# desired output for branches +# dataframe columns: HUC, branch_id, exit status, ,time, ram, + +######################################################## +''' +Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. +''' + +######################################################## + + +def compile_summary(gms_output_dir,ouput=None): + + unit_summary = join(gms_output_dir,logs, 'summary_gms_unit.log') + branch_summary = join(gms_output_dir,logs, 'summary_gms_branch.log') + + unit_summary = pd.read_csv(unit_summary,sep='\t') + branch_summary = pd.read_csv(branch_summary,sep='\t') + + +if __name__ == '__main__': + + ######################################################## + ''' + Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. + ''' + + ######################################################## + + + parser = argparse.ArgumentParser(description='Create stream network level paths') + parser.add_argument('-d','--gms-output-dir', help='Input stream network', required=True) + parser.add_argument('-o','--output', help='Input stream network', required=True) + + args = vars(parser.parse_args()) + + compile_summary(**args) + diff --git a/tools/composite_inundation.py b/tools/composite_inundation.py new file mode 100644 index 000000000..d5784594b --- /dev/null +++ b/tools/composite_inundation.py @@ -0,0 +1,479 @@ +#!/usr/bin/env python3 +import os, argparse, copy, sys +import json +import rasterio +import numpy as np +import pandas as pd + +from datetime import datetime +from multiprocessing import Pool +#from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed, wait +import concurrent.futures as cf +from tqdm import tqdm + +from inundation import inundate +from mosaic_inundation import Mosaic_inundation +from inundate_gms import Inundate_gms +from utils.shared_functions import FIM_Helpers as fh +from utils.shared_variables import elev_raster_ndv + +######################################################## +''' +Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. +''' + +######################################################## + + +class InundateModel_HUC(object): + + def __init__(self, source_directory, huc): + + self.source_directory = source_directory + self.huc = huc + + def inundate_huc(self, flows_file, composite_output_dir, output_name, log_file_path, + num_workers_branches, no_cleanup, verbose): + ''' + Processing: + Will inundate a single huc directory and if gms, will create an aggregate mosiac per huc + + Returns: + The map file of the inundated raster. 
+ ''' + + source_huc_dir = os.path.join(self.source_directory, self.huc) + + # hucs do not need to exist as the list might have one huc from one + # source directory that is not in another. TODO: how to log this. + if (not os.path.exists(source_huc_dir)): + print(f"HUC {self.huc} does not exist in {self.source_directory}") + return None + + output_huc_dir = os.path.join(composite_output_dir, self.huc) + # Create output directory if it does not exist + if not os.path.isdir(output_huc_dir): + os.mkdir(output_huc_dir) + + inundation_map_file = None + + output_raster_name = os.path.join(output_huc_dir, output_name) + output_raster_name = fh.append_id_to_file_name(output_raster_name, [self.huc,self.model]) + + # adjust to add model and huc number + log_file = None + inundation_list_file = None + if (log_file_path != None): + log_file = os.path.join(log_file_path, f"{self.huc}_error_logs.txt") + inundation_list_file = os.path.join(log_file_path, + f"{self.huc}_inundation_file_list.csv") + + if (verbose): + print(f'... Creating an inundation map for the FIM4'\ + f' configuration for HUC {self.huc}...') + + if self.model in ["fr", "ms"]: + + rem = os.path.join(source_huc_dir, 'rem_zeroed_masked.tif') + catchments = os.path.join(source_huc_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') + catchment_poly = os.path.join(source_huc_dir, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') + hydro_table = os.path.join(source_huc_dir, 'hydroTable.csv') + + # Ensure that all of the required files exist in the huc directory + for file in (rem, catchments, catchment_poly, hydro_table): + if not os.path.exists(file): + raise Exception(f"The following file does not exist within the supplied FIM directory:\n{file}") + + # Run inundation() + # Must set workers to one as we are only processing one huc. + map_file = inundate(rem, + catchments, + catchment_poly, + hydro_table, + flows_file, + mask_type = None, + num_workers = 1, + inundation_raster = output_raster_name, + quiet = not verbose) + + #if verbose: + # print("Inundation Response:") + # print(map_file) + + if len(map_file) == 0: + raise Exception(f"Failed to inundate {extent_friendly} using the provided flows.") + + mosaic_file_path = map_file[0][0] + inundation_map_file = [self.model, self.huc, mosaic_file_path] + + else: # gms + # we are doing each huc one at a time + map_file = Inundate_gms( hydrofabric_dir = self.source_directory, + forecast = flows_file, + num_workers = num_workers_branches, + hucs = self.huc, + inundation_raster = output_raster_name, + verbose = verbose, + log_file = log_file, + output_fileNames = inundation_list_file ) + + mask_path_gms = os.path.join(source_huc_dir, 'wbd.gpkg') + + # we are going to mosaic the gms files per huc + # NOTE: Leave workers as 1, it fails to composite correctly if more than one. 
+ mosaic_file_path = Mosaic_inundation( map_file, + mosaic_attribute = 'inundation_rasters', + mosaic_output = output_raster_name, + mask = mask_path_gms, + unit_attribute_name = 'huc8', + nodata = elev_raster_ndv, + workers = 1, + remove_inputs = not no_cleanup, + subset = None, + verbose = verbose ) + + inundation_map_file = [self.model, self.huc, mosaic_file_path] + + if verbose: print(f"Inundation for HUC {self.huc} is complete") + + return inundation_map_file + +class Composite_HUC(object): + + # Composites two source directories for a single huc + # Note: The huc does not need to exist in both source directories + @classmethod + def composite_huc(self, args): + + huc = args["current_huc"] + print(f"Processing huc {huc}") + composite_model_map_files = [] + for model in args["models"]: + + # setup original fim processed directory + if model == "ms" : source_dir = args["fim_dir_ms"] + elif model == "fr" : source_dir = args["fim_dir_fr"] + else: source_dir = args["gms_dir"] + + ci = InundateModel_HUC(model, source_dir, huc) + map_file = ci.inundate_huc(args["flows_file"], + args["composite_output_dir"], + args["output_name"], + args["log_file_path"], + args["num_workers_branches"], + args["no_cleanup"], + args["verbose"]) + if map_file is not None: + composite_model_map_files.append(map_file) + + # Composite the two final model outputs + inundation_map_file_df = pd.DataFrame(composite_model_map_files, + columns = ['model', 'huc8', 'inundation_rasters']) + + if args["verbose"]: + print("inundation_map_file_df") + print(inundation_map_file_df) + + composite_file_output = os.path.join(args["composite_output_dir"], huc, args["output_name"]) + composite_file_output = fh.append_id_to_file_name(composite_file_output, huc) + + # NOTE: Leave workers as 1, it fails to composite correctly if more than one. + # - Also. by adding the is_mosaic_for_gms_branches = False, Mosaic_inudation + # will not auto add the HUC into the output name (its default behaviour) + mosaic_file_path = Mosaic_inundation( inundation_map_file_df, + mosaic_attribute='inundation_rasters', + mosaic_output = composite_file_output, + mask = None, + unit_attribute_name = 'huc8', + nodata = elev_raster_ndv, + workers = 1, + remove_inputs = not args["no_cleanup"], + subset = None, + verbose = args["verbose"] ) + + if args["is_bin_raster"]: + if args["verbose"]: print("Converting to binary") + CompositeInundation.hydroid_to_binary(composite_file_output) + + +class CompositeInundation(object): + + @classmethod + def run_composite(self, args): + + def __validate_args(args): + + """ + Runs `inundate()` on any two of the following: + 1) FIM 3.X mainstem (MS) + 2) FIM 3.X full-resolution (FR) + 3) FIM 4.x (gms) + All three need to have outputs and composites results. Assumes that all products + necessary for `inundate()` are in each huc8 folder. + + Parameters + ---------- + fim_dir_ms : str + Path to MS FIM directory. This should be an output directory from `fim_run.sh`. + fim_dir_fr : str + Path to FR FIM directory. This should be an output directory from `fim_run.sh`. + gms_dir : str + Path to FIM4 GMS directory. This should be an output directory from `gms_run_unit, then gms_run_branch`. + huc: str, optional + If this value comes in, it shoudl be a single huc value. If it does not exist, + we use all hucs in the give source directories. + flows_file : str : + Can be a single file path to a forecast csv or a comma-separated list of files. + composite_output_dir : str + Folder path to write outputs. 
It will be created if it does not exist. + output_name : str, optional + Name for output raster. If not specified, by default the raster will be named 'inundation_composite_{flows_root}.tif'. + is_bin_raster : bool, optional + Flag to create binary raster as output. + num_workers_huc : int, optional + defaults to 1 and means the number of processes to be used for processing hucs + num_workers_branches : int, optional + defaults to 1 and means the number of processes to be used for processing gms branches + no_cleanup : bool, optional + If False, intermediate files are deleted. + verbose : bool, optional + show extra output. + """ + + if (args["fim_dir_ms"]) and (args["fim_dir_ms"].lower() == "none"): + args["fim_dir_ms"] = None + if (args["fim_dir_fr"]) and (args["fim_dir_fr"].lower() == "none"): + args["fim_dir_fr"] = None + if (args["gms_dir"]) and (args["gms_dir"].lower() == "none"): + args["gms_dir"] = None + + # count number of input dir types and ensure their are no duplciates. + dir_list_lowercase = [] # we use a forced lowercase to help ensure dups (might be mixed case) + dir_list_raw = [] + missing_dir_msg = "{} directory of {} does not exist" + args["models"] = [] + if (args["fim_dir_ms"] != None): + args["models"].append("ms") + assert os.path.isdir(args["fim_dir_ms"]), missing_dir_msg.format("ms", args["fim_dir_ms"]) + dir_list_raw.append(args["fim_dir_ms"]) + dir_list_lowercase.append(args["fim_dir_ms"].lower()) + + if (args["fim_dir_fr"] != None): + args["models"].append("fr") + assert os.path.isdir(args["fim_dir_fr"]), missing_dir_msg.format("fr", args["fim_dir_fr"]) + dir_list_raw.append(args["fim_dir_fr"]) + dir_list_lowercase.append(args["fim_dir_fr"].lower()) + + if (args["gms_dir"] != None): + args["models"].append("gms") + assert os.path.isdir(args["gms_dir"]), missing_dir_msg.format("gms", args["gms_dir"]) + dir_list_raw.append(args["gms_dir"]) + dir_list_lowercase.append(args["gms_dir"].lower()) + + if (len(args["models"]) != 2): + raise ValueError("Must submit exactly two directories (ms, fr and/or gms") + + # check for duplicate dir names (submitting same dir for two args) + if len(dir_list_lowercase) != len(set(dir_list_lowercase)): + raise ValueError("The two sources directories are the same path.") + + if not os.path.exists(args["flows_file"]): + print(f'{args["flows_file"]} does not exist. Please specify a flow file.') + + # check job numbers + assert args["num_workers_huc"] >= 1, "Number of huc workers should be 1 or greater" + assert args["num_workers_branches"] >= 1, "Number of branch workers should be 1 or greater" + + total_cpus_requested = args["num_workers_huc"] * args["num_workers_branches"] + total_cpus_available = os.cpu_count() + if total_cpus_requested > (total_cpus_available - 1): + raise ValueError('The HUC job num of workers, {}, multiplied by the branch workers number, {}, '\ + 'exceeds your machine\'s available CPU count of {} minus one. '\ + 'Please lower the num_workers_huc or num_workers_branches'\ + 'values accordingly.'.format(args["num_workers_huc"], + total_cpus_available, args["num_workers_branches"]) ) + + # Create output directory if it does not exist + if not os.path.isdir(args["composite_output_dir"]): + os.mkdir(args["composite_output_dir"]) + + # If no output name supplied, create one using the flows file name + # the output_name is the final composite output file. 
+ # we also extract the basic file name without extension for use as the log file name + if not args["output_name"]: + flows_root = os.path.splitext(os.path.basename(args["flows_file"])) + root_output_file_name = f'inundation_composite_{flows_root[0]}' + args["output_name"] = f"{root_output_file_name}.tif" + else: + # see if the file name has a path or not, fail if it does + output_file_name_split = os.path.split(args["output_name"]) + if args["output_name"] == output_file_name_split[0]: + raise ValueError("""If submitting the -n (output file name), please ensure + it has no pathing. You can also leave it blank if you like.""") + + root_output_file_name = os.path.splitext(args["output_name"])[0] + + # setup log file and its directory + # Note: Log files are only created at this time if verbose\ + args["log_file_path"] = None + if (args["verbose"]): + args["log_file_path"] = os.path.join(args["composite_output_dir"], "mosaic_logs") + if not os.path.isdir(args["log_file_path"]): + os.mkdir(args["log_file_path"]) + + # Save run parameters up to this point + args_file = os.path.join(args["composite_output_dir"], root_output_file_name + '_args.json') + with open(args_file, 'w') as json_file: + json.dump(args, json_file) + print(f"Args printed to file at {args_file}") + + # make combined huc list, NOTE: not all source dirs will have the same huc folders + huc_list = set() + if (args["huc"] != None): + if (len(args["huc"]) != 8) or (not args["huc"].isnumeric()): + raise ValueError("Single huc value (-u arg) was submitted but appears invalid") + else: + huc_list.add(args["huc"]) + else: + for dir in dir_list_raw: + sub_dirs = [ item for item in os.listdir(dir) if os.path.isdir(os.path.join(dir, item)) ] + # Some directories may not be hucs (log folders, etc) + huc_folders = [ item for item in sub_dirs if item.isnumeric() ] + huc_set = set(huc_folders) + huc_list.update(huc_set) # will ensure no dups + + args["huc_list"] = huc_list + + return args + + args = __validate_args(args) + + huc_list = args["huc_list"] + number_huc_workers = args["num_workers_huc"] + #if len(huc_list == 1): # skip iterator + if (number_huc_workers == 1): + for huc in sorted(huc_list): + Composite_HUC.composite_huc(huc, args) + else: + + print(f"Processing {len(huc_list)} hucs") + args_list = [] + #sorted_hucs = sorted(huc_list) + #params_items = [(args, huc) for huc in huc_list] + for huc in sorted(huc_list): + huc_args = copy.deepcopy(args) + huc_args["current_huc"] = huc + args_list.append(huc_args) + + with cf.ProcessPoolExecutor(max_workers = number_huc_workers) as executor: + executor_gen = { executor.submit(Composite_HUC.composite_huc, params): params for params in args_list } + + for future in tqdm(cf.as_completed(executor_gen), + total = len(executor_gen), + desc = f"Running composite inundation with {number_huc_workers} workers" ): + + executor_gen[future] + + try: + future.result() + except Exception as exc: + print('{}, {}, {}'.format(hucCode,exc.__class__.__name__,exc)) + + + print("All hucs have been processed") + + @staticmethod + def hydroid_to_binary(hydroid_raster_filename): + #Converts hydroid positive/negative grid to 1/0 + #to_bin = lambda x: np.where(x > 0, 1, np.where(x == 0, -9999, 0)) + to_bin = lambda x: np.where(x > 0, 1, np.where(x != -9999, 0, -9999)) + hydroid_raster = rasterio.open(hydroid_raster_filename) + profile = hydroid_raster.profile # get profile for new raster creation later on + profile['nodata'] = -9999 + bin_raster = to_bin(hydroid_raster.read(1)) # converts neg/pos to 0/1 + # 
Overwrite inundation raster + with rasterio.open(hydroid_raster_filename, "w", **profile) as out_raster: + out_raster.write(bin_raster.astype(hydroid_raster.profile['dtype']), 1) + del hydroid_raster,profile,bin_raster + + + +if __name__ == '__main__': + + ''' + Runs inundate and compositing on any and exactly two of the following: + 1) FIM 3.X mainstem (MS) + 2) FIM 3.X full-resolution (FR) + 3) FIM 4.x (gms) + + Examples of usage + Notice: arg keys and values for some of the variants + -------- + a) ms and fr (single huc) + python3 /foss_fim/tools/composite_inundation.py -ms /outputs/inundation_test_1_FIM3_ms -fr /outputs/inundation_test_1_FIM3_fr -u /data/inputs/huc_lists/include_huc8.lst -f /data/test_cases/nws_test_cases/validation_data_nws/13090001/rgdt2/moderate/ahps_rgdt2_huc_13090001_flows_moderate.csv -o /outputs/inundation_test_1_comp/ -n test_inundation.tif + + a) ms and gms (all hucs in each folder) + python3 /foss_fim/tools/composite_inundation.py -ms /outputs/inundation_test_1_FIM3_ms -gms /outputs/inundation_test_1_gms -f /data/test_cases/nws_test_cases/validation_data_nws/13090001/rgdt2/moderate/ahps_rgdt2_huc_13090001_flows_moderate.csv -o /outputs/inundation_test_1_comp/ -c -jh 3 -jb 20 + + b) fr and gms (single huc) + python3 /foss_fim/tools/composite_inundation.py -fr /outputs/inundation_test_1_FIM3_fr -gms /outputs/inundation_test_1_gms -u 13090001 -f /data/inundation_review/inundation_nwm_recurr/nwm_recurr_flow_data/nwm21_17C_recurr_25_0_cms.csv -o /outputs/inundation_test_1_comp/ -n test_inundation.tif + ''' + + # parse arguments + parser = argparse.ArgumentParser(description="""Inundate FIM 3 full resolution + and mainstem outputs using a flow file and composite the results.""") + parser.add_argument('-ms','--fim-dir-ms', help='Source directory that contains MS FIM outputs.', + required=False, default=None) + parser.add_argument('-fr','--fim-dir-fr', help='Source directory that contains FR FIM outputs.', + required=False, default=None) + parser.add_argument('-gms','--gms-dir', help='Source directory that contains FIM4 GMS outputs.', + required=False, default=None) + parser.add_argument('-u','--huc', + help="""(Optional) If a single HUC is provided, only that HUC will be processed. + If not submitted, all HUCs in the source directories will be used.""", + required=False, default=None) + parser.add_argument('-f','--flows-file', + help='File path of flows csv.', + required=True) + parser.add_argument('-o','--composite-output-dir', help='Folder to write Composite Raster output.', + required=True) + parser.add_argument('-n','--output-name', help='File name for output(s).', + default=None, required=False) + parser.add_argument('-b','--is-bin-raster', + help="If flag is included, the output raster will be changed to wet/dry.", + required=False, default=False, action='store_true') + parser.add_argument('-jh', '--num-workers-huc', + help='Number of processes to use for HUC scale operations. HUC and Batch job numbers should multiply to no more than one less than the CPU count of the machine.', + required=False, default=1, type=int) + parser.add_argument('-jb', '--num-workers-branches', + help='Number of processes to use for Branch scale operations. 
HUC and Batch job numbers should multiply to no more than one less than the CPU count of the machine.', + required=False, default=1, type=int) + parser.add_argument('-c','--no_cleanup', + help='If flag used, intermediate rasters are NOT cleaned up.', + required=False, default=False, action='store_true') + parser.add_argument('-v','--verbose', help='Show additional outputs.', + required=False, default=False, action='store_true') + + # Extract to dictionary and assign to variables. + args = vars(parser.parse_args()) + + start_time = datetime.now() + dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S") + print("================================") + print(f"Start composite inundation - {dt_string}") + print() + + ci = CompositeInundation() + ci.run_composite(args) + + end_time = datetime.now() + dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S") + print("================================") + print(f"End composite inundation - {dt_string}") + + # calculate duration + time_duration = end_time - start_time + print(f"Duration: {str(time_duration).split('.')[0]}") + print() + diff --git a/tools/composite_ms_fr_inundation.py b/tools/composite_ms_fr_inundation.py deleted file mode 100644 index e0153e143..000000000 --- a/tools/composite_ms_fr_inundation.py +++ /dev/null @@ -1,213 +0,0 @@ -#!/usr/bin/env python3 -import os, argparse, rasterio -import numpy as np -import pandas as pd - -from inundation import inundate -from gms_tools.mosaic_inundation import Mosaic_inundation, __append_id_to_file_name - - -def composite_inundation(fim_dir_ms, fim_dir_fr, huc, flows, composite_output_dir, ouput_name='', - bin_rast_flag=False, depth_rast_flag=False, clean=True, quiet=True): - """ - Runs `inundate()` on FIM 3.X mainstem (MS) and full-resolution (FR) outputs and composites results. Assumes that all `fim_run` products - necessary for `inundate()` are in each huc8 folder. - - Parameters - ---------- - fim_dir_ms : str - Path to MS FIM directory. This should be an output directory from `fim_run.sh`. - fim_dir_fr : str - Path to FR FIM directory. This should be an output directory from `fim_run.sh`. - huc : str - HUC8 to run `inundate()`. This should be a folder within both `fim_dir_ms` and `fim_dir_fr`. - flows : str or pandas.DataFrame, can be a single file or a comma-separated list of files - File path to forecast csv or Pandas DataFrame with correct column names. - composite_output_dir : str - Folder path to write outputs. It will be created if it does not exist. - ouput_name : str, optional - Name for output raster. If not specified, by default the raster will be named 'inundation_composite_{flows_root}.tif'. - bin_rast_flag : bool, optional - Flag to create binary raster as output. If no raster flags are passed, this is the default behavior. - depth_rast_flag : bool, optional - Flag to create depth raster as output. - clean : bool, optional - If True, intermediate files are deleted. - quiet : bool, optional - Quiet output. - - Returns - ------- - None - - Raises - ------ - TypeError - Wrong input data types - AssertionError - Wrong input data types - - Notes - ----- - - Specifying a subset of the domain in rem or catchments to inundate on is achieved by the HUCs file or the forecast file. 
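For reference on the per-HUC fan-out in run_composite() above (before the deleted composite_ms_fr_inundation.py that follows): each HUC receives a deep copy of the shared args dict, Composite_HUC.composite_huc is submitted to a ProcessPoolExecutor, and completion is tracked with as_completed() wrapped in tqdm. The except block there prints hucCode, which is not assigned in that scope; below is a minimal sketch of the same pattern, with run_hucs_in_parallel and composite_fn as illustrative stand-ins, that recovers the HUC from the future-to-params mapping instead:

    import copy
    import concurrent.futures as cf
    from tqdm import tqdm

    def run_hucs_in_parallel(huc_list, args, composite_fn, num_workers):
        # Build one deep-copied args dict per HUC so worker processes never
        # share (or mutate) a common object.
        args_list = []
        for huc in sorted(huc_list):
            huc_args = copy.deepcopy(args)
            huc_args["current_huc"] = huc
            args_list.append(huc_args)

        with cf.ProcessPoolExecutor(max_workers=num_workers) as executor:
            futures = {executor.submit(composite_fn, params): params for params in args_list}

            for future in tqdm(cf.as_completed(futures), total=len(futures),
                               desc=f"Running composite inundation with {num_workers} workers"):
                params = futures[future]
                try:
                    future.result()
                except Exception as exc:
                    # Report the HUC taken from the submitted params rather than
                    # an undefined variable.
                    print(f'{params["current_huc"]}, {exc.__class__.__name__}, {exc}')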
- - Examples - -------- - >>> import composite_ms_fr_inundation - >>> composite_ms_fr_inundation.composite_inundation( - '/home/user/fim_ouput_mainstem', - '/home/user/fim_ouput_fullres', - '12090301', - '/home/user/forecast_file.csv', - '/home/user/fim_inundation_composite', - 'inundation_composite.tif', - True, - False) - """ - # Set inundation raster to True if no output type flags are passed - if not (bin_rast_flag or depth_rast_flag): - bin_rast_flag = True - assert not (bin_rast_flag and depth_rast_flag), 'Output can only be binary or depth grid, not both' - assert os.path.isdir(fim_dir_ms), f'{fim_dir_ms} is not a directory. Please specify an existing MS FIM directory.' - assert os.path.isdir(fim_dir_fr), f'{fim_dir_fr} is not a directory. Please specify an existing FR FIM directory.' - assert os.path.exists(flows), f'{flows} does not exist. Please specify a flow file.' - - # Instantiate output variables - var_keeper = { - 'ms': { - 'dir': fim_dir_ms, - 'outputs': { - 'inundation_rast': os.path.join(composite_output_dir, f'{huc}_inundation_ms.tif') if bin_rast_flag else None, - 'depth_rast': os.path.join(composite_output_dir, f'{huc}_depth_ms.tif') if depth_rast_flag else None - } - }, - 'fr': { - 'dir': fim_dir_fr, - 'outputs': { - 'inundation_rast': os.path.join(composite_output_dir, f'{huc}_inundation_fr.tif') if bin_rast_flag else None, - 'depth_rast': os.path.join(composite_output_dir, f'{huc}_depth_fr.tif') if depth_rast_flag else None - } - } - } - # Build inputs to inundate() based on the input folders and huc - if not quiet: print(f"HUC {huc}") - for extent in var_keeper: - rem = os.path.join(var_keeper[extent]['dir'], huc, 'rem_zeroed_masked.tif') - catchments = os.path.join(var_keeper[extent]['dir'], huc, 'gw_catchments_reaches_filtered_addedAttributes.tif') - catchment_poly = os.path.join(var_keeper[extent]['dir'], huc, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') - hydro_table = os.path.join(var_keeper[extent]['dir'], huc, 'hydroTable.csv') - - # Ensure that all of the required files exist in the huc directory - for file in (rem, catchments, catchment_poly, hydro_table): - if not os.path.exists(file): - raise Exception(f"The following file does not exist within the supplied FIM directory:\n{file}") - - # Run inundation() - extent_friendly = "mainstem (MS)" if extent=="ms" else "full-resolution (FR)" - grid_type = "an inundation" if bin_rast_flag else "a depth" - if not quiet: print(f" Creating {grid_type} map for the {extent_friendly} configuration...") - result = inundate(rem,catchments,catchment_poly,hydro_table,flows,mask_type=None, - inundation_raster= var_keeper[extent]['outputs']['inundation_rast'], - depths= var_keeper[extent]['outputs']['depth_rast'], - quiet= quiet) - if result != 0: - raise Exception(f"Failed to inundate {rem} using the provided flows.") - - # If no output name supplied, create one using the flows file name - if not ouput_name: - flows_root = os.path.splitext(os.path.basename(flows))[0] - ouput_name = os.path.join(composite_output_dir, f'inundation_composite_{flows_root}.tif') - else: - ouput_name = os.path.join(composite_output_dir, ouput_name) - - # Composite MS and FR - inundation_map_file = { - 'huc8' : [huc] * 2, - 'branchID' : [None] * 2, - 'inundation_rasters': [var_keeper['fr']['outputs']['inundation_rast'], - var_keeper['ms']['outputs']['inundation_rast']], - 'depths_rasters': [var_keeper['fr']['outputs']['depth_rast'], - var_keeper['ms']['outputs']['depth_rast']] - } - inundation_map_file = 
pd.DataFrame(inundation_map_file) - Mosaic_inundation( - inundation_map_file, - mosaic_attribute='depths_rasters' if depth_rast_flag else 'inundation_rasters', - mosaic_output=ouput_name, - mask=catchment_poly, - unit_attribute_name='huc8', - nodata=-9999, - workers=1, - remove_inputs=clean, - subset=None,verbose=not quiet - ) - if bin_rast_flag: - hydroid_to_binary(__append_id_to_file_name(ouput_name, huc)) - -def hydroid_to_binary(hydroid_raster_filename): - '''Converts hydroid positive/negative grid to 1/0''' - - #to_bin = lambda x: np.where(x > 0, 1, np.where(x == 0, -9999, 0)) - to_bin = lambda x: np.where(x > 0, 1, np.where(x != -9999, 0, -9999)) - hydroid_raster = rasterio.open(hydroid_raster_filename) - profile = hydroid_raster.profile # get profile for new raster creation later on - profile['nodata'] = -9999 - bin_raster = to_bin(hydroid_raster.read(1)) # converts neg/pos to 0/1 - # Overwrite inundation raster - with rasterio.open(hydroid_raster_filename, "w", **profile) as out_raster: - out_raster.write(bin_raster.astype(hydroid_raster.profile['dtype']), 1) - del hydroid_raster,profile,bin_raster - - -if __name__ == '__main__': - - # parse arguments - parser = argparse.ArgumentParser(description='Inundate FIM 3 full resolution and mainstem outputs using a flow file and composite the results.') - parser.add_argument('-ms','--fim-dir-ms',help='Directory that contains MS FIM outputs.',required=True) - parser.add_argument('-fr','--fim-dir-fr',help='Directory that contains FR FIM outputs.',required=True) - parser.add_argument('-u','--huc',help='HUC within FIM directories to inunundate. Can be a comma-separated list.',required=True) - parser.add_argument('-f','--flows-file',help='File path of flows csv or comma-separated list of paths if running multiple HUCs',required=True) - parser.add_argument('-o','--ouput-dir',help='Folder to write Composite Raster output.',required=True) - parser.add_argument('-n','--ouput-name',help='File name for output(s).',default=None,required=False) - parser.add_argument('-b','--bin-raster',help='Output raster is a binary wet/dry grid. This is the default if no raster flags are passed.',required=False,default=False,action='store_true') - parser.add_argument('-d','--depth-raster',help='Output raster is a depth grid.',required=False,default=False,action='store_true') - parser.add_argument('-j','--num-workers',help='Number of concurrent processesto run.',required=False,default=1,type=int) - parser.add_argument('-c','--clean',help='If flag used, intermediate rasters are NOT cleaned up.',required=False,default=True,action='store_false') - parser.add_argument('-q','--quiet',help='Quiet terminal output.',required=False,default=False,action='store_true') - - # Extract to dictionary and assign to variables. 
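The hydroid_to_binary() helper, kept as a static method in the new CompositeInundation class and deleted here from the old module, maps a HydroID grid to a wet/dry grid with nested np.where calls: positive HydroIDs become 1 (wet), everything else except the -9999 nodata value becomes 0 (dry). A small worked example of just that lambda, with made-up HydroID values:

    import numpy as np

    to_bin = lambda x: np.where(x > 0, 1, np.where(x != -9999, 0, -9999))

    sample = np.array([[1234, -1234, -9999],
                       [5678,     0, -5678]])
    print(to_bin(sample))
    # positive ids -> 1, negative ids and zero -> 0, -9999 stays -9999:
    # [[    1     0 -9999]
    #  [    1     0     0]]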
- args = vars(parser.parse_args()) - fim_dir_ms = args['fim_dir_ms'] - fim_dir_fr = args['fim_dir_fr'] - hucs = args['huc'].replace(' ', '').split(',') - flows_files = args['flows_file'].replace(' ', '').split(',') - num_workers = int(args['num_workers']) - output_dir = args['ouput_dir'] - ouput_name = args['ouput_name'] - bin_raster = bool(args['bin_raster']) - depth_raster = bool(args['depth_raster']) - clean = bool(args['clean']) - quiet = bool(args['quiet']) - - assert num_workers >= 1, "Number of workers should be 1 or greater" - assert len(flows_files) == len(hucs), "Number of hucs must be equal to the number of forecasts provided" - assert not (bin_raster and depth_raster), "Cannot use both -b and -d flags" - - # Create output directory if it does not exist - if not os.path.isdir(output_dir): - os.mkdir(output_dir) - - # Create nested list for input into multi-threading - arg_list = [] - for huc, flows_file in zip(hucs, flows_files): - arg_list.append((fim_dir_ms, fim_dir_fr, huc, flows_file, output_dir, ouput_name, bin_raster, depth_raster, clean, quiet)) - - # Multi-thread for each huc in input hucs - if num_workers > 1: - from multiprocessing import Pool - with Pool(processes=num_workers) as pool: - # Run composite_inundation() - pool.starmap(composite_inundation, arg_list) - else: # run linear if jobs == 1 - for arg in arg_list: - composite_inundation(*arg) diff --git a/tools/consolidate_metrics.py b/tools/consolidate_metrics.py new file mode 100755 index 000000000..0b470631d --- /dev/null +++ b/tools/consolidate_metrics.py @@ -0,0 +1,339 @@ +#!/usr/bin/env python3 + +import pandas as pd +import os +import re +import json +import numpy as np +from glob import glob +from tqdm import tqdm +import argparse +from collections import defaultdict +from tools_shared_variables import TEST_CASES_DIR,\ + PREVIOUS_FIM_DIR,\ + OUTPUTS_DIR,\ + INPUTS_DIR,\ + AHPS_BENCHMARK_CATEGORIES +from tools_shared_functions import csi,far,tpr,mcc + +# set display options +pd.set_option('display.max_rows', None) +pd.set_option('display.max_columns', None) + + +######################################################## +''' +Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. 
+''' + +######################################################## + +def Consolidate_metrics( benchmarks=['all'],versions=['all'], + zones=['total_area'],matching_hucs_only=True, + metrics_output_csv=None, + impute_missing_ms=False + ): + + """ Consolidates metrics into single dataframe """ + + if isinstance(benchmarks,str): + benchmarks = [benchmarks] + elif isinstance(benchmarks,list): + pass + else: + benchmarks = list(benchmarks) + + if isinstance(versions,str): + versions = [versions] + elif isinstance(benchmarks,list): + pass + else: + versions = list(versions) + + if isinstance(zones,str): + zones = [zones] + elif isinstance(zones,list): + pass + else: + zones = list(zones) + + + # loop through benchmarks + consolidated_metrics_df = [ f for f in return_dataframe_for_benchmark_source(benchmarks,zones) ] + + # concat + consolidated_metrics_df = pd.concat(consolidated_metrics_df,ignore_index=True) + + # find matching rows + consolidated_metrics_df = find_matching_rows_by_attribute_value(consolidated_metrics_df,'version',versions) + + if impute_missing_ms: + consolidated_metrics_df = impute_missing_MS_with_FR(consolidated_metrics_df) + + if metrics_output_csv is not None: + consolidated_metrics_df.to_csv(metrics_output_csv, index=False) + + """ + #print(consolidated_metrics_df.columns);exit() + consolidated_metrics_pivot = pd.pivot_table( + consolidated_metrics_df, + values=['FP_area_km2','FN_area_km2','TP_area_km2','contingency_tot_area_km2','obsPositive_area_km2'], + columns=['extent_config'], + index=['magnitude'], + aggfunc=np.sum + ) + print(consolidated_metrics_pivot);exit()""" + + consolidated_secondary_metrics = pivot_metrics(consolidated_metrics_df) + print(consolidated_secondary_metrics) + + return(consolidated_metrics_df,consolidated_secondary_metrics) + + +def impute_missing_MS_with_FR(consolidated_metrics_df): + + + # make sure you have one version per extent_config + fr_extent_config = 'FR' + ms_extent_config = 'MS' + extent_config = [fr_extent_config, ms_extent_config] + + indices_of_ec = {ec : consolidated_metrics_df.index[consolidated_metrics_df.loc[:,'extent_config'] == ec] for ec in extent_config } + + unique_hucs_dict = dict() + for ec in extent_config: + + unique_version = consolidated_metrics_df.loc[ indices_of_ec[ec],'version'].unique() + + if len(unique_version) > 1: + raise ValueError(f"{ec} version has more than one extent. 
Only pass one version per extent configuration.") + unique_hucs = set(consolidated_metrics_df.loc[indices_of_ec[ec],'huc'].unique()) + + unique_hucs_dict[ec] = unique_hucs + + + unique_hucs = unique_hucs_dict[fr_extent_config] - unique_hucs_dict[ms_extent_config] + + # copy those rows over with extent_config set to MS + unique_fr_mask = consolidated_metrics_df.loc[ indices_of_ec['FR'],'huc'].isin(unique_hucs) + unique_fr_indices = unique_fr_mask.index[unique_fr_mask] + + fr_entries = consolidated_metrics_df.loc[unique_fr_indices,:].reset_index(drop=True) + fr_entries.loc[:,'extent_config'] = "MS" + fr_entries.loc[:,'version'] = consolidated_metrics_df.loc[ indices_of_ec['MS'], 'version'].unique() + + consolidated_metrics_df = pd.concat((consolidated_metrics_df,fr_entries)).reset_index(drop=True) + + return(consolidated_metrics_df) + + + +def pivot_metrics(consolidated_metrics_df): + + ''' Pivots metrics to provide summary of results ''' + + # pivot out + consolidated_metrics_pivot = pd.pivot_table( + consolidated_metrics_df, + values=['true_positives_count','false_positives_count', + 'false_negatives_count','true_negatives_count'], + index=['magnitude','extent_config','version'], + aggfunc=sum + ) + + + return( apply_metrics_to_pivot_table(consolidated_metrics_pivot) ) + + + +def apply_metrics_to_pivot_table(consolidated_metrics_pivot): + + # metrics to run + metrics_functions = { 'CSI': csi , 'TPR' : tpr, 'FAR' : far, 'MCC' : mcc } + #metrics_functions = { 'CSI': csi } + + def row_wise_function(df,mn,mf): + + ''' applies function along rows of dataframes ''' + + return( + pd.Series( + mf( + df['true_positives_count'], + df['false_positives_count'], + df['false_negatives_count'], + df['true_negatives_count'] + ), + name=mn + ) + ) + + column_generator = ( row_wise_function(consolidated_metrics_pivot,met_name,met_func) + for met_name,met_func in metrics_functions.items() + ) + + consolidated_secondary_metrics = pd.concat(column_generator,axis=1) + + return(consolidated_secondary_metrics) + + +def find_matching_rows_by_attribute_value(df,attributes,matches): + + if (len(matches) == 1) & ('all' in matches): + return(df) + else: + df = df.loc[df.loc[:,attributes].isin(matches)] + + return(df) + + +def return_dataframe_for_benchmark_source(benchmarks,zones=['total_area']): + + """ returns a dataframe of results given a name for a benchmark source """ + + benchmark_function_dict = { + 'ble' : consolidate_metrics('ble',zones), + 'ifc' : consolidate_metrics('ifc',zones) + } + + # if all cycle through all functions, else go through selection + if (len(benchmarks)== 1) & ('all' in benchmarks): + + for f in benchmark_function_dict.values(): + yield(f) + + else: + + for b in benchmarks: + try: + yield( benchmark_function_dict[b] ) + except KeyError: + raise ValueError(f"Benchmark '{b}' not supported. 
"\ + f"Pass 'all' or one of these"\ + f"{list(benchmark_function_dict.keys())}") + + +def consolidate_metrics(benchmark,zones=['total_area']): + + """ consolidates ble metrics """ + + # get filenames for metrics for ble + files_to_consolidate = list() + for zone in zones: + file_pattern_to_glob = os.path.join(TEST_CASES_DIR,f'{benchmark}_test_cases','**',f'{zone}_stats.csv') + files_to_consolidate.extend(glob(file_pattern_to_glob, recursive=True)) + + # make a metrics dataframe generator + metrics_df_generator = metrics_data_frame_generator_from_files(files_to_consolidate) + + # concat said generator of files + metrics_df = pd.concat(metrics_df_generator, ignore_index=True) + + # make a meta-data + metadata_df = metadata_dataframe_from_file_names_for_ble(files_to_consolidate) + + # concat metrics and metadata dataframes + return( + pd.concat( (metadata_df,metrics_df), axis=1) + ) + + +def metrics_data_frame_generator_from_files(file_names): + + """ Reads in metrics dataframes from filenames """ + + for f in file_names: + yield pd.read_csv(f,index_col=0).T.reset_index(drop=True) + + +def metadata_dataframe_from_file_names_for_ble(file_names): + + """ Makes a dataframe for meta-data """ + + file_name_index_dict = { + 'benchmark_source' : parse_benchmark_source(file_names), + 'version' : parse_version_name(file_names), + 'huc' : parse_huc(file_names), + 'magnitude' : parse_magnitude(file_names), + 'extent_config' : parse_eval_metadata(file_names,'model'), + 'calibrated' : parse_eval_metadata(file_names,'calibrated') + } + + return( + pd.DataFrame(file_name_index_dict) + ) + + +def parse_benchmark_source(file_names): + + """ Parses benchmark source """ + + for f in file_names: + yield f.split('/')[3].split('_')[0] + + +def parse_version_name(file_names): + + """ Parses version name """ + + for f in file_names: + yield f.split('/')[6] + + +def parse_magnitude(file_names): + + """ Parses magnitude """ + + for f in file_names: + yield f.split('/')[7] + + +def parse_huc(file_names): + + """ parses huc """ + + for f in file_names: + yield f.split('/')[4].split('_')[0] + + +def parse_eval_metadata(file_names,metadata_field): + + """ parsing eval metadata json files """ + + for f in file_names: + root_dir = os.path.abspath(os.sep) + dir_of_metadata = f.split('/')[0:7] + eval_metadata_filepath = os.path.join(root_dir,*dir_of_metadata,'eval_metadata.json') + + # read eval data, if no file write None + try: + with open(eval_metadata_filepath) as fObj: + eval_metadata = json.load(fObj) + except FileNotFoundError: + yield None + else: + yield eval_metadata[metadata_field] + + +if __name__ == '__main__': + + ######################################################## + ''' + Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. + ''' + + ######################################################## + + # Parse arguments. + parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') + parser.add_argument('-b','--benchmarks',help='Allowed benchmarks', required=False, default='all', nargs="+") + parser.add_argument('-v','--versions',help='Allowed versions', required=False, default='all', nargs="+") + parser.add_argument('-z','--zones',help='Allowed zones', required=False, default='total_area', nargs="+") + parser.add_argument('-o','--metrics-output-csv',help='File path to outputs csv', required=False, default=None) + parser.add_argument('-i','--impute-missing_ms',help='Imputes FR metrics in HUCS with no MS. 
Only supports one version per extent config', required=False, action='store_true',default=False) + + args = vars(parser.parse_args()) + + Consolidate_metrics(**args) diff --git a/tools/copy_test_case_folders.py b/tools/copy_test_case_folders.py new file mode 100644 index 000000000..c2e16fe23 --- /dev/null +++ b/tools/copy_test_case_folders.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 + +import os +import argparse +import shutil +import sys + +# importing python folders in other direcories +sys.path.append('/foss_fim/src/') +import aggregate_branch_lists as agg + +####### +# Feb 16, 2023: This file is likely now deprecated. If it is used again, the aggregate_branch_lists file will need to +# be reconsiiled as a newer one exists in the src directory with a similar name. +####### + +def copy_folders(folder_name_list, + source_dir, + target_dir, + create_fim_input_list=False): + + ''' + Summary: Scans the source_directory looking for huc values from the huc list. Once found, + the huc directory is copied to the target directory. All recusive files/folders wil be copied. + + Line items each be on a new line (ie "\n") + + ***** Note: The tool will still work, if the list is not a list of hucs. + It can be just a list of folder names. + Input: + - folder_name_list: A file and path to a .txt or .lst file with a list of line delimited huc numbers. + - source_dir: The root folder where the huc (or named) folders reside. + - target_dir: The root folder where the huc folders will be copied to. Note. All contents of + each huc folder, including branch folders if applicable, will be copied, in the extact + structure as the source directory. Note: The target folder need not pre-exist. + - create_fim_input_list: If this flag is set to True, after coping the folders, the + "aggregate_branch_lists.py" file will be called in order to make the fim_input.csv file. + The fim_input.csv is required for futher processing such as reprocessing branchs or set up + for test cases. + Output: + - A copy of huc directories (or named folders) as listed in the folder_name_list. + ''' + + if (not os.path.exists(folder_name_list)): + raise FileNotFoundError(f"Sorry. The file {folder_name_list} does not exist") + + if (not os.path.exists(source_dir)): + raise NotADirectoryError(f"Sorry. Source folder of {source_dir} does not exist") + + with open(folder_name_list, "r") as fp: + raw_folder_names = fp.read().split("\n") + + # split on new line can add an extra row of a blank value if a newline char exists on the end. + # Some lines may have extra spaces, or dups. 
It is ok if the value is not necessarily a huc + folder_names = set() # this will ensure unique names + for folder_name in raw_folder_names: + folder_name = folder_name.strip() + if (folder_name) != '': + folder_names.add(folder_name) + + sorted_folder_names = sorted(folder_names) + + print(f"{str(len(sorted_folder_names))} folders to be copied") + ctr = 0 + + for folder_name in sorted_folder_names: + src_folder = os.path.join(source_dir, folder_name) + if not os.path.exists(src_folder): + print(f"source folder not found: {src_folder}") + else: + target_folder = os.path.join(target_dir, folder_name) + print(f"coping folder : {target_folder}") + shutil.copytree(src_folder, target_folder, dirs_exist_ok=True) + ctr+=1 + + print(f"{str(ctr)} folders have been copied to {target_dir}") + + if create_fim_input_list == True: + # call this code, which scans each huc (unit) directory looking for the branch_id.lst + # and adds them together to create the fim_inputs.csv file + # Note: folder_name_list needs to be a huc list to work) + agg.aggregate_branch_lists(folder_name_list, target_dir, "fim_inputs.csv") + print("fim_inputs.csv created") + + +if __name__ == '__main__': + +# Remember: This is for pulling out only folders that are related to a huc list (such as an alpha test list) +# and it has to be run on each root folder, one at a time (for now. aka.. no wildcards) + +# Sample Usage: +#python /foss_fim/tools/copy_test_case_folders.py -f /data/inputs/huc_lists/huc_list_for_alpha_tests_20220420.lst -s /outputs/copy_test_synth/ -t /outputs/copy_test_synth_combined -a + +# NOTE the 'a' at the end meaning go ahead create the gms_input.csv. This is normally +# left for the last folder to be copied over. + + parser = argparse.ArgumentParser(description='Based on the huc list parameter, ' \ + 'find and copy the full huc (or other) directories.') + + parser.add_argument('-f','--folder_name_list', + help='file and path to the huc list. Note: The list does not ' \ + 'necessarily be a list of huc, just a list of unique values', + required=True) + parser.add_argument('-s','--source_dir', + help='Source folder to be scanned for unique folders', + required=True) + parser.add_argument('-t','--target_dir', + help='Target folder where the folders will be copied to', + required=True) + + parser.add_argument('-a','--create_fim_input_list', + help='Create the fim_input.csv list after copying', + required=False, default=False, action='store_true') + + args = vars(parser.parse_args()) + + copy_folders(**args) diff --git a/tools/cygnss_preprocessing.py b/tools/cygnss_preprocessing.py new file mode 100755 index 000000000..5d4450834 --- /dev/null +++ b/tools/cygnss_preprocessing.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python3 + +from shapely.geometry import box +import datetime +import numpy as np +import pandas as pd +import geopandas as gpd +import argparse +from datetime import datetime +from make_boxes_from_bounds import find_hucs_of_bounding_boxes +import requests +from concurrent.futures import ThreadPoolExecutor,as_completed +import os +from tqdm import tqdm +from foss_fim.tools.inundation import read_nwm_forecast_file + + +################################## +## +## Likely Deprecated: File appears to be no longer used. Noticed Jan 16, 2023 +## Might want to be kept for possible re-use at a later time? 
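Back in copy_folders() above, the list handling and the copy step are the two pieces that matter: names are read from a line-delimited file, stripped, de-duplicated through a set, and each surviving folder is copied with shutil.copytree(dirs_exist_ok=True), which requires Python 3.8+ and lets repeated runs merge into an existing target tree. A minimal sketch of those two pieces (read_folder_list and copy_one_folder are illustrative names, not part of this module):

    import os
    import shutil

    def read_folder_list(list_path):
        # Strip whitespace, drop blank lines, and de-duplicate via a set.
        with open(list_path, "r") as fp:
            raw_names = fp.read().split("\n")
        names = {name.strip() for name in raw_names if name.strip() != ""}
        return sorted(names)

    def copy_one_folder(source_dir, target_dir, folder_name):
        # Copy a single HUC (or other named) folder, preserving its structure.
        src_folder = os.path.join(source_dir, folder_name)
        if not os.path.exists(src_folder):
            print(f"source folder not found: {src_folder}")
            return False
        shutil.copytree(src_folder, os.path.join(target_dir, folder_name), dirs_exist_ok=True)
        return True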
+## +################################## + + +def cygnss_preprocessing(bounding_boxes_file,wbd=None,projection_of_boxes='EPSG:4329',wbd_layer='WBDHU8',forecast_output_file=None,retrieve=True,workers=6,download_directory=None, daily_mean_forecast_files=None): + + _, bounding_boxes = find_hucs_of_bounding_boxes(bounding_boxes_file,wbd=wbd,projection_of_boxes=projection_of_boxes,wbd_layer=wbd_layer) + + # load bounding box file + bounding_boxes['event_date'] = pd.to_datetime(bounding_boxes['event_date'],utc=True) + bounding_boxes.reset_index(drop=True,inplace=True) + + wbdcol_name = 'HUC'+wbd_layer[-1] + + # expand dates + datetime_indices = bounding_boxes.apply(lambda df:pd.date_range(df['event_date'],periods=24,closed=None,freq='H',tz='UTC'),axis=1) + + datetime_indices.name = 'date_time' + datetime_indices=pd.DataFrame(datetime_indices) + datetime_indices = datetime_indices.join(bounding_boxes[['Name',wbdcol_name]]) + + # append columns to expanded dates + forecast_df = pd.DataFrame() + for idx,row in datetime_indices.iterrows(): + dt_df = row['date_time'].to_frame(index=False,name='date_time') + + row = row.drop('date_time') + + dt_df = dt_df.join(pd.concat([pd.DataFrame(row).T]*len(dt_df),ignore_index=True)) + + forecast_df = pd.concat((forecast_df,dt_df),ignore_index=True) + + # add extra dry date + #additional_date_df = forecast_df[['Name',wbdcol_name]].merge(bounding_boxes[['additional_date',wbdcol_name]],left_on=wbdcol_name,right_on=wbdcol_name).drop_duplicates(ignore_index=True) + + #forecast_df = pd.concat((forecast_df,additional_date_df.rename(columns={'additional_date':'date_time'})),ignore_index=True) + + forecast_df = forecast_df.sort_values(['Name',wbdcol_name],ignore_index=True) + + + #forecast_df['date_time'] = forecast_df.apply(lambda df : df['date_time'].replace(hour=18,minute=0),axis=1) + + forecast_df = forecast_df.rename(columns={wbdcol_name:'huc'}) + + forecast_df = construct_nwm_forecast_filenames_and_retrieve(forecast_df,download_directory,retrieve=retrieve,workers=workers) + + + # take daily means + def get_forecast(forecast_row): + + try: + forecast_table = read_nwm_forecast_file(forecast_row['forecast_file']) + except FileNotFoundError: + print(f"Skipping file {forecast_row['forecast_file']}") + return None + + return(forecast_table) + + # filter out 2021 + years = forecast_df['date_time'].dt.year < 2021 + forecast_df = forecast_df.loc[years,:] + + # unique dates and hourly samples + unique_forecast_df = forecast_df.drop(columns='huc').drop_duplicates() + unique_forecast_df.reset_index(inplace=True,drop=True) + + dates = unique_forecast_df["date_time"].map(lambda t: t.date()) + unique_dates, hourly_samples_per_date = np.unique(dates,return_counts=True) + + unique_dict = dict(zip(unique_dates,hourly_samples_per_date)) + + final_forecast_df = forecast_df.groupby(pd.Grouper(key='date_time',freq='d')).first().dropna() + final_forecast_df.set_index(pd.to_datetime(final_forecast_df.index.date,utc=True),drop=True,inplace=True) + forecast_df_dates = forecast_df.copy() + forecast_df_dates.date_time = forecast_df.date_time.dt.date + final_forecast_df = final_forecast_df.merge(forecast_df,left_index=True, right_on='date_time') + + final_forecast_df.reset_index(drop=True,inplace=True) + final_forecast_df['date_time'] = final_forecast_df.date_time.dt.date + + final_forecast_df.drop(columns={'huc_x','Name_x','forecast_file_x','forecast_url_x','forecast_url_y'},inplace=True) + + 
final_forecast_df.rename(columns={'Name_y':'Name','huc_y':'huc','forecast_file_y':'forecast_file'},inplace=True) + + def update_daily(daily_nwm_forecast_df,current_date,daily_mean_forecast_files,final_forecast_df): + daily_mean = daily_nwm_forecast_df.mean(axis=1).rename('discharge') + + current_date_string = current_date.strftime(format='%Y%m%d') + filename,ext = os.path.basename(daily_mean_forecast_files).split('.') + outfile = os.path.join(os.path.dirname(daily_mean_forecast_files),filename+'_'+ current_date_string + '.'+ext) + daily_mean.to_csv(outfile,index=True) + + final_forecast_df.loc[final_forecast_df['date_time'] == current_date,'forecast_file'] = outfile + + daily_nwm_forecast_df = None + + return(daily_nwm_forecast_df,current_date,daily_mean_forecast_files,final_forecast_df) + + + daily_nwm_forecast_df = None + ii = 0 + current_date_time = unique_forecast_df.loc[0,'date_time'] + current_date = current_date_time.date() + for i,row in tqdm(unique_forecast_df.iterrows(),total=len(unique_forecast_df),desc='Daily Means'): + + current_nwm_forecast_df = get_forecast(row) + + if ii == unique_dict[current_date]: + + daily_nwm_forecast_df,current_date,daily_mean_forecast_files,final_forecast_df = update_daily(daily_nwm_forecast_df,current_date,daily_mean_forecast_files,final_forecast_df) + + if current_nwm_forecast_df is not None: + + if daily_nwm_forecast_df is None: + ii = 0 + daily_nwm_forecast_df = pd.DataFrame(np.empty((len(current_nwm_forecast_df),unique_dict[current_date]))) + daily_nwm_forecast_df.set_index(current_nwm_forecast_df.index,inplace=True,drop=True) + + daily_nwm_forecast_df.loc[:,ii] = current_nwm_forecast_df.discharge + #daily_nwm_forecast_df.rename(columns={ii:current_date_time}) + + ii += 1 + + current_date_time = row['date_time'] + current_date = current_date_time.date() + + daily_nwm_forecast_df,current_date,daily_mean_forecast_files,final_forecast_df = update_daily(daily_nwm_forecast_df,current_date,daily_mean_forecast_files,final_forecast_df) + + + if forecast_output_file is not None: + #final_forecast_df.to_csv(forecast_output_file,index=False,date_format='%Y-%m-%d %H:%M:%S%Z') + final_forecast_df.to_csv(forecast_output_file,index=False,date_format='%Y-%m-%d') + + + +def construct_nwm_forecast_filenames_and_retrieve(forecast_df,download_directory,retrieve=True,workers=1): + # make forecast file names for NWM and retrieve + + #construct url + #url = f'{year}/{year}{month}{day}{time}.CHRTOUT_DOMAIN1.comp' + + make_url = lambda df: "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/model_output/" + \ + str(df['date_time'].year) + '/' + str(df['date_time'].year) + \ + str(df['date_time'].month).zfill(2) + \ + str(df['date_time'].day).zfill(2) + \ + str(df['date_time'].hour).zfill(2) + \ + str(df['date_time'].minute).zfill(2) +\ + ".CHRTOUT_DOMAIN1.comp" + + def make_file_names(df,download_directory): + # assumes that the last segment after the / represents the file name + url = df['forecast_url'] + file_name_start_pos = url.rfind("/") + 1 + file_name = url[file_name_start_pos:] + + file_name = os.path.join(download_directory,file_name) + + return(file_name) + + + urls = forecast_df.apply(make_url,axis=1) + forecast_df['forecast_url'] = urls + + file_names = forecast_df.apply(lambda df: make_file_names(df,download_directory),axis=1) + forecast_df['forecast_file'] = file_names + + if not retrieve: + return(forecast_df) + + download_df = forecast_df[['forecast_url','forecast_file']].drop_duplicates() + + + def download_url(url,file_name): + r = 
requests.get(url, stream=True) + if r.status_code == requests.codes.ok: + with open(file_name, 'wb') as f: + for data in r: + f.write(data) + return url + + pool = ThreadPoolExecutor(max_workers=workers) + + results = { pool.submit(download_url,*(url,file_name)) : (url,file_name) for idx,(url,file_name) in download_df.iterrows() } + + for future in tqdm(as_completed(results),total=len(download_df),desc='Acquiring NWM forecasts'): + + url,file_name = results[future] + + try: + future.result() + except Exception as exc: + print('error',exc,url) + + pool.shutdown(wait=True) + + return(forecast_df) + + +if __name__ == '__main__': + + + ################################## + ## + ## Likely Deprecated: File appears to be no longer used. Noticed Jan 16, 2023 + ## Might want to be kept for possible re-use at a later time? + ## + ################################## + + # parse arguments + parser = argparse.ArgumentParser(description='Find hucs for bounding boxes') + parser.add_argument('-b','--bounding-boxes-file', help='Bounding box file', required=True) + parser.add_argument('-w','--wbd', help='WBD file', required=True) + parser.add_argument('-f','--forecast-output-file', help='Forecast file', required=False,default=None) + parser.add_argument('-r','--retrieve', help='Forecast file', required=False,default=False,action='store_true') + parser.add_argument('-j','--workers', help='Forecast file', required=False,default=1,type=int) + parser.add_argument('-d','--download_directory', help='Forecast file', required=False,default=1) + parser.add_argument('-m','--daily-mean-forecast-files', help='Daily Mean Forecast file', required=False,default=None) + + args=vars(parser.parse_args()) + + cygnss_preprocessing(**args) diff --git a/tools/eval_alt_catfim.py b/tools/eval_alt_catfim.py index ffe86cf0e..d5ce867d8 100644 --- a/tools/eval_alt_catfim.py +++ b/tools/eval_alt_catfim.py @@ -195,8 +195,8 @@ def process_alt_comparison(args): mask_values=None stats_modes_list=['total_area'] test_id='' - mask_dict={'levees': {'path': '/data/test_cases/other/zones/leveed_areas_conus.shp', 'buffer': None, 'operation': 'exclude'}, - 'waterbodies': {'path': '/data/test_cases/other/zones/nwm_v2_reservoirs.shp', 'buffer': None, 'operation': 'exclude'}, + mask_dict={'levees': {'path': '/data/inputs/nld_vectors/Levee_protected_areas.gpkg', 'buffer': None, 'operation': 'exclude'}, + 'waterbodies': {'path': '/data/inputs/nwm_hydrofabric/nwm_lakes.gpkg', 'buffer': None, 'operation': 'exclude'}, site: {'path': '/data/test_cases/{benchmark_type}_test_cases/validation_data_{benchmark_type}/{huc}/{site}/{site}_domain.shp'.format(benchmark_type=benchmark_type, site=site, category=category, huc=huc), 'buffer': None, 'operation': 'include'}} json_list.append(stats_json) diff --git a/tools/eval_plots.py b/tools/eval_plots.py index 6c2dcf00f..256342015 100644 --- a/tools/eval_plots.py +++ b/tools/eval_plots.py @@ -8,6 +8,7 @@ import matplotlib.pyplot as plt import seaborn as sns import re +import glob import os import sys sys.path.append('/foss_fim/src') @@ -24,7 +25,9 @@ ######################################################################### #Create boxplot ######################################################################### -def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, dest_file = False): +def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, + title_text, fim_configuration, textbox_str = False, + 
simplify_legend = False, dest_file = False): ''' Create boxplots. @@ -86,15 +89,17 @@ def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_ elif 'fim_2' in label: label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() elif 'fim_3' in label and len(label) < 20: - label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() + label_dict[label] = re.split('_fr|_ms|_comp', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() if label.endswith('_c'): label_dict[label] = label_dict[label] + ' c' + elif 'fim_4' in label and len(label) < 20: + label_dict[label] = label.replace('_','.').replace('fim.','FIM ') else: label_dict[label] = label #Define simplified labels as a list. new_labels = [label_dict[label] for label in org_labels] #Define legend location. FAR needs to be in different location than CSI/POD. - if y_field == 'FAR': + if y_field in ['FAR', 'PND','EQUITABLE_THREAT_SCORE']: legend_location = 'upper right' else: legend_location = 'lower left' @@ -151,7 +156,7 @@ def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annot #Set xticks and yticks and background horizontal line. axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) axes.set(xlim=(0.0,1.0),xticks = np.arange(0,1.1,0.1)) - axes.grid(b=True, which='major', axis='both') + axes.grid(visible=True, which='major', axis='both') #Set sizes of ticks and legend. axes.tick_params(labelsize = 'xx-large') @@ -189,7 +194,9 @@ def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annot ######################################################################### #Create barplot ######################################################################### -def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, display_values = False, dest_file = False): +def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, + title_text, fim_configuration, textbox_str = False, + simplify_legend = False, display_values = False, dest_file = False): ''' Create barplots. @@ -253,9 +260,11 @@ def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_ elif 'fim_2' in label: label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() elif 'fim_3' in label and len(label) < 20: - label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() + label_dict[label] = re.split('_fr|_ms|_comp', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() if label.endswith('_c'): label_dict[label] = label_dict[label] + ' c' + elif 'fim_4' in label and len(label) < 20: + label_dict[label] = label.replace('_','.').replace('fim.','FIM ') else: label_dict[label] = label #Define simplified labels as a list. @@ -339,7 +348,9 @@ def filter_dataframe(dataframe, unique_field): ############################################################################## #Main function to analyze metric csv. 
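A worked example of the legend-label simplification changed in boxplot() and barplot() above: FIM 3 labels are now also split on the new '_comp' suffix before being reformatted, and FIM 4 labels get their own branch. The version strings below are made up for illustration, and the helper is only a sketch of the label_dict logic:

    import re

    fim_configuration = 'COMP'

    def simplify(label):
        # FIM 3 style labels: strip the _fr/_ms/_comp suffix, then prettify.
        if 'fim_3' in label and len(label) < 20:
            return (re.split('_fr|_ms|_comp', label)[0]
                    .replace('_', '.').replace('fim.', 'FIM ')
                    + ' ' + fim_configuration.lower())
        # FIM 4 style labels: prettify directly, no suffix handling needed.
        elif 'fim_4' in label and len(label) < 20:
            return label.replace('_', '.').replace('fim.', 'FIM ')
        return label

    print(simplify('fim_3_0_29_1_comp'))   # -> FIM 3.0.29.1 comp
    print(simplify('fim_4_0_18_0'))        # -> FIM 4.0.18.0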
############################################################################## -def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , spatial = False, fim_1_ms = False, site_barplots = False): +def eval_plots(metrics_csv, workspace, versions = [], + stats = ['CSI','FAR','TPR','PND','MCC','EQUITABLE_THREAT_SCORE'] , spatial = False, + fim_1_ms = False, site_barplots = False): ''' Creates plots and summary statistics using metrics compiled from @@ -454,7 +465,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' # If versions are supplied then filter out if versions: #Filter out versions based on supplied version list - metrics = csv_df.query('version.str.startswith(tuple(@versions))') + metrics = csv_df.query('version.str.startswith(tuple(@versions))', engine='python') else: metrics = csv_df @@ -481,13 +492,13 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' base_resolution = 'nws_lid' # Filter the dataset based on query (IMPORTED FROM TOOLS_SHARED_VARIABLES.py) - ahps_metrics = benchmark_metrics.query(DISCARD_AHPS_QUERY) + ahps_metrics = benchmark_metrics.query(DISCARD_AHPS_QUERY, engine='python') # Filter out all instances where the base_resolution doesn't exist across all desired fim versions for a given magnitude all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(ahps_metrics, base_resolution) # If source is 'ble', set base_resolution and append ble dataset to all_datasets dictionary - elif benchmark_source in ['ble', 'ifc']: + elif benchmark_source in ['ble', 'ifc', 'ras2fim']: # Set the base processing unit for ble runs base_resolution = 'huc' @@ -514,6 +525,9 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' elif dataset_name == 'ifc': magnitude_order = ['2yr','5yr','10yr','25yr','50yr','100yr','200yr','500yr'] base_resolution = 'huc' + elif dataset_name == 'ras2fim': + magnitude_order = ['2yr','5yr','10yr','25yr','50yr','100yr'] + base_resolution = 'huc' elif dataset_name in ['usgs','nws']: magnitude_order = ['action','minor','moderate','major'] base_resolution = 'nws_lid' @@ -597,14 +611,19 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' ############################################################### #This section will join ahps metrics to a spatial point layer ############################################################### - if all_datasets.get(('nws','MS')) and all_datasets.get(('usgs','MS')): + if (all_datasets.get(('nws','MS')) and all_datasets.get(('usgs','MS'))) or \ + (all_datasets.get(('nws','COMP')) and all_datasets.get(('usgs','COMP'))): # export composite metrics to shp #Get point data for ahps sites #Get metrics for usgs and nws benchmark sources - usgs_dataset,sites = all_datasets.get(('usgs','MS')) - nws_dataset, sites = all_datasets.get(('nws','MS')) + try: + usgs_dataset,sites = all_datasets.get(('usgs','MS')) + nws_dataset, sites = all_datasets.get(('nws','MS')) + except TypeError: # Composite metrics + usgs_dataset,sites = all_datasets.get(('usgs','COMP')) + nws_dataset, sites = all_datasets.get(('nws','COMP')) #Append usgs/nws dataframes and filter unnecessary columns and rename remaining. 
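Regarding the engine='python' argument added to the DataFrame.query() calls in eval_plots() above: the default numexpr-backed engine does not evaluate the .str accessor, so filtering versions with str.startswith() inside a query string needs the Python engine. A small, self-contained illustration with made-up version names and scores:

    import pandas as pd

    csv_df = pd.DataFrame({'version': ['fim_3_0_29_1_ms', 'fim_4_0_18_0', 'fb_test'],
                           'CSI': [0.61, 0.64, 0.58]})   # made-up values
    versions = ['fim_4']

    # Without engine='python' this expression can fail under the numexpr engine.
    metrics = csv_df.query('version.str.startswith(tuple(@versions))', engine='python')
    print(metrics)   # keeps only the fim_4_0_18_0 row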
- all_ahps_datasets = usgs_dataset.append(nws_dataset) - all_ahps_datasets = all_ahps_datasets.filter(['huc','nws_lid','version','magnitude','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source']) + all_ahps_datasets = pd.concat([usgs_dataset, nws_dataset]) + all_ahps_datasets = all_ahps_datasets.filter(['huc','nws_lid','version','magnitude','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','PND','MCC','EQUITABLE_THREAT_SCORE','benchmark_source']) all_ahps_datasets.rename(columns = {'benchmark_source':'source'}, inplace = True) #Get spatial data from WRDS @@ -631,24 +650,45 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' ################################################################ #This section joins ble (FR) metrics to a spatial layer of HUCs. ################################################################ - if all_datasets.get(('ble','FR')) and all_datasets.get(('ifc','FR')): + if (all_datasets.get(('ble','FR')) and all_datasets.get(('ifc','FR')) and all_datasets.get(('ras2fim','FR'))) or \ + (all_datasets.get(('ble','COMP')) and all_datasets.get(('ifc','COMP')) and all_datasets.get(('ras2fim','COMP'))): #Select BLE, FR dataset. - ble_dataset, sites = all_datasets.get(('ble','FR')) - ifc_dataset, sites = all_datasets.get(('ifc','FR')) - huc_datasets = ble_dataset.append(ifc_dataset) + try: + ble_dataset, sites = all_datasets.get(('ble','FR')) + ifc_dataset, sites = all_datasets.get(('ifc','FR')) + ras2fim_dataset, sites = all_datasets.get(('ras2fim','FR')) + except TypeError: + ble_dataset, sites = all_datasets.get(('ble','COMP')) + ifc_dataset, sites = all_datasets.get(('ifc','COMP')) + ras2fim_dataset, sites = all_datasets.get(('ras2fim','COMP')) + + huc_datasets = pd.concat([ble_dataset, ifc_dataset, ras2fim_dataset]) #Read in HUC spatial layer wbd_gdf = gpd.read_file(Path(WBD_LAYER), layer = 'WBDHU8') #Join metrics to HUC spatial layer wbd_with_metrics = wbd_gdf.merge(huc_datasets, how = 'inner', left_on = 'HUC8', right_on = 'huc') #Filter out unnecessary columns - wbd_with_metrics = wbd_with_metrics.filter(['version','magnitude','huc','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source','geometry']) + wbd_with_metrics = wbd_with_metrics.filter(['version','magnitude','huc','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','PND','MCC','EQUITABLE_THREAT_SCORE','benchmark_source','geometry']) wbd_with_metrics.rename(columns = {'benchmark_source':'source'}, inplace = True ) #Project to VIZ projection wbd_with_metrics = wbd_with_metrics.to_crs(VIZ_PROJECTION) #Write out to file wbd_with_metrics.to_file(Path(workspace) / 'fim_performance_polys.shp') else: - print('BLE/IFC FR datasets not analyzed, no spatial data created.\nTo produce spatial data analyze a FR version') + print('BLE/IFC/RAS2FIM FR datasets not analyzed, no spatial data created.\nTo produce spatial data analyze a FR version') + +def convert_shapes_to_csv(workspace): + + # Convert any geopackage in the root level of output_mapping_dir to CSV and rename. 
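On the .append() to pd.concat() replacements just above: DataFrame.append is deprecated in recent pandas releases (and removed in pandas 2.0), and concatenating a list of frames is the drop-in replacement used here. A tiny before/after with made-up frames:

    import pandas as pd

    usgs_dataset = pd.DataFrame({'nws_lid': ['abcd1'], 'CSI': [0.55]})   # made-up values
    nws_dataset = pd.DataFrame({'nws_lid': ['efgh2'], 'CSI': [0.62]})

    # old: all_ahps_datasets = usgs_dataset.append(nws_dataset)
    all_ahps_datasets = pd.concat([usgs_dataset, nws_dataset])
    print(all_ahps_datasets)   # both rows, same result as the old append()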
+ shape_list = glob.glob(os.path.join(workspace, '*.shp')) + for shape in shape_list: + gdf = gpd.read_file(shape) + parent_directory = os.path.split(shape)[0] + file_name = shape.replace('.shp', '.csv') + csv_output_path = os.path.join(parent_directory, file_name) + gdf.to_csv(csv_output_path) + + ####################################################################### if __name__ == '__main__': # Parse arguments @@ -656,7 +696,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' parser.add_argument('-m','--metrics_csv', help = 'Metrics csv created from synthesize test cases.', required = True) parser.add_argument('-w', '--workspace', help = 'Output workspace', required = True) parser.add_argument('-v', '--versions', help = 'List of versions to be plotted/aggregated. Versions are filtered using the "startswith" approach. For example, ["fim_","fb1"] would retain all versions that began with "fim_" (e.g. fim_1..., fim_2..., fim_3...) as well as any feature branch that began with "fb". An other example ["fim_3","fb"] would result in all fim_3 versions being plotted along with the fb.', nargs = '+', default = []) - parser.add_argument('-s', '--stats', help = 'List of statistics (abbrev to 3 letters) to be plotted/aggregated', nargs = '+', default = ['CSI','TPR','FAR'], required = False) + parser.add_argument('-s', '--stats', help = 'List of statistics (abbrev to 3 letters) to be plotted/aggregated', nargs = '+', default = ['CSI','TPR','FAR','PND','MCC','EQUITABLE_THREAT_SCORE'], required = False) parser.add_argument('-sp', '--spatial', help = 'If enabled, creates spatial layers with metrics populated in attribute table.', action = 'store_true', required = False) parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False) parser.add_argument('-i', '--site_plots', help = 'If enabled individual barplots for each site are created.', action = 'store_true', required = False) @@ -677,3 +717,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' print('The following AHPS sites are considered "BAD_SITES": ' + ', '.join(BAD_SITES)) print('The following query is used to filter AHPS: ' + DISCARD_AHPS_QUERY) eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, spatial = sp, fim_1_ms = f, site_barplots = i) + + # Convert output shapefiles to CSV + print("Converting to CSVs...") + convert_shapes_to_csv(w) diff --git a/tools/eval_plots_stackedbar.py b/tools/eval_plots_stackedbar.py new file mode 100644 index 000000000..8cf8705fc --- /dev/null +++ b/tools/eval_plots_stackedbar.py @@ -0,0 +1,324 @@ +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +import sys +import argparse +from pathlib import Path +sys.path.append('/foss_fim/tools/') +from tools_shared_variables import BAD_SITES + +from matplotlib.patches import Patch +from matplotlib.lines import Line2D +import mpl_toolkits.axisartist as AA +mag_dict = {'action':'1_act', 'minor':'2_min', 'moderate':'3_mod', 'major':'4_maj', + '100yr':100, '500yr':500, '2yr':2,'5yr':5, + '10yr':10, '25yr':25, '50yr':50, '200yr':200} +inverted_mag = dict((v, k) for k, v in mag_dict.items()) + +def eval_plot_stack_data_prep(metric_csv, versions=[]): + + # Load in FIM 4 CSV and select the proper version metrics + metrics = pd.read_csv(metric_csv, dtype={'huc':str}) + if versions: + versions = list(versions) + # Check to 
make sure requested versions are in the metrics file + for v in versions: + assert v in metrics.version.unique(), f"{v} is not included in {metric_csv}.\nThe available options are {sorted(metrics.version.unique())}" + # Filter the metrics to the requested versions + metrics = metrics.loc[metrics.version.isin(versions)] + # Change the magnitudes to make them more sort-friendly for the plot below + metrics.magnitude = metrics.magnitude.apply(lambda m: mag_dict[m]) + # Fill the non-AHPS sites with values for nws_lid + metrics.nws_lid.fillna(metrics.huc +'_'+ metrics.benchmark_source, inplace=True) + # Create multi-index for easy merging and plotting + metrics.set_index(['benchmark_source', 'nws_lid', 'magnitude'], inplace=True, drop=False) + + # Normalize data to the total of TP + FN + metrics['TP_FN_sum'] = metrics['false_negatives_count'] + metrics['true_positives_count'] + metrics['TP_norm'] = metrics['true_positives_count'] / metrics['TP_FN_sum'] + metrics['FN_norm'] = metrics['false_negatives_count'] / metrics['TP_FN_sum'] + metrics['FP_norm'] = metrics['false_positives_count'] / metrics['TP_FN_sum'] + metrics['FP_norm'].replace([np.inf, -np.inf], np.nan, inplace=True) # some bad sites will divide by 0 + + return metrics + +def eval_plot_stack_indiv(metric_csv, versions, outfig, category): + + # Check inputs + assert category in ('nws','usgs','ble','ifc','ras2fim'), f"category must be one of ('nws','usgs','ble','ifc','ras2fim'), not {category}" + + metrics_df = eval_plot_stack_data_prep(metric_csv, versions) + + # Filter the plotting data to the selected category + data = metrics_df.loc[(category)] + num_subplots = len(data.nws_lid.unique()) + xmax = data['FP_norm'].max() + 1 + # Save version input order for plotting + version_dict = {v:i for i,v in enumerate(versions)} + data = data.assign(plot_order=data['version']) + data['plot_order'] = data.plot_order.map(version_dict) + data['magnitude_'] = data.magnitude + # Create the plot + fig, ax = plt.subplots(num_subplots, 1, figsize=(8, len(data)*0.25), dpi=100, facecolor='white', subplot_kw={'axes_class':AA.Axes}) + # Create a subplot for every site (nws_lid) + for i, site in enumerate(sorted(data.nws_lid.unique())): + subplot_data = data.loc[(site)] + subplot_data = subplot_data.sort_values(['magnitude_','plot_order'], ascending=False) + new_y = [j*1.25 for j in range(len(subplot_data))] + # Stacked horizontal bar plots + ax[i].barh(y=new_y, width='TP_norm', left=0.0, color='#2c7bb6', + linewidth=0.5, data=subplot_data) + ax[i].barh(y=new_y, width='FN_norm', left='TP_norm', color='#fdae61', + linewidth=0.5, data=subplot_data) + ax[i].barh(y=new_y, width='FP_norm', left=1.0, color='#d7191c', + linewidth=0.5, data=subplot_data) + # Plot the CSI and MCC scores + ax[i].scatter(y=new_y, x=subplot_data['CSI'].array, c='k', s=15, marker=r'x', zorder=3, linewidth=0.75) + ax[i].scatter(y=new_y, x=subplot_data['MCC'].array, s=15, marker=r'o', zorder=3, linewidths=.75, facecolor='None', edgecolors='k') + # Various axis customizations + ax[i].set_yticks(new_y, labels=subplot_data.version + ' ' + subplot_data.magnitude.map(inverted_mag).astype(str)) + ax[i].axis["left"].label.set(visible=True, text=site, rotation=90, pad=10, ha='right') + ax[i].axis["left"].major_ticks.set(tick_out=True) + ax[i].axis["left"].major_ticklabels.set(ha='left') + ax[i].set_xlim(0, xmax) + ax[i].set_ylim(-1, new_y[-1]+1) + num_mags = len(subplot_data.magnitude.unique()) + hlines = [j for j in np.linspace(new_y[-1]/num_mags, new_y[-1], num_mags-1, endpoint=False)] + 
ax[i].hlines(hlines, xmin=0, xmax=xmax, colors=['0.9' if i % 2 == 0 else 'w']*(num_mags-1), linestyles='dotted', zorder=0) + ax[i].grid(axis='x', color='0.8', zorder=0) + ax[i].axis['bottom'].set_visible(True if i == num_subplots-1 else False) + ax[i].axis['top'].set_visible(True if i == 0 else False) + ax[i].axis['top'].major_ticklabels.set_visible(True if i == 0 else False) + ax[i].set_facecolor('w' if i % 2 == 0 else '0.9') + # Label sites that have been identified as "Bad" + if site and site in BAD_SITES: + ax[i].text(xmax/2, new_y[-1]/2, '--BAD SITE--', horizontalalignment='center', verticalalignment='center') + ax[i].set_facecolor('0.67') + plt.subplots_adjust(wspace=0, hspace=0) + ax[0].set_title(f'{category.upper()} FIM Evaluation | Individual Sites', loc='center', pad=40) + TP_patch = Patch(color='#2c7bb6', linewidth=0.5, label=f'True Positive') + FN_patch = Patch(color='#fdae61', linewidth=0.5, label=f'False Negative') + FP_patch = Patch(color='#d7191c', linewidth=0.5, label=f'False Positive') + x_marker = Line2D([0], [0], marker='x', color='None', markeredgecolor='k', markerfacecolor='k', label='CSI Score', markersize=5, linewidth=0.5) + o_marker = Line2D([0], [0], marker='o', color='None', markeredgecolor='k', markerfacecolor='None', label='MCC Score', markersize=5, linewidth=10) + # Get the height of the figure in pixels so we can put the legend in a consistent position + ax_pixel_height = ax[0].get_window_extent().transformed(fig.dpi_scale_trans.inverted()).height + handles = [TP_patch, FN_patch, FP_patch, x_marker, o_marker] + ax[0].legend(loc='center', ncol=len(handles), handles=handles, fontsize=8, columnspacing=1, + bbox_to_anchor=(0.5, (0.3+ax_pixel_height)/ax_pixel_height)) + plt.savefig(outfig, bbox_inches='tight') + + return metrics_df + +def eval_plot_stack(metric_csv, versions, category, outfig, show_iqr=False): + + # Check inputs + assert category in ('nws','usgs','ble','ifc','ras2fim'), f"category must be one of ('nws','usgs','ble','ifc','ras2fim'), not {category}" + + metrics_df = eval_plot_stack_data_prep(metric_csv, versions) + metrics_df = metrics_df.loc[~metrics_df.nws_lid.isin(BAD_SITES)] + grouped = metrics_df.reset_index(drop=True).groupby(['benchmark_source','version','magnitude'], sort=False) + count_df = grouped.count()['CSI'] + metrics_df = grouped.median() + metrics_df['TP_norm_q1'] = metrics_df['TP_norm'] - grouped['TP_norm'].quantile(0.25) + metrics_df['TP_norm_q3'] = grouped['TP_norm'].quantile(0.75) - metrics_df['TP_norm'] + metrics_df['FP_norm_q1'] = metrics_df['FP_norm'] - grouped['FP_norm'].quantile(0.25) + metrics_df['FP_norm_q3'] = grouped['FP_norm'].quantile(0.75) - metrics_df['FP_norm'] + + # Filter the plotting data to the selected category + data = metrics_df.loc[(category)].swaplevel() + num_subplots = len(data.reset_index().magnitude.unique()) + num_ver = len(data.reset_index().version.unique()) + xmax = 1+ data['FP_norm'].max() + data['FP_norm_q3'][data['FP_norm'].idxmax()] + # Save version input order for plotting + version_dict = {v:i for i,v in enumerate(versions)} + data = data.assign(plot_order=data.index.get_level_values('version')) + data['plot_order'] = data.plot_order.map(version_dict) + + # Create the plot + fig, ax = plt.subplots(num_subplots, 1, figsize=(8, len(data)*0.25), dpi=100, facecolor='white', subplot_kw={'axes_class':AA.Axes}) + # Create a subplot for every flow (nws_lid) + for i, mag in enumerate(sorted(data.reset_index().magnitude.unique())): + subplot_data = data.loc[(mag)] + subplot_data = 
subplot_data.sort_values(['plot_order'], ascending=False) + new_y = [j*1.25 for j in range(len(subplot_data))] + # Stacked horizontal bar plots + ax[i].barh(y=new_y, width='TP_norm', left=0.0, color='#2c7bb6', + linewidth=0.5, data=subplot_data, + xerr=[subplot_data['TP_norm_q1'], subplot_data['TP_norm_q3']] if show_iqr else None, + error_kw=dict(elinewidth=1)) + ax[i].barh(y=new_y, width='FN_norm', left='TP_norm', color='#fdae61', + linewidth=0.5, data=subplot_data) + ax[i].barh(y=new_y, width='FP_norm', left=1.0, color='#d7191c', + linewidth=0.5, data=subplot_data, + xerr=[subplot_data['FP_norm_q1'], subplot_data['FP_norm_q3']] if show_iqr else None, + error_kw=dict(elinewidth=1)) + # Plot the CSI and MCC scores + ax[i].scatter(y=new_y, x=subplot_data['CSI'].array, c='k', s=15, marker=r'x', zorder=3, linewidth=0.75) + ax[i].scatter(y=new_y, x=subplot_data['MCC'].array, s=15, marker=r'o', zorder=3, linewidths=.75, facecolor='None', edgecolors='k') + # Various axis customizations + ax[i].set_yticks(new_y, labels=subplot_data.index)#, ha='left') + ax[i].axis["left"].label.set(visible=True, text=inverted_mag[mag], rotation=90, pad=10, ha='right') + ax[i].axis["left"].major_ticks.set(tick_out=True) + ax[i].axis["left"].major_ticklabels.set(ha='left') + n = count_df.loc[(category, versions[0], mag)] + ax[i].axis["right"].label.set(visible=True, text=f'n={n}', rotation=270, pad=5, ha='left') + ax[i].set_xlim(0, xmax) + ax[i].set_ylim(-1.25, num_ver+1.25) + ax[i].grid(axis='x', color='0.8', zorder=0) + ax[i].axis['bottom'].set_visible(True if i == num_subplots-1 else False) + ax[i].axis['top'].set_visible(True if i == 0 else False) + ax[i].axis['top'].major_ticklabels.set_visible(True if i == 0 else False) + ax[i].set_facecolor('w' if i % 2 == 0 else '0.9') + plt.subplots_adjust(wspace=0, hspace=0) + ax[0].set_title(f'{category.upper()} FIM Evaluation', loc='center', pad=35) + TP_patch = Patch(color='#2c7bb6', linewidth=0.5, label=f'True Positive') + FN_patch = Patch(color='#fdae61', linewidth=0.5, label=f'False Negative') + FP_patch = Patch(color='#d7191c', linewidth=0.5, label=f'False Positive') + x_marker = Line2D([0], [0], marker='x', color='None', markeredgecolor='k', markerfacecolor='k', label='CSI Score', markersize=5, linewidth=10) + o_marker = Line2D([0], [0], marker='o', color='None', markeredgecolor='k', markerfacecolor='None', label='MCC Score', markersize=5, linewidth=10) + # Get the height of the figure in pixels so we can put the legend in a consistent position + ax_pixel_height = ax[0].get_window_extent().transformed(fig.dpi_scale_trans.inverted()).height + handles = [TP_patch, FN_patch, FP_patch, x_marker, o_marker] + ax[0].legend(loc='center', ncol=len(handles), handles=handles, fontsize=8, columnspacing=1, + bbox_to_anchor=(0.5, (0.3+ax_pixel_height)/ax_pixel_height)) + plt.savefig(outfig, bbox_inches='tight') + + return data + +def diff_bar_plots(versions, metric_csv, category, outfig, stat='CSI'): + + # Check inputs + assert category in ('nws','usgs','ble','ifc','ras2fim'), f"category must be one of ('nws','usgs','ble','ifc','ras2fim'), not {category}" + assert stat in ('CSI', 'MCC', 'TPR', 'PND', 'FAR'), f"stat must be one of ('CSI', 'MCC', 'TPR', 'PND', 'FAR'), not {stat}" + assert len(versions) == 2, f"You can only produce the comparison plots when comparing 2 fim versions. You have input {len(versions)}." 
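For reference, the statistics accepted by these plotting functions are standard contingency-table scores. A minimal sketch of their usual definitions, assuming the metrics CSV follows the same conventions (the helper name and the counts below are invented for illustration):

import math

def contingency_stats(tp, fp, fn, tn):
    # Standard definitions; assumed to match the columns in the metrics CSV
    csi = tp / (tp + fp + fn)          # Critical Success Index
    tpr = tp / (tp + fn)               # Probability of Detection (recall)
    far = fp / (tp + fp)               # False Alarm Ratio
    pnd = fn / (tp + fn)               # Probability of Not Detecting (1 - TPR)
    mcc = ((tp * tn) - (fp * fn)) / math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    return {'CSI': csi, 'TPR': tpr, 'FAR': far, 'PND': pnd, 'MCC': mcc}

print(contingency_stats(tp=80, fp=10, fn=20, tn=890))

CSI, MCC and TPR improve toward 1, while PND and FAR improve toward 0, which is what the 'better_score' switch below encodes.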
+ # CSI, MCC, and TPR scores are better when closer to 1, however PND and FAR scores are better when closer to 0 + better_score = 'positive' if stat in ('CSI', 'MCC', 'TPR') else 'negative' + + # Load in FIM 4 CSV and select the proper version metrics + metrics = pd.read_csv(metric_csv, dtype={'huc':str}) + # Check to make sure requested versions are in the metrics file + for v in versions: + assert v in metrics.version.unique(), f"{v} is not included in {metric_csv}.\nThe available options are {sorted(metrics.version.unique())}" + # Change the magnitudes to make them more sort-friendly for the plot below + metrics.magnitude = metrics.magnitude.apply(lambda m: mag_dict[m]) + # Fill the non-AHPS sites with values for nws_lid + metrics.nws_lid.fillna(metrics.huc +'_'+ metrics.benchmark_source, inplace=True) + # Create multi-index for easy merging and plotting + metrics.set_index(['benchmark_source', 'nws_lid', 'magnitude'], inplace=True, drop=False) + + # Separate versions into 2 dataframes + metrics_base = metrics.loc[metrics.version == versions[0]].copy() + metrics_new = metrics.loc[metrics.version == versions[1]].copy() + + # Create a new column in the base dataframe and merge it into the comparison dataframe + metrics_base[f'BASE_{stat}'] = metrics_base[stat] + metrics_base.drop(columns=stat, inplace=True) + fim_compare = metrics_base.merge(metrics_new[stat], left_index=True, right_index=True, how='inner').sort_index(level=0,sort_remaining=True) + fim_compare[f'{stat}_diff'] = fim_compare[stat] - fim_compare[f'BASE_{stat}'] + # Color the boxes according to improved vs regressed scores. See definition of 'better_score' above for details. + if better_score == 'positive': + fim_compare['color'] = fim_compare.apply(lambda row: 'None' if row[f'{stat}_diff'] < 0 else 'g', axis=1) + fim_compare['edge_color'] = fim_compare.apply(lambda row: 'r' if row[f'{stat}_diff'] < 0 else 'g', axis=1) + elif better_score == 'negative': + fim_compare['color'] = fim_compare.apply(lambda row: 'None' if row[f'{stat}_diff'] > 0 else 'g', axis=1) + fim_compare['edge_color'] = fim_compare.apply(lambda row: 'r' if row[f'{stat}_diff'] > 0 else 'g', axis=1) + + # Filter the plotting data to the selected category + data = fim_compare.loc[(category)] + num_subplots = len(data.nws_lid.unique()) + num_mags = len(data.magnitude.unique()) + # Create the plot + fig, ax = plt.subplots(num_subplots, 1, figsize=(8, len(data)*0.18), dpi=100, facecolor='white') + # Create a subplot for every site (nws_lid) + for i, site in enumerate(data.nws_lid.unique()): + subplot_data = data.loc[(site)] + ax[i].barh(y=np.arange(len(subplot_data)), width=f'{stat}_diff', left=f'BASE_{stat}', color='color', + edgecolor='edge_color', linewidth=0.5, data=subplot_data, zorder=3) + ax[i].set_yticks(np.arange(len(subplot_data)), labels=subplot_data.index.map(inverted_mag).astype(str)) + ax[i].set_xticks(np.linspace(0,1,11)) + ax[i].set_ylabel(site, rotation='horizontal', labelpad=40) + ax[i].set_xlim(0, 1) + ax[i].set_ylim(-0.5, num_mags-0.25) + ax[i].grid(axis='x', color='0.8', zorder=0) + ax[i].spines['bottom'].set_color('0' if i == num_subplots-1 else 'None') + ax[i].spines['top'].set_color('0' if i == 0 else '0.8') + ax[i].set_facecolor('w' if i % 2 == 0 else '0.9') + ax[i].tick_params(axis="x", + bottom=True if i == num_subplots-1 else False, + top=True if i == 0 else False, + labelbottom=True if i == num_subplots-1 else False, + labeltop=True if i == 0 else False) + # Label sites that have been identified as "Bad" + if site and site in 
BAD_SITES: + ax[i].text(0.1, 1.5, '--BAD SITE--', horizontalalignment='center', verticalalignment='center') + ax[i].set_facecolor('0.67') + plt.subplots_adjust(wspace=0, hspace=0) + ax[0].set_title(f'{category.upper()} {stat} Comparison\n{versions[0]} v {versions[1]}', loc='center', pad=45) + g_patch = Patch(color='g', linewidth=0.5, label=f'{stat} Score Improvement') + r_patch = Patch(facecolor='None', edgecolor='r', linewidth=0.5, label=f'{stat} Score Regression') + # Get the height of the figure in pixels so we can put the legend in a consistent position + ax_pixel_height = ax[0].get_window_extent().transformed(fig.dpi_scale_trans.inverted()).height + ax[0].legend(loc='center', ncol=2, handles=[g_patch, r_patch], fontsize=8, + bbox_to_anchor=(0.5, (0.4+ax_pixel_height)/ax_pixel_height)) + plt.savefig(outfig, bbox_inches='tight') + + return fim_compare + +def iter_benchmarks(metric_csv, workspace, versions=[], individual_plots=False, show_iqr=False, diff_stat=False): + + # Import metrics csv as DataFrame and initialize all_datasets dictionary + csv_df = pd.read_csv(metric_csv, dtype = {'huc':str}) + + # If versions are supplied then filter out + if versions: + #Filter out versions based on supplied version list + metrics = csv_df.query('version.str.startswith(tuple(@versions))', engine='python') + else: + metrics = csv_df + + # Group by benchmark source + benchmark_by_source = metrics.groupby(['benchmark_source', 'extent_config']) + for (benchmark_source, extent_configuration), benchmark_metrics in benchmark_by_source: + + # Define and create the output workspace as a subfolder within the supplied workspace + output_workspace = Path(workspace) / benchmark_source / extent_configuration.lower() + output_workspace.mkdir(parents = True, exist_ok = True) + output_png = Path(output_workspace) / f"{benchmark_source}_{extent_configuration.lower()}_stackedbar{'_indiv'if individual_plots else ''}.png" + if diff_stat: + output_png = Path(output_workspace) / f"{benchmark_source}_{extent_configuration.lower()}_diff_plot_{diff_stat}.png" + diff_bar_plots(versions, metric_csv, category=benchmark_source, outfig=output_png, stat=diff_stat) + elif individual_plots: + output_png = Path(output_workspace) / f"{benchmark_source}_{extent_configuration.lower()}_stackedbar_indiv.png" + eval_plot_stack_indiv(metric_csv=metric_csv, versions=versions, category=benchmark_source, outfig=output_png) + else: + output_png = Path(output_workspace) / f"{benchmark_source}_{extent_configuration.lower()}_stackedbar.png" + eval_plot_stack(metric_csv=metric_csv, versions=versions, category=benchmark_source, show_iqr=show_iqr, outfig=output_png) + + +if __name__ == '__main__': + # Parse arguments + parser = argparse.ArgumentParser(description = f'Plot and aggregate statistics for benchmark datasets (BLE/AHPS libraries)') + parser.add_argument('-m','--metric_csv', help = 'Metrics csv created from synthesize test cases.', required = True) + parser.add_argument('-w', '--workspace', help = 'Output workspace', required = True) + parser.add_argument('-v', '--versions', help = 'List of versions to be plotted/aggregated. Versions are filtered using the "startswith" approach. For example, ["fim_","fb1"] would retain all versions that began with "fim_" (e.g. fim_1..., fim_2..., fim_3...) as well as any feature branch that began with "fb". 
An other example ["fim_3","fb"] would result in all fim_3 versions being plotted along with the fb.', nargs = '+', default = []) + parser.add_argument('-i', '--site-plots', help = 'If enabled individual barplots for each site are created.', action = 'store_true', required = False) + parser.add_argument('-iqr', '--show-iqr', help = 'If enabled, inter-quartile range error bars will be added.', action = 'store_true', required = False) + parser.add_argument('-d', '--diff-stat', help = 'This option creates diff plots instead of stacked bar plots. Only 2 versions can be used with this option. ' + \ + "Input one of the following statistics to be used for comparison: ('CSI', 'MCC', 'TPR', 'PND', 'FAR').", default = False, required = False) + + # Extract to dictionary and assign to variables + args = vars(parser.parse_args()) + + # Finalize Variables + m = args['metric_csv'] + w = args['workspace'] + v = args['versions'] + i = args['site_plots'] + iqr = args['show_iqr'] + d = args['diff_stat'] + + # Run eval_plots function + print('The following AHPS sites are considered "BAD_SITES": ' + ', '.join(BAD_SITES)) + iter_benchmarks(m, w, v, i, iqr, d) + diff --git a/tools/evaluate_continuity.py b/tools/evaluate_continuity.py new file mode 100644 index 000000000..b16c6b140 --- /dev/null +++ b/tools/evaluate_continuity.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 + +import geopandas as gpd +import pandas as pd +import numpy as np +import argparse +import matplotlib.pyplot as plt + +def evaluate_continuity(stream_network_file,forecast_file,stream_network_outfile=None, + confluences_only=False,plot_file=None): + + stream_network = gpd.read_file(stream_network_file) + forecast = pd.read_csv(forecast_file) + + stream_network = stream_network.merge(forecast,how='left',left_on='feature_id',right_on='feature_id') + + stream_network['discharge'] = stream_network['discharge'].fillna(0) + + + toNodes = set(stream_network['To_Node'].tolist()) + + # upstream dictionary and confluences + upstream_dictionary,confluences = {},set() + for idx,reach in stream_network.iterrows(): + + fromNode = reach['From_Node'] + hydroId= reach['HydroID'] + + if fromNode in toNodes: + upstream_indices = stream_network['To_Node'] == fromNode + upstream_discharges = np.array( stream_network.loc[upstream_indices,'discharge'].tolist() ) + upstream_dictionary[hydroId] = upstream_discharges + + isconfluence = len(upstream_discharges) > 1 + if isconfluence: + confluences.add(hydroId) + + # filter out non-confluences + if confluences_only: + hydroIDs = stream_network['HydroID'].tolist() + confluence_bool = np.array( [True if h in confluences else False for h in hydroIDs] ) + stream_network = stream_network.loc[confluence_bool,:] + + actual_discharges, expected_discharges = [], [] + expected_dischages_dict = dict() + for idx,reach in stream_network.iterrows(): + + hydroId = reach['HydroID'] + + try: + upstream_discharges = upstream_dictionary[hydroId] + except KeyError: + expected_dischages_dict[hydroId] = 0 + continue + + actual_discharges += [reach['discharge']] + expected_discharges += [np.sum(upstream_discharges)] + expected_dischages_dict[hydroId] = np.sum(upstream_discharges) + + + actual_discharges, expected_discharges = np.array(actual_discharges), np.array(expected_discharges) + + # add to stream_network + expected_discharges_df = pd.DataFrame.from_dict(expected_dischages_dict,orient='index', + columns=['expected_discharges']) + stream_network = stream_network.merge(expected_discharges_df, left_on='HydroID', right_index=True, how='left') 
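The continuity check above amounts to: the discharge assigned to a reach should equal the sum of the discharges of its immediate upstream reaches, with SMAPE summarizing the mismatch. A toy example with invented numbers:

import numpy as np

# Toy confluence: two upstream reaches carry 10 and 5 cms, the downstream reach reports 14 cms
upstream_discharges = np.array([10.0, 5.0])
actual = 14.0
expected = np.sum(upstream_discharges)          # 15.0

diff = actual - expected                        # -1.0 cms continuity error
sape = 100 * abs(actual - expected) / (abs(actual) + abs(expected))
print(diff, sape)                               # -1.0  ~3.45 (%)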
+
+    number_of_reaches = len(stream_network)
+    SMAPE = smape(actual_discharges,expected_discharges)
+    diff = actual_discharges - expected_discharges
+
+    print("Number of No Flow Reaches: {} out of {}".format((stream_network['discharge']==0).sum(),number_of_reaches))
+    print("SMAPE = {}%".format(SMAPE[0]))
+    print("Diff (<0) = {}".format(np.sum(diff<0)))
+    print("Diff (>0) = {}".format(np.sum(diff>0)))
+    print("Diff (=0) = {}".format(np.sum(diff==0)))
+    print("Diff (>-10,<10) = {}".format(np.sum(np.logical_and(diff>-10,diff<10))))
+    print("Median diff: {}".format(np.nanmedian(diff)))
+    print("Mean diff: {}".format(np.nanmean(diff)))
+
+    if confluences_only:
+        nbins = 50
+        xlim = (-2000,2000)
+        ylim = (0,50)
+        title = 'Discharge Errors (CMS) At Confluence Reaches'
+    else:
+        nbins = 500
+        xlim = (-450,450)
+        ylim = (0,60)
+        title = 'Discharge Errors (CMS)'
+
+    fig = plt.figure(1)
+    ax = plt.subplot(111)
+    n, bins, patches = ax.hist(diff, nbins, facecolor='blue', alpha=0.4)
+    try:
+        plt.xlim(xlim)
+        plt.ylim(ylim)
+    except UnboundLocalError:
+        pass
+    plt.title(title)
+    plt.xlabel('Discharge Errors (CMS) = actual - expected')
+    plt.ylabel('Count (Peak not shown)')
+
+    if plot_file is not None:
+        fig.savefig(plot_file)
+
+    if stream_network_outfile is not None:
+        stream_network.to_file(stream_network_outfile,index=False,driver='GPKG')
+
+    return(stream_network)
+
+
+def smape(predicted,actual):
+
+    assert len(predicted) == len(actual),"Predicted and actual need to have the same length"
+
+    sape = 100 * (np.abs(predicted-actual) / (np.abs(predicted) + np.abs(actual)))
+
+    return (np.nanmean(sape),sape)
+
+
+if __name__ == '__main__':
+
+    # Parse arguments.
+    parser = argparse.ArgumentParser(description='Evaluating continuity')
+    parser.add_argument('-s','--stream-network-file',help='Stream Network',required=True)
+    parser.add_argument('-f','--forecast-file',help='Forecast File',required=True)
+    parser.add_argument('-o','--stream-network-outfile',help='Stream Network Outfile',required=False,default=None)
+    parser.add_argument('-c','--confluences-only',help='Only at confluence reaches',required=False,default=False,action='store_true')
+    parser.add_argument('-p','--plot-file',help='Plot File',required=False,default=None)
+
+    args = vars(parser.parse_args())
+
+    evaluate_continuity(**args)
diff --git a/tools/fimr_to_benchmark.py b/tools/fimr_to_benchmark.py
new file mode 100644
index 000000000..0d4184815
--- /dev/null
+++ b/tools/fimr_to_benchmark.py
@@ -0,0 +1,160 @@
+
+import geopandas as gpd
+import pandas as pd
+import os
+from tools_shared_functions import get_metadata
+from dotenv import load_dotenv
+load_dotenv()
+import argparse
+import rasterio
+#from rasterio import features
+import affine
+import rasterio.mask
+
+def fimr_to_benchmark(fimr_path, output_path):
+    '''
+    This function converts USFIMR shapefiles from this website (https://waterserv.ua.edu/datasets/usfimr/) to rasters that can
+    be used for FIM Alpha evaluations and calibration. Note only USFIMR data with flow values (q) can be used here.
+
+    Inputs:  1. FIMR path - Path where the FIMR shapefile was downloaded from the aforementioned website.
+             2. Output path flow file - Path the flow file will be sent to.
+             3. Output path raster - Path the raster will be sent to. There will be a file for each huc with the huc number in it.
+
+    Outputs: 1. flow_file.csv - This is a flow file for the river. The flow values are from the most upstream usgs gage.
+             2. rasterized{huc number}.tif - This is the fimr data that has been converted to a raster. It is on the huc8 scale so it provides
+                each huc as a separate file. The inundated areas are assigned 1 and the dry areas are assigned 0.
+
+    Other Files Used: 1. wbd - The huc 8 boundaries throughout the country
+                      2. flowlines - the flowlines along all the rivers in the country
+    '''
+    # input validation
+    if (not os.path.exists(fimr_path)):
+        raise Exception(f"Sorry. {fimr_path} does not exist")
+
+    if (not os.path.exists(output_path)):
+        raise Exception(f"Sorry. {output_path} does not exist")
+
+    API_BASE_URL = os.getenv("API_BASE_URL")
+
+    # Path for the wbd data and nwm flowlines
+    wbd_path = '/data/inputs/wbd/WBD_National.gpkg'
+    flowlines_path = '/data/inputs/nwm_hydrofabric/nwm_flows_ms_wrds.gpkg'
+
+    # Saving wbd, flowlines, and fimr data as variables
+    wbd = gpd.read_file(wbd_path, layer = 'WBDHU8')
+    flowlines = gpd.read_file(flowlines_path)
+    fimr = gpd.read_file(fimr_path)
+
+    # Joining the flowlines and fimr to output only the flowlines in the inundated area
+    joined_flowlines_fimr = gpd.sjoin(flowlines, fimr)
+
+    # Renaming index_right to prevent it from causing an error in the next spatial join
+    joined_flowlines_fimr = joined_flowlines_fimr.rename(columns = {'index_right':'right_index'})
+    # Getting a list of all the unique feature Ids from the joined flowlines
+    unique_feature_ids = joined_flowlines_fimr.drop_duplicates('ID')
+    # Changing cfs to cms
+    unique_feature_ids['Q_at_Image_cms'] = unique_feature_ids['Q_at_Image']*0.028317
+    # Make a discharge dataframe with only the feature ID and the Q values
+    feature_discharge = unique_feature_ids[['ID','Q_at_Image_cms']]
+
+    #flow_file_path = os.path.join(output_path, 'flow_file.csv')
+    #feature_discharge.to_csv(flow_file_path, header = ['feature_id', 'discharge'],index = False)
+    #print('flow file created at ',flow_file_path)
+
+    # Takes the huc 8s that overlap the flowlines for the river
+    huc_joined = gpd.sjoin(wbd, joined_flowlines_fimr)
+    huc_list = huc_joined.HUC8.unique()
+    # Overlay the hucs
+    fimr_hucs = wbd.loc[wbd.HUC8.isin(huc_list)]
+    fimr_huc_union = gpd.overlay(joined_flowlines_fimr, fimr_hucs, how = 'union')
+    fimr_huc_union = fimr_huc_union.rename(columns = {'Shape_Length':'Shapelength'})
+    # Renames Shape_Length as Shapelength to eliminate a duplicate column and let it be exported
+    huc_joined = huc_joined.rename(columns = {'Shape_Length':'Shapelength'})
+
+    # Takes the usgs location ids
+    gages = fimr['USGS_Gage']
+    split_gages = gages.iloc[0].split( ", ")
+    # Takes the first usgs gage id that has the streamflow data for the whole fimr area
+    first_gage = (split_gages[0])
+
+    # Get metadata for all usgs_site_codes that are active in the U.S.
+    metadata_url = f'{API_BASE_URL}/metadata'
+    # Define arguments to retrieve metadata and then get metadata from WRDS
+    select_by = 'usgs_site_code'
+    selector = [first_gage] #change to usgs location id from fimr dataframe
+    must_include = 'usgs_data.active'
+    metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = None )
+
+    # Takes the stream order at the gauge
+    stream_order = metadata_df['nwm_feature_data_stream_order']
+    stream_order_float = stream_order[0]
+
+    # Gets flow lines where the stream order is equal to the magnitude of the mainstem
+    correct_flow = joined_flowlines_fimr.loc[joined_flowlines_fimr['order_'] == stream_order_float]
+
+    # Exploding the fimr inundation polygon
+    exploded_fimr = fimr.explode()
+    # Taking only the fimr polygons that overlap the flowlines to minimize noise
+    clean_fimr = gpd.sjoin(exploded_fimr, correct_flow)
+    # Renaming columns to prevent duplicate column names in overlay
+    clean_fimr = clean_fimr.rename(columns = {'index_right':'index_from_right'})
+
+    # Overlaying the cleaned fimr data and the hucs it is in
+    fimr_huc_union = gpd.overlay(clean_fimr, fimr_hucs, how = 'union')
+    fimr_huc_union = fimr_huc_union.rename(columns = {'Shape_Length':'Shapelength'})
+    # Renames Shape_Length as Shapelength to eliminate a duplicate column and let it be exported
+    huc_joined = huc_joined.rename(columns = {'Shape_Length':'Shapelength'})
+
+    # Looping through the huc8s, getting the fimr data in each
+    for huc in huc_list:
+
+        fimr_huc = fimr_huc_union.loc[fimr_huc_union.HUC8 == huc]
+        shapes = []
+
+        for index,row in fimr_huc.iterrows():
+
+            if pd.isna(row.right_index):
+                value = 0
+            else:
+                value = 1
+
+            shapes.append((row.geometry, value))
+
+        # Outputting Flow File
+        flow_file_path = os.path.join(output_path, 'fimr_huc_' + huc + '_flows.csv')
+        feature_discharge.to_csv(flow_file_path, header = ['feature_id', 'discharge'],index = False)
+        print('flow file created at',flow_file_path)
+        # output path for the rasters, float causes it to keep all the digits
+        out_path_raster = os.path.join(output_path,'fimr_huc_' + huc + '_extent.tif')
+
+        # Rasterizing
+        minx, miny, maxx, maxy = fimr_huc.geometry.total_bounds
+        transform = affine.Affine(10, 0, minx, 0, -10, maxy)
+
+        # Making the empty raster
+        raster_shape = (int((maxy - miny)//10), int((maxx - minx)//10))
+        raster = rasterio.features.rasterize(shapes,
+                                             out_shape = raster_shape,
+                                             transform = transform,
+                                             fill = -999)
+
+        with rasterio.open(out_path_raster, 'w', driver = 'GTiff', height = raster_shape[0], width = raster_shape[1], count = 1, dtype = raster.dtype, nodata = -999,
+                           crs = '+proj=aea +lat_0=23 +lon_0=-96 +lat_1=29.5 +lat_2=45.5 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs',transform = transform) as dst_dataset:
+            # writing array into the empty raster
+            dst_dataset.write_band(1, raster)
+        print('Rasterized fimr file created at ', out_path_raster)
+
+# Parse Arguments
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description = 'Get FIMR data for the different rivers from the different files')
+
+    parser.add_argument('-f','--fimr_path', type = str, help = 'the path for the shapefile input from the FIMR database', required = True)
+    parser.add_argument('-o','--output_path', type = str, help = 'output path for the flow file and FIMR rasters.', required = True)
+    args = vars(parser.parse_args())
+
+    fimr_to_benchmark(**args)
+
+diff --git
a/tools/find_max_catchment_breadth.py b/tools/find_max_catchment_breadth.py new file mode 100755 index 000000000..e8da34aa8 --- /dev/null +++ b/tools/find_max_catchment_breadth.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +import os +import pandas as pd +import geopandas as gpd +from glob import glob,iglob +import argparse +from shapely.geometry import Point + + +def Find_max_catchment_breadth(hydrofabric_dir): + + catchments_fileNames = glob_file_paths_for_catchments(hydrofabric_dir) + + list_with_max_lengths_within_files = [] + + for idx,cfn in enumerate(catchments_fileNames): + + catchments = gpd.read_file(cfn) + + lengths = catchments.apply(get_length_and_width_of_poly,axis=1) + + max_length_within_file = lengths.median() + + list_with_max_lengths_within_files.append(max_length_within_file) + + print(idx,max_length_within_file) + + + print(max(list_with_max_lengths_within_files)) + + +def glob_file_paths_for_catchments(hydrofabric_dir): + + file_pattern_to_glob = os.path.join(hydrofabric_dir,'**','gw_catchments_reaches_filtered_addedAttributes_crosswalked*.gpkg') + + catchments_fileNames = iglob(file_pattern_to_glob) + + return(catchments_fileNames) + + +def get_length_and_width_of_poly(geodataframe_row): + + poly = geodataframe_row['geometry'] + + # get minimum bounding box around polygon + box = poly.minimum_rotated_rectangle + + # get coordinates of polygon vertices + x, y = box.exterior.coords.xy + + # get length of bounding box edges + edge_length = (Point(x[0], y[0]).distance(Point(x[1], y[1])), Point(x[1], y[1]).distance(Point(x[2], y[2]))) + + # get length of polygon as the longest edge of the bounding box + length = max(edge_length) + + # get width of polygon as the shortest edge of the bounding box + #width = min(edge_length) + + return(length) + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Find the maximum breadth of catchments in given hydrofabric directory') + parser.add_argument('-y','--hydrofabric-dir',help='Path to hydrofabric directory',required=True) + + args = vars(parser.parse_args()) + + Find_max_catchment_breadth(**args) + diff --git a/tools/find_test_case_folders.py b/tools/find_test_case_folders.py new file mode 100644 index 000000000..c71d397e4 --- /dev/null +++ b/tools/find_test_case_folders.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 + +import os +import argparse +import re + +def create_huc_list(input_root_search_folder, + output_path, + overwrite=False): + + ''' + Summary: This scans an input directory, such as test_cases, and looks for each + unique huc value through those folders. This gives us a single list of all hucs + that can be used for alpha or sierra tests. + This looks for first level folders using the following convention: + - *_test_cases (ie usgs_test_cases) + Then subfolders using the following convention: + - {8 numbers}_{3 to 7 characters}. ie) 01080005_usgs + Input: + - input_root_search_folder: A fully qualified root path (relative to Docker pathing) to the folder that all hucs + are in. + - output_path: a path and file name (preferred as .lst or .txt) where the list of line delimited hucs + will be copied. The file and path do not need to pre-exist. + - overwrite: If the file exists and the overwrite flag is false, an error will be issued. Else, it will + be fully overwritten. + Output: + - a line delimited list of all huc numbers that have test cases available. + ''' + + if (not os.path.exists(input_root_search_folder)): + raise NotADirectoryError(f"Sorry. 
Search_folder of {input_root_search_folder} does not exist") + + if (os.path.exists(output_path) and not overwrite): + raise Exception(f"Sorry. The file {output_path} already exists. Use 'overwrite' argument if desired.") + + hucs = set() + + for test_case in [test_case for test_case in os.listdir(input_root_search_folder) + if re.match('.*_test_cases', test_case)]: + for test_id in [test_id for test_id in os.listdir(os.path.join(input_root_search_folder, test_case)) + if re.match('\d{8}_\w{3,7}', test_id)]: + hucs.add(test_id[:8]) + + sorted_hucs = sorted(hucs) + #print(sorted_hucs) + print(f"{str(len(sorted_hucs))} hucs found") + + os.makedirs(os.path.dirname(output_path), exist_ok=True) + + # Save to disk + with (open(output_path, 'w')) as output_file: + #output_file.writelines(sorted_hucs) + for huc in sorted_hucs: + output_file.write(huc) + output_file.write('\n') + + print(f"huc list saved at {output_path}") + + +if __name__ == '__main__': + +# Sample Usage: python /foss_fim/tools/find_test_case_hucs.py -d /data/test_cases/ -f /some_directory/huc_list_for_tests_22020420.lst -o + + parser = argparse.ArgumentParser(description='Finds all unique hucs that have test case data.') + parser.add_argument('-d','--input_root_search_folder', + help='Root folder to be scanned for unique hucs that have test case', + required=True) + parser.add_argument('-f','--output_path', + help='Folder path and file name to be saved (.txt or .lst suggested).', + required=True) + parser.add_argument('-o','--overwrite', + help='Overwrite the file if already existing? (default false)', + action='store_true' ) + + args = vars(parser.parse_args()) + + create_huc_list(**args) diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py index bcd6fc14d..53f0d5968 100755 --- a/tools/generate_categorical_fim.py +++ b/tools/generate_categorical_fim.py @@ -1,15 +1,168 @@ #!/usr/bin/env python3 import os -import subprocess import argparse +import csv +import traceback +import sys +from datetime import datetime import time from pathlib import Path import geopandas as gpd import pandas as pd +import rasterio +from rasterio.warp import calculate_default_transform, reproject, Resampling +import glob +from generate_categorical_fim_flows import generate_catfim_flows +from generate_categorical_fim_mapping import manage_catfim_mapping, post_process_cat_fim_for_viz +from tools_shared_functions import get_thresholds, get_nwm_segs, get_datum, ngvd_to_navd_ft, filter_nwm_segments_by_stream_order +from concurrent.futures import ProcessPoolExecutor, as_completed, wait +import numpy as np +from utils.shared_variables import VIZ_PROJECTION +from tools_shared_variables import (acceptable_coord_acc_code_list, + acceptable_coord_method_code_list, + acceptable_alt_acc_thresh, + acceptable_alt_meth_code_list, + acceptable_site_type_list) -def update_mapping_status(output_mapping_dir, output_flows_dir): +def process_generate_categorical_fim(fim_run_dir, job_number_huc, job_number_inundate, + stage_based, output_folder, overwrite, search, + lid_to_run, job_number_intervals, past_major_interval_cap): + + print("================================") + print("Start generate categorical fim") + overall_start_time = datetime.now() + dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S") + print (f"started: {dt_string}") + print() + + # Check job numbers and raise error if necessary + total_cpus_requested = job_number_huc * job_number_inundate * job_number_intervals + total_cpus_available = os.cpu_count() - 1 + if 
total_cpus_requested > total_cpus_available: + raise ValueError('The HUC job number, {}, multiplied by the inundate job number, {}, '\ + 'exceeds your machine\'s available CPU count minus one. '\ + 'Please lower the job_number_huc or job_number_inundate '\ + 'values accordingly.'.format(job_number_huc, job_number_inundate) ) + + # Define default arguments. Modify these if necessary + fim_version = os.path.split(fim_run_dir)[1] + + # Append option configuration (flow_based or stage_based) to output folder name. + if stage_based: + file_handle_appendage, catfim_method = "_stage_based", "STAGE-BASED" + else: + file_handle_appendage, catfim_method = "_flow_based", "FLOW-BASED" + + # Define output directories + output_catfim_dir_parent = output_folder + file_handle_appendage + output_flows_dir = os.path.join(output_catfim_dir_parent, 'flows') + output_mapping_dir = os.path.join(output_catfim_dir_parent, 'mapping') + attributes_dir = os.path.join(output_catfim_dir_parent, 'attributes') + + # Create output directories + if not os.path.exists(output_catfim_dir_parent): os.mkdir(output_catfim_dir_parent) + if not os.path.exists(output_flows_dir): os.mkdir(output_flows_dir) + if not os.path.exists(output_mapping_dir): os.mkdir(output_mapping_dir) + if not os.path.exists(attributes_dir): os.mkdir(attributes_dir) + + # Define upstream and downstream search in miles + nwm_us_search, nwm_ds_search = search, search + fim_dir = fim_version + + # Set up logging + log_dir = os.path.join(output_catfim_dir_parent, 'logs') + log_file = os.path.join(log_dir, 'errors.log') + + # STAGE-BASED + if stage_based: + # Generate Stage-Based CatFIM mapping + nws_sites_layer = generate_stage_based_categorical_fim(output_mapping_dir, fim_version, fim_run_dir, + nwm_us_search, nwm_ds_search, job_number_inundate, + lid_to_run, attributes_dir, job_number_intervals, + past_major_interval_cap, job_number_huc) + + job_number_tif = job_number_inundate * job_number_intervals + print("Post-processing TIFs...") + post_process_cat_fim_for_viz(job_number_huc, job_number_tif, output_mapping_dir, attributes_dir, log_file=log_file, fim_version=fim_version) + + # Updating mapping status + print('Updating mapping status...') + update_mapping_status(str(output_mapping_dir), str(output_flows_dir), nws_sites_layer, stage_based) + + # FLOW-BASED + else: + fim_dir = "" + print('Creating flow files using the ' + catfim_method + ' technique...') + start = time.time() + nws_sites_layer = generate_catfim_flows(output_flows_dir, nwm_us_search, nwm_ds_search, stage_based, fim_dir, lid_to_run, attributes_dir, job_number_huc) + end = time.time() + elapsed_time = (end-start)/60 + + print(f'Finished creating flow files in {elapsed_time} minutes') + # Generate CatFIM mapping + print('Begin mapping') + start = time.time() + manage_catfim_mapping(fim_run_dir, output_flows_dir, output_mapping_dir, attributes_dir, job_number_huc, job_number_inundate, overwrite, depthtif=False) + end = time.time() + elapsed_time = (end-start)/60 + print(f'Finished mapping in {elapsed_time} minutes') + + # Updating mapping status + print('Updating mapping status') + update_mapping_status(str(output_mapping_dir), str(output_flows_dir), nws_sites_layer, stage_based) + + # Create CSV versions of the final geopackages. + print('Creating CSVs. 
This may take several minutes.') + reformatted_catfim_method = catfim_method.lower().replace('-', '_') + create_csvs(output_mapping_dir, reformatted_catfim_method) + + print("================================") + print("End generate categorical fim") + + end_time = datetime.now() + dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S") + print (f"ended: {dt_string}") + + # calculate duration + time_duration = end_time - overall_start_time + print(f"Duration: {str(time_duration).split('.')[0]}") + print() + +def create_csvs(output_mapping_dir, reformatted_catfim_method): + ''' + Produces CSV versions of desired geopackage in the output_mapping_dir. + + Parameters + ---------- + output_mapping_dir : STR + Path to the output directory of all inundation maps. + reformatted_catfim_method : STR + Text to append to CSV to communicate the type of CatFIM. + + Returns + ------- + None. + + ''' + + # Convert any geopackage in the root level of output_mapping_dir to CSV and rename. + gpkg_list = glob.glob(os.path.join(output_mapping_dir, '*.gpkg')) + for gpkg in gpkg_list: + print(f"Creating CSV for {gpkg}") + gdf = gpd.read_file(gpkg) + parent_directory = os.path.split(gpkg)[0] + if 'catfim_library' in gpkg: + file_name = reformatted_catfim_method + '_catfim.csv' + if 'nws_lid_sites' in gpkg: + file_name = reformatted_catfim_method + '_catfim_sites.csv' + + csv_output_path = os.path.join(parent_directory, file_name) + gdf.to_csv(csv_output_path) + + +def update_mapping_status(output_mapping_dir, output_flows_dir, nws_sites_layer, stage_based): ''' Updates the status for nws_lids from the flows subdirectory. Status is updated for sites where the inundation.py routine was not able to @@ -38,73 +191,593 @@ def update_mapping_status(output_mapping_dir, output_flows_dir): mapping_df['did_it_map'] = 'no' mapping_df['map_status'] = ' and all categories failed to map' - # Import shapefile output from flows creation - shapefile = Path(output_flows_dir)/'nws_lid_flows_sites.shp' - flows_df = gpd.read_file(shapefile) + # Import geopackage output from flows creation + flows_df = gpd.read_file(nws_sites_layer) + + try: + # Join failed sites to flows df + flows_df = flows_df.merge(mapping_df, how = 'left', on = 'nws_lid') + + # Switch mapped column to no for failed sites and update status + flows_df.loc[flows_df['did_it_map'] == 'no', 'mapped'] = 'no' + flows_df.loc[flows_df['did_it_map']=='no','status'] = flows_df['status'] + flows_df['map_status'] + + # # Perform pass for HUCs where mapping was skipped due to missing data #TODO check with Brian + # if stage_based: + # missing_mapping_hucs = + # else: + # flows_hucs = [i.stem for i in Path(output_flows_dir).iterdir() if i.is_dir()] + # mapping_hucs = [i.stem for i in Path(output_mapping_dir).iterdir() if i.is_dir()] + # missing_mapping_hucs = list(set(flows_hucs) - set(mapping_hucs)) + # + # # Update status for nws_lid in missing hucs and change mapped attribute to 'no' + # flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'status'] = flows_df['status'] + ' and all categories failed to map because missing HUC information' + # flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'mapped'] = 'no' + + # Clean up GeoDataFrame and rename columns for consistency + flows_df = flows_df.drop(columns = ['did_it_map','map_status']) + flows_df = flows_df.rename(columns = {'nws_lid':'ahps_lid'}) + + # Write out to file + flows_df.to_file(nws_sites_layer) + except: + print("No LIDs") - # Join failed sites to flows df - 
flows_df = flows_df.merge(mapping_df, how = 'left', on = 'nws_lid') +def produce_inundation_map_with_stage_and_feature_ids(rem_path, catchments_path, hydroid_list, hand_stage, lid_directory, category, huc, lid, branch): + + # Open rem_path and catchment_path using rasterio. + rem_src = rasterio.open(rem_path) + catchments_src = rasterio.open(catchments_path) + rem_array = rem_src.read(1) + catchments_array = catchments_src.read(1) + + # Use numpy.where operation to reclassify rem_path on the condition that the pixel values are <= to hand_stage and the catchments + # value is in the hydroid_list. + reclass_rem_array = np.where((rem_array<=hand_stage) & (rem_array != rem_src.nodata), 1, 0).astype('uint8') + hydroid_mask = np.isin(catchments_array, hydroid_list) + target_catchments_array = np.where((hydroid_mask == True) & (catchments_array != catchments_src.nodata), 1, 0).astype('uint8') + masked_reclass_rem_array = np.where((reclass_rem_array == 1) & (target_catchments_array == 1), 1, 0).astype('uint8') + + # Save resulting array to new tif with appropriate name. brdc1_record_extent_18060005.tif + is_all_zero = np.all((masked_reclass_rem_array == 0)) + + if not is_all_zero: + output_tif = os.path.join(lid_directory, lid + '_' + category + '_extent_' + huc + '_' + branch + '.tif') + with rasterio.Env(): + profile = rem_src.profile + profile.update(dtype=rasterio.uint8) + profile.update(nodata=10) + + with rasterio.open(output_tif, 'w', **profile) as dst: + dst.write(masked_reclass_rem_array, 1) + +def mark_complete(site_directory): + marker_file = Path(site_directory) / 'complete.txt' + marker_file.touch() + return + +def iterate_through_huc_stage_based(workspace, huc, fim_dir, huc_dictionary, threshold_url, flood_categories, all_lists, past_major_interval_cap, number_of_jobs, number_of_interval_jobs, attributes_dir, nwm_flows_df): + missing_huc_files = [] + all_messages = [] + stage_based_att_dict = {} + + print(f'Iterating through {huc}...') + # Make output directory for huc. + huc_directory = os.path.join(workspace, huc) + if not os.path.exists(huc_directory): + os.mkdir(huc_directory) + + # Define paths to necessary HAND and HAND-related files. + usgs_elev_table = os.path.join(fim_dir, huc, 'usgs_elev_table.csv') + branch_dir = os.path.join(fim_dir, huc, 'branches') + + # Loop through each lid in nws_lids list + nws_lids = huc_dictionary[huc] + for lid in nws_lids: + lid = lid.lower() # Convert lid to lower case + # -- If necessary files exist, continue -- # + if not os.path.exists(usgs_elev_table): + all_messages.append([f'{lid}:usgs_elev_table missing, likely unacceptable gage datum error--more details to come in future release']) + continue + if not os.path.exists(branch_dir): + all_messages.append([f'{lid}:branch directory missing']) + continue + usgs_elev_df = pd.read_csv(usgs_elev_table) + # Make lid_directory. + + lid_directory = os.path.join(huc_directory, lid) + if not os.path.exists(lid_directory): + os.mkdir(lid_directory) + else: + complete_marker = os.path.join(lid_directory, 'complete.txt') + if os.path.exists(complete_marker): + all_messages.append([f"{lid}: already completed in previous run."]) + continue + # Get stages and flows for each threshold from the WRDS API. Priority given to USGS calculated flows. 
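The inundation step in produce_inundation_map_with_stage_and_feature_ids above reduces to two boolean masks: REM pixels at or below the HAND stage, restricted to the site's catchments. A tiny synthetic demo of that reclassification (the arrays, IDs and stage below are invented; the real code reads them from the REM and catchment rasters):

import numpy as np

rem        = np.array([[0.5, 1.2], [3.0, 0.1]])   # REM / HAND values in metres (invented)
catchments = np.array([[101, 102], [101, 103]])   # catchment (HydroID) raster (invented)
hydroid_list = [101, 103]                          # catchments tied to the site's segments
hand_stage = 1.0                                   # datum-adjusted stage, metres

wet    = (rem <= hand_stage)                       # pixels inundated at this stage
target = np.isin(catchments, hydroid_list)         # pixels in the catchments of interest
extent = np.where(wet & target, 1, 0).astype('uint8')
print(extent)   # [[1 0]
                #  [0 1]]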
+ stages, flows = get_thresholds(threshold_url = threshold_url, select_by = 'nws_lid', selector = lid, threshold = 'all') + + if stages == None: + all_messages.append([f'{lid}:error getting thresholds from WRDS API']) + continue + # Check if stages are supplied, if not write message and exit. + if all(stages.get(category, None)==None for category in flood_categories): + all_messages.append([f'{lid}:missing threshold stages']) + continue - # Switch mapped column to no for failed sites and update status - flows_df.loc[flows_df['did_it_map'] == 'no', 'mapped'] = 'no' - flows_df.loc[flows_df['did_it_map']=='no','status'] = flows_df['status'] + flows_df['map_status'] + try: + # Drop columns that offend acceptance criteria + usgs_elev_df['acceptable_codes'] = (usgs_elev_df['usgs_data_coord_accuracy_code'].isin(acceptable_coord_acc_code_list) + & usgs_elev_df['usgs_data_coord_method_code'].isin(acceptable_coord_method_code_list) + & usgs_elev_df['usgs_data_alt_method_code'].isin(acceptable_alt_meth_code_list) + & usgs_elev_df['usgs_data_site_type'].isin(acceptable_site_type_list)) + + usgs_elev_df = usgs_elev_df.astype({'usgs_data_alt_accuracy_code': float}) + usgs_elev_df['acceptable_alt_error'] = np.where(usgs_elev_df['usgs_data_alt_accuracy_code'] <= acceptable_alt_acc_thresh, True, False) + + acceptable_usgs_elev_df = usgs_elev_df[(usgs_elev_df['acceptable_codes'] == True) & (usgs_elev_df['acceptable_alt_error'] == True)] + except Exception as e: + # Not sure any of the sites actually have those USGS-related + # columns in this particular file, so just assume it's fine to use - # Perform pass for HUCs where mapping was skipped due to missing data #TODO check with Brian - flows_hucs = [i.stem for i in Path(output_flows_dir).iterdir() if i.is_dir()] - mapping_hucs = [i.stem for i in Path(output_mapping_dir).iterdir() if i.is_dir()] - missing_mapping_hucs = list(set(flows_hucs) - set(mapping_hucs)) + #print("(Various columns related to USGS probably not in this csv)") + acceptable_usgs_elev_df = usgs_elev_df + + # Get the dem_adj_elevation value from usgs_elev_table.csv. Prioritize the value that is not from branch 0. + try: + matching_rows = acceptable_usgs_elev_df.loc[acceptable_usgs_elev_df['nws_lid'] == lid.upper(), 'dem_adj_elevation'] + + if len(matching_rows) == 2: # It means there are two level paths, use the one that is not 0 + lid_usgs_elev = acceptable_usgs_elev_df.loc[(acceptable_usgs_elev_df['nws_lid'] == lid.upper()) & ('levpa_id' != 0), 'dem_adj_elevation'].values[0] + else: + lid_usgs_elev = acceptable_usgs_elev_df.loc[acceptable_usgs_elev_df['nws_lid'] == lid.upper(), 'dem_adj_elevation'].values[0] + except IndexError: # Occurs when LID is missing from table + all_messages.append([f'{lid}:likely unacceptable gage datum error or accuracy code(s); please see acceptance criteria']) + continue + # Initialize nested dict for lid attributes + stage_based_att_dict.update({lid:{}}) + + # Find lid metadata from master list of metadata dictionaries. 
+ metadata = next((item for item in all_lists if item['identifiers']['nws_lid'] == lid.upper()), False) + lid_altitude = metadata['usgs_data']['altitude'] + + # Filter out sites that don't have "good" data + try: + if not metadata['usgs_data']['coord_accuracy_code'] in \ + acceptable_coord_acc_code_list: + print(f"\t{lid}: {metadata['usgs_data']['coord_accuracy_code']} Not in acceptable coord acc codes") + continue + if not metadata['usgs_data']['coord_method_code'] in \ + acceptable_coord_method_code_list: + print(f"\t{lid}: Not in acceptable coord method codes") + continue + if not metadata['usgs_data']['alt_method_code'] in \ + acceptable_alt_meth_code_list: + print(f"\t{lid}: Not in acceptable alt method codes") + continue + if not metadata['usgs_data']['site_type'] in \ + acceptable_site_type_list: + print(f"\t{lid}: Not in acceptable site type codes") + continue + if not float(metadata['usgs_data']['alt_accuracy_code']) <= \ + acceptable_alt_acc_thresh: + print(f"\t{lid}: Not in acceptable threshold range") + continue + except Exception as e: + print(e) + continue + + ### --- Do Datum Offset --- ### + #determine source of interpolated threshold flows, this will be the rating curve that will be used. + rating_curve_source = flows.get('source') + if rating_curve_source is None: + all_messages.append([f'{lid}:No source for rating curve']) + continue + # Get the datum and adjust to NAVD if necessary. + nws_datum_info, usgs_datum_info = get_datum(metadata) + if rating_curve_source == 'USGS Rating Depot': + datum_data = usgs_datum_info + elif rating_curve_source == 'NRLDB': + datum_data = nws_datum_info + + # If datum not supplied, skip to new site + datum = datum_data.get('datum', None) + if datum is None: + all_messages.append([f'{lid}:datum info unavailable']) + continue + # _________________________________________________________________________________________________________# + # SPECIAL CASE: Workaround for "bmbp1" where the only valid datum is from NRLDB (USGS datum is null). + # Modifying rating curve source will influence the rating curve and datum retrieved for benchmark determinations. + if lid == 'bmbp1': + rating_curve_source = 'NRLDB' + # ___________________________________________________________________# + + # SPECIAL CASE: Custom workaround these sites have faulty crs from WRDS. CRS needed for NGVD29 conversion to NAVD88 + # USGS info indicates NAD83 for site: bgwn7, fatw3, mnvn4, nhpp1, pinn4, rgln4, rssk1, sign4, smfn7, stkn4, wlln7 + # Assumed to be NAD83 (no info from USGS or NWS data): dlrt2, eagi1, eppt2, jffw3, ldot2, rgdt2 + if lid in ['bgwn7', 'dlrt2','eagi1','eppt2','fatw3','jffw3','ldot2','mnvn4','nhpp1','pinn4','rgdt2','rgln4','rssk1','sign4','smfn7','stkn4','wlln7' ]: + datum_data.update(crs = 'NAD83') + # ___________________________________________________________________# + + # SPECIAL CASE: Workaround for bmbp1; CRS supplied by NRLDB is mis-assigned (NAD29) and is actually NAD27. + # This was verified by converting USGS coordinates (in NAD83) for bmbp1 to NAD27 and it matches NRLDB coordinates. + if lid == 'bmbp1': + datum_data.update(crs = 'NAD27') + # ___________________________________________________________________# + + # SPECIAL CASE: Custom workaround these sites have poorly defined vcs from WRDS. VCS needed to ensure datum reported in NAVD88. + # If NGVD29 it is converted to NAVD88. + # bgwn7, eagi1 vertical datum unknown, assume navd88 + # fatw3 USGS data indicates vcs is NAVD88 (USGS and NWS info agree on datum value). 
+ # wlln7 USGS data indicates vcs is NGVD29 (USGS and NWS info agree on datum value). + if lid in ['bgwn7','eagi1','fatw3']: + datum_data.update(vcs = 'NAVD88') + elif lid == 'wlln7': + datum_data.update(vcs = 'NGVD29') + # _________________________________________________________________________________________________________# + + # Adjust datum to NAVD88 if needed + # Default datum_adj_ft to 0.0 + datum_adj_ft = 0.0 + crs = datum_data.get('crs') + if datum_data.get('vcs') in ['NGVD29', 'NGVD 1929', 'NGVD,1929', 'NGVD OF 1929', 'NGVD']: + # Get the datum adjustment to convert NGVD to NAVD. Sites not in contiguous US are previously removed otherwise the region needs changed. + try: + datum_adj_ft = ngvd_to_navd_ft(datum_info = datum_data, region = 'contiguous') + except Exception as e: + e = str(e) + if crs == None: + all_messages.append([f'{lid}:NOAA VDatum adjustment error, CRS is missing']) + if 'HTTPSConnectionPool' in e: + time.sleep(10) # Maybe the API needs a break, so wait 10 seconds + try: + datum_adj_ft = ngvd_to_navd_ft(datum_info = datum_data, region = 'contiguous') + except Exception: + all_messages.append([f'{lid}:NOAA VDatum adjustment error, possible API issue']) + if 'Invalid projection' in e: + all_messages.append([f'{lid}:NOAA VDatum adjustment error, invalid projection: crs={crs}']) + continue + + ### -- Concluded Datum Offset --- ### + # Get mainstem segments of LID by intersecting LID segments with known mainstem segments. + unfiltered_segments = list(set(get_nwm_segs(metadata))) + + # Filter segments to be of like stream order. + desired_order = metadata['nwm_feature_data']['stream_order'] + segments = filter_nwm_segments_by_stream_order(unfiltered_segments, desired_order, nwm_flows_df) + action_stage = stages['action'] + minor_stage = stages['minor'] + moderate_stage = stages['moderate'] + major_stage = stages['major'] + stage_list = [i for i in [action_stage, minor_stage, moderate_stage, major_stage] if i is not None] + # Create a list of stages, incrementing by 1 ft. + if stage_list == []: + all_messages.append([f'{lid}:no stage values available']) + continue + interval_list = np.arange(min(stage_list), max(stage_list) + past_major_interval_cap, 1.0) # Go an extra 10 ft beyond the max stage, arbitrary + # For each flood category + for category in flood_categories: + + # Pull stage value and confirm it's valid, then process + stage = stages[category] + + if stage != None and datum_adj_ft != None and lid_altitude != None: + + # Call function to execute mapping of the TIFs. + messages, hand_stage, datum_adj_wse, datum_adj_wse_m = produce_stage_based_catfim_tifs(stage, datum_adj_ft, branch_dir, lid_usgs_elev, lid_altitude, fim_dir, segments, lid, huc, lid_directory, category, number_of_jobs) + all_messages += messages + + # Extra metadata for alternative CatFIM technique. TODO Revisit because branches complicate things + stage_based_att_dict[lid].update({category: {'datum_adj_wse_ft': datum_adj_wse, + 'datum_adj_wse_m': datum_adj_wse_m, + 'hand_stage': hand_stage, + 'datum_adj_ft': datum_adj_ft, + 'lid_alt_ft': lid_altitude, + 'lid_alt_m': lid_altitude*0.3048}}) + # If missing HUC file data, write message + if huc in missing_huc_files: + all_messages.append([f'{lid}:missing some HUC data']) + + # Now that the "official" category maps are made, produce the incremental maps. + with ProcessPoolExecutor(max_workers=number_of_interval_jobs) as executor: + try: + for interval_stage in interval_list: + # Determine category the stage value belongs with. 
+ if action_stage <= interval_stage < minor_stage: + category = 'action_' + str(interval_stage).replace('.', 'p') + 'ft' + if minor_stage <= interval_stage < moderate_stage: + category = 'minor_' + str(interval_stage).replace('.', 'p') + 'ft' + if moderate_stage <= interval_stage < major_stage: + category = 'moderate_' + str(interval_stage).replace('.', 'p') + 'ft' + if interval_stage >= major_stage: + category = 'major_' + str(interval_stage).replace('.', 'p') + 'ft' + executor.submit(produce_stage_based_catfim_tifs, interval_stage, datum_adj_ft, branch_dir, lid_usgs_elev, lid_altitude, fim_dir, segments, lid, huc, lid_directory, category, number_of_jobs) + except TypeError: # sometimes the thresholds are Nonetypes + pass + + # Create a csv with same information as geopackage but with each threshold as new record. + # Probably a less verbose way. + csv_df = pd.DataFrame() + for threshold in flood_categories: + try: + line_df = pd.DataFrame({'nws_lid': [lid], + 'name':metadata['nws_data']['name'], + 'WFO': metadata['nws_data']['wfo'], + 'rfc':metadata['nws_data']['rfc'], + 'huc':[huc], + 'state':metadata['nws_data']['state'], + 'county':metadata['nws_data']['county'], + 'magnitude': threshold, + 'q':flows[threshold], + 'q_uni':flows['units'], + 'q_src':flows['source'], + 'stage':stages[threshold], + 'stage_uni':stages['units'], + 's_src':stages['source'], + 'wrds_time':stages['wrds_timestamp'], + 'nrldb_time':metadata['nrldb_timestamp'], + 'nwis_time':metadata['nwis_timestamp'], + 'lat':[float(metadata['nws_preferred']['latitude'])], + 'lon':[float(metadata['nws_preferred']['longitude'])], + 'dtm_adj_ft': stage_based_att_dict[lid][threshold]['datum_adj_ft'], + 'dadj_w_ft': stage_based_att_dict[lid][threshold]['datum_adj_wse_ft'], + 'dadj_w_m': stage_based_att_dict[lid][threshold]['datum_adj_wse_m'], + 'lid_alt_ft': stage_based_att_dict[lid][threshold]['lid_alt_ft'], + 'lid_alt_m': stage_based_att_dict[lid][threshold]['lid_alt_m']}) + csv_df = csv_df.append(line_df) + + except Exception as e: + print(e) + + # Round flow and stage columns to 2 decimal places. + csv_df = csv_df.round({'q':2,'stage':2}) + # If a site folder exists (ie a flow file was written) save files containing site attributes. + output_dir = os.path.join(workspace, huc, lid) + if os.path.exists(output_dir): + # Export DataFrame to csv containing attributes + csv_df.to_csv(os.path.join(attributes_dir, f'{lid}_attributes.csv'), index = False) + else: + all_messages.append([f'{lid}:missing all calculated flows']) + + # If it made it to this point (i.e. no continues), there were no major preventers of mapping + all_messages.append([f'{lid}:OK']) + mark_complete(output_dir) + # Write all_messages by HUC to be scraped later. 
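Worked through with invented threshold stages, the interval maps generated above would be labeled like this (a sketch of the same binning, assuming action=3 ft, minor=5 ft, moderate=7 ft, major=9 ft and a 5 ft cap past major; none of these values come from the PR):

import numpy as np

action, minor, moderate, major = 3.0, 5.0, 7.0, 9.0   # hypothetical threshold stages (ft)
past_major_interval_cap = 5.0

interval_list = np.arange(action, major + past_major_interval_cap, 1.0)  # 3.0 .. 13.0 ft

for stage in interval_list:
    if action <= stage < minor:
        label = 'action'
    elif minor <= stage < moderate:
        label = 'minor'
    elif moderate <= stage < major:
        label = 'moderate'
    else:
        label = 'major'
    print(f"{label}_{str(stage).replace('.', 'p')}ft")
# action_3p0ft, action_4p0ft, minor_5p0ft, minor_6p0ft, moderate_7p0ft, moderate_8p0ft, major_9p0ft ... major_13p0ft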
+ messages_dir = os.path.join(workspace, 'messages') + if not os.path.exists(messages_dir): + os.mkdir(messages_dir) + huc_messages_csv = os.path.join(messages_dir, huc + '_messages.csv') + with open(huc_messages_csv, 'w') as output_csv: + writer = csv.writer(output_csv) + writer.writerows(all_messages) + + +def generate_stage_based_categorical_fim(workspace, fim_version, fim_dir, nwm_us_search, nwm_ds_search, number_of_jobs, lid_to_run, attributes_dir, number_of_interval_jobs, past_major_interval_cap, job_number_huc): - # Update status for nws_lid in missing hucs and change mapped attribute to 'no' - flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'status'] = flows_df['status'] + ' and all categories failed to map because missing HUC information' - flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'mapped'] = 'no' + flood_categories = ['action', 'minor', 'moderate', 'major', 'record'] - # Clean up GeoDataFrame and rename columns for consistency - flows_df = flows_df.drop(columns = ['did_it_map','map_status']) - flows_df = flows_df.rename(columns = {'nws_lid':'ahps_lid'}) + huc_dictionary, out_gdf, metadata_url, threshold_url, all_lists, nwm_flows_df = generate_catfim_flows(workspace, nwm_us_search, nwm_ds_search, stage_based=True, fim_dir=fim_dir, lid_to_run=lid_to_run) + + with ProcessPoolExecutor(max_workers=job_number_huc) as executor: + for huc in huc_dictionary: + executor.submit(iterate_through_huc_stage_based, workspace, huc, + fim_dir, huc_dictionary, threshold_url, flood_categories, + all_lists, past_major_interval_cap, number_of_jobs, + number_of_interval_jobs, attributes_dir, nwm_flows_df) + + print('Wrapping up Stage-Based CatFIM...') + csv_files = os.listdir(attributes_dir) + all_csv_df = pd.DataFrame() + refined_csv_files_list = [] + for csv_file in csv_files: + full_csv_path = os.path.join(attributes_dir, csv_file) + # HUC has to be read in as string to preserve leading zeros. + try: + temp_df = pd.read_csv(full_csv_path, dtype={'huc':str}) + all_csv_df = all_csv_df.append(temp_df, ignore_index = True) + refined_csv_files_list.append(csv_file) + except Exception: # Happens if a file is empty (i.e. no mapping) + pass + # Write to file + all_csv_df.to_csv(os.path.join(workspace, 'nws_lid_attributes.csv'), index = False) + + # This section populates a geopackage of all potential sites and details + # whether it was mapped or not (mapped field) and if not, why (status field). + + # Preprocess the out_gdf GeoDataFrame. Reproject and reformat fields. + viz_out_gdf = out_gdf.to_crs(VIZ_PROJECTION) + viz_out_gdf.rename(columns = {'identifiers_nwm_feature_id': 'nwm_seg', 'identifiers_nws_lid':'nws_lid', 'identifiers_usgs_site_code':'usgs_gage'}, inplace = True) + viz_out_gdf['nws_lid'] = viz_out_gdf['nws_lid'].str.lower() + + # Using list of csv_files, populate DataFrame of all nws_lids that had + # a flow file produced and denote with "mapped" column. + nws_lids = [] + for csv_file in csv_files: + nws_lids.append(csv_file.split('_attributes')[0]) + lids_df = pd.DataFrame(nws_lids, columns = ['nws_lid']) + lids_df['mapped'] = 'yes' + + # Identify what lids were mapped by merging with lids_df. Populate + # 'mapped' column with 'No' if sites did not map. + viz_out_gdf = viz_out_gdf.merge(lids_df, how = 'left', on = 'nws_lid') + viz_out_gdf['mapped'] = viz_out_gdf['mapped'].fillna('no') + + # Create list from all messages in messages dir. 
+ messages_dir = os.path.join(workspace, 'messages') + all_messages = [] + all_message_csvs = os.listdir(messages_dir) + for message_csv in all_message_csvs: + full_message_csv_path = os.path.join(messages_dir, message_csv) + with open(full_message_csv_path, newline='') as message_file: + reader = csv.reader(message_file) + for row in reader: + all_messages.append(row) + + # Filter out columns and write out to file + nws_sites_layer = os.path.join(workspace, 'nws_lid_sites.gpkg') - # Write out to file - nws_lid_path = Path(output_mapping_dir) / 'nws_lid_sites.shp' - flows_df.to_file(nws_lid_path) + # Only write to sites geopackage if it didn't exist yet + # (and this line shouldn't have been reached if we had an interrupted + # run previously and are picking back up with a restart) + if not os.path.exists(nws_sites_layer): + + # Write messages to DataFrame, split into columns, aggregate messages. + messages_df = pd.DataFrame(all_messages, columns = ['message']) + messages_df = messages_df['message'].str.split(':', n = 1, expand = True).rename(columns={0:'nws_lid', 1:'status'}) + status_df = messages_df.groupby(['nws_lid'])['status'].apply(', '.join).reset_index() + + # Join messages to populate status field to candidate sites. Assign + # status for null fields. + viz_out_gdf = viz_out_gdf.merge(status_df, how = 'left', on = 'nws_lid') + + # viz_out_gdf['status'] = viz_out_gdf['status'].fillna('OK') + + # Add acceptance criteria to viz_out_gdf before writing + viz_out_gdf['acceptable_coord_acc_code_list'] = str(acceptable_coord_acc_code_list) + viz_out_gdf['acceptable_coord_method_code_list'] = str(acceptable_coord_method_code_list) + viz_out_gdf['acceptable_alt_acc_thresh'] = float(acceptable_alt_acc_thresh) + viz_out_gdf['acceptable_alt_meth_code_list'] = str(acceptable_alt_meth_code_list) + viz_out_gdf['acceptable_site_type_list'] = str(acceptable_site_type_list) -if __name__ == '__main__': + viz_out_gdf.to_file(nws_sites_layer, driver='GPKG') + + return nws_sites_layer + +def produce_stage_based_catfim_tifs(stage, datum_adj_ft, branch_dir, lid_usgs_elev, lid_altitude, fim_dir, segments, lid, huc, lid_directory, category, number_of_jobs): + messages = [] + + # Determine datum-offset water surface elevation (from above). + datum_adj_wse = stage + datum_adj_ft + lid_altitude + datum_adj_wse_m = datum_adj_wse*0.3048 # Convert ft to m + + # Subtract HAND gage elevation from HAND WSE to get HAND stage. + hand_stage = datum_adj_wse_m - lid_usgs_elev + + # Produce extent tif hand_stage. Multiprocess across branches. + branches = os.listdir(branch_dir) + with ProcessPoolExecutor(max_workers=number_of_jobs) as executor: + for branch in branches: + # Define paths to necessary files to produce inundation grids. + full_branch_path = os.path.join(branch_dir, branch) + rem_path = os.path.join(fim_dir, huc, full_branch_path, 'rem_zeroed_masked_' + branch + '.tif') + catchments_path = os.path.join(fim_dir, huc, full_branch_path, 'gw_catchments_reaches_filtered_addedAttributes_' + branch + '.tif') + hydrotable_path = os.path.join(fim_dir, huc, full_branch_path, 'hydroTable_' + branch + '.csv') + + if not os.path.exists(rem_path): + messages.append([f"{lid}:rem doesn't exist"]) + continue + if not os.path.exists(catchments_path): + messages.append([f"{lid}:catchments files don't exist"]) + continue + if not os.path.exists(hydrotable_path): + messages.append([f"{lid}:hydrotable doesn't exist"]) + continue + + # Use hydroTable to determine hydroid_list from site_ms_segments. 
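Illustrative sketch (not part of this diff): the datum arithmetic at the top of produce_stage_based_catfim_tifs converts a gage stage to a HAND stage in metres. A worked example with made-up values (the datum correction would come from the get_datum/ngvd_to_navd_ft helpers imported in the flows script):

stage = 10.0           # ft, stage for the category being mapped (assumed value)
datum_adj_ft = 0.5     # ft, datum correction for the gage (assumed value)
lid_altitude = 700.0   # ft, gage zero elevation (assumed value)
lid_usgs_elev = 214.0  # m, HAND-derived elevation at the gage (assumed value)

datum_adj_wse = stage + datum_adj_ft + lid_altitude  # 710.5 ft water-surface elevation
datum_adj_wse_m = datum_adj_wse * 0.3048             # 216.56 m
hand_stage = datum_adj_wse_m - lid_usgs_elev         # ~2.56 m above the HAND surface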
+ hydrotable_df = pd.read_csv(hydrotable_path) + hydroid_list = [] + + # Determine hydroids at which to perform inundation + for feature_id in segments: + try: + subset_hydrotable_df = hydrotable_df[hydrotable_df['feature_id'] == int(feature_id)] + hydroid_list += list(subset_hydrotable_df.HydroID.unique()) + except IndexError: + pass + + if len(hydroid_list) == 0: +# messages.append(f"{lid}:no matching hydroids") # Some branches don't have matching hydroids + continue + + # If no segments, write message and exit out + if not segments: + messages.append([f'{lid}:missing nwm segments']) + continue + + # Create inundation maps with branch and stage data + try: + print("Generating stage-based FIM for " + huc + " and branch " + branch) + executor.submit(produce_inundation_map_with_stage_and_feature_ids, rem_path, catchments_path, hydroid_list, hand_stage, lid_directory, category, huc, lid, branch) + except Exception: + messages.append([f'{lid}:inundation failed at {category}']) + + # -- MOSAIC -- # + # Merge all rasters in lid_directory that have the same magnitude/category. + path_list = [] + lid_dir_list = os.listdir(lid_directory) + print("Merging " + category) + for f in lid_dir_list: + if category in f: + path_list.append(os.path.join(lid_directory, f)) + path_list.sort() # To force branch 0 first in list, sort + + if len(path_list) > 0: + zero_branch_grid = path_list[0] + zero_branch_src = rasterio.open(zero_branch_grid) + zero_branch_array = zero_branch_src.read(1) + summed_array = zero_branch_array # Initialize it as the branch zero array + + # Loop through remaining items in list and sum them with summed_array + for remaining_raster in path_list[1:]: + remaining_raster_src = rasterio.open(remaining_raster) + remaining_raster_array_original = remaining_raster_src.read(1) + + # Reproject non-branch-zero grids so I can sum them with the branch zero grid + remaining_raster_array = np.empty(zero_branch_array.shape, dtype=np.int8) + reproject(remaining_raster_array_original, + destination = remaining_raster_array, + src_transform = remaining_raster_src.transform, + src_crs = remaining_raster_src.crs, + src_nodata = remaining_raster_src.nodata, + dst_transform = zero_branch_src.transform, + dst_crs = zero_branch_src.crs, + dst_nodata = -1, + dst_resolution = zero_branch_src.res, + resampling = Resampling.nearest) + # Sum rasters + summed_array = summed_array + remaining_raster_array + + del zero_branch_array # Clean up + + # Define path to merged file, in same format as expected by post_process_cat_fim_for_viz function + output_tif = os.path.join(lid_directory, lid + '_' + category + '_extent.tif') + profile = zero_branch_src.profile + summed_array = summed_array.astype('uint8') + with rasterio.open(output_tif, 'w', **profile) as dst: + dst.write(summed_array, 1) + del summed_array + + return messages, hand_stage, datum_adj_wse, datum_adj_wse_m + + +if __name__ == '__main__': + # Parse arguments parser = argparse.ArgumentParser(description = 'Run Categorical FIM') - parser.add_argument('-f','--fim_version',help='Name of directory containing outputs of fim_run.sh',required=True) - parser.add_argument('-j','--number_of_jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) + parser.add_argument('-f', '--fim_run_dir', help='Path to directory containing HAND outputs, e.g. 
/data/previous_fim/fim_4_0_9_2',
+                        required=True)
+    parser.add_argument('-jh','--job_number_huc',help='Number of processes to use for HUC scale operations.'\
+                        ' HUC and inundation job numbers should multiply to no more than one less than the CPU count of the'\
+                        ' machine. CatFIM sites generally only have 2-3 branches overlapping a site, so this number can be kept low (2-4)', required=False, default=1, type=int)
+    parser.add_argument('-jn','--job_number_inundate', help='Number of processes to use for inundating.'\
+                        ' HUC and inundation job numbers should multiply to no more than one less than the CPU count'\
+                        ' of the machine.', required=False, default=1, type=int)
+    parser.add_argument('-a', '--stage_based', help = 'Run stage-based CatFIM instead of flow-based?'\
+                        ' NOTE: flow-based CatFIM is the default.', required=False, default=False, action='store_true')
+    parser.add_argument('-t', '--output_folder', help = 'Target: Where the output folder will be',
+                        required = False, default = '/data/catfim/')
+    parser.add_argument('-o','--overwrite', help='Overwrite files', required=False, action="store_true")
+    parser.add_argument('-s','--search', help='Upstream and downstream search in miles. How far up and downstream do you want to go?',
+                        required=False, default='5')
+    parser.add_argument('-l','--lid_to_run', help='NWS LID, lowercase, to produce CatFIM for. Currently only accepts one. Default is all sites',
+                        required=False, default='all')
+    parser.add_argument('-ji','--job_number_intervals', help='Number of processes to use for inundating multiple intervals in stage-based'\
+                        ' inundation. Inundation and interval job numbers should multiply to no more than one less than the CPU count'\
+                        ' of the machine.', required=False, default=1, type=int)
+    parser.add_argument('-mc','--past_major_interval_cap', help='Stage-Based Only. How many feet past major do you want to go'\
+                        ' for the interval FIMs?', required=False, default=5.0, type=float)
+    args = vars(parser.parse_args())
+    process_generate_categorical_fim(**args)
 
-    # Get arguments
-    fim_version = args['fim_version']
-    number_of_jobs = args['number_of_jobs']
-
-    # Define default arguments.
Modify these if necessary - fim_run_dir = Path(f'{fim_version}') - fim_version_folder = os.path.basename(fim_version) - output_flows_dir = Path(f'/data/catfim/{fim_version_folder}/flows') - output_mapping_dir = Path(f'/data/catfim/{fim_version_folder}/mapping') - nwm_us_search = '5' - nwm_ds_search = '5' - write_depth_tiff = False - - ## Run CatFIM scripts in sequence - # Generate CatFIM flow files - print('Creating flow files') - start = time.time() - subprocess.call(['python3','/foss_fim/tools/generate_categorical_fim_flows.py', '-w' , str(output_flows_dir), '-u', nwm_us_search, '-d', nwm_ds_search]) - end = time.time() - elapsed_time = (end-start)/60 - print(f'Finished creating flow files in {elapsed_time} minutes') - - # Generate CatFIM mapping - print('Begin mapping') - start = time.time() - subprocess.call(['python3','/foss_fim/tools/generate_categorical_fim_mapping.py', '-r' , str(fim_run_dir), '-s', str(output_flows_dir), '-o', str(output_mapping_dir), '-j', str(number_of_jobs)]) - end = time.time() - elapsed_time = (end-start)/60 - print(f'Finished mapping in {elapsed_time} minutes') - - # Updating mapping status - print('Updating mapping status') - update_mapping_status(str(output_mapping_dir), str(output_flows_dir)) diff --git a/tools/generate_categorical_fim_flows.py b/tools/generate_categorical_fim_flows.py index d2f5f0501..3c64b25ac 100755 --- a/tools/generate_categorical_fim_flows.py +++ b/tools/generate_categorical_fim_flows.py @@ -1,8 +1,12 @@ #!/usr/bin/env python3 from pathlib import Path import pandas as pd +import numpy as np +import geopandas as gpd +from datetime import datetime import time -from tools_shared_functions import aggregate_wbd_hucs, mainstem_nwm_segs, get_thresholds, flow_data, get_metadata, get_nwm_segs +from tools_shared_functions import aggregate_wbd_hucs, mainstem_nwm_segs, get_thresholds, flow_data, get_metadata, get_nwm_segs, get_datum, ngvd_to_navd_ft, filter_nwm_segments_by_stream_order +from concurrent.futures import ProcessPoolExecutor, as_completed, wait import argparse from dotenv import load_dotenv import os @@ -10,8 +14,6 @@ sys.path.append('/foss_fim/src') from utils.shared_variables import VIZ_PROJECTION -EVALUATED_SITES_CSV = r'/data/inputs/ahps_sites/evaluated_ahps_sites.csv' - def get_env_paths(): load_dotenv() @@ -21,7 +23,114 @@ def get_env_paths(): return API_BASE_URL, WBD_LAYER -def generate_catfim_flows(workspace, nwm_us_search, nwm_ds_search): +def process_generate_flows(huc, huc_dictionary, threshold_url, all_lists, workspace, attributes_dir, huc_messages_dir, nwm_flows_df): + + # Process each huc unit, first define message variable and flood categories. + all_messages = [] + flood_categories = ['action', 'minor', 'moderate', 'major', 'record'] + + print(f'Iterating through {huc}') + #Get list of nws_lids + nws_lids = huc_dictionary[huc] + #Loop through each lid in list to create flow file + for lid in nws_lids: + #Convert lid to lower case + lid = lid.lower() + #Get stages and flows for each threshold from the WRDS API. Priority given to USGS calculated flows. + print("getting thresholds") + stages, flows = get_thresholds(threshold_url = threshold_url, select_by = 'nws_lid', selector = lid, threshold = 'all') + if stages == None or flows == None: + print("Likely WRDS error") + continue + #Check if stages are supplied, if not write message and exit. 
+ if all(stages.get(category, None)==None for category in flood_categories): + message = f'{lid}:missing threshold stages' + all_messages.append(message) + continue + #Check if calculated flows are supplied, if not write message and exit. + if all(flows.get(category, None) == None for category in flood_categories): + message = f'{lid}:missing calculated flows' + all_messages.append(message) + continue + #find lid metadata from master list of metadata dictionaries (line 66). + metadata = next((item for item in all_lists if item['identifiers']['nws_lid'] == lid.upper()), False) + + #Get mainstem segments of LID by intersecting LID segments with known mainstem segments. + unfiltered_segments = list(set(get_nwm_segs(metadata))) + + desired_order = metadata['nwm_feature_data']['stream_order'] + # Filter segments to be of like stream order. + print("filtering segments") + start = time.time() + segments = filter_nwm_segments_by_stream_order(unfiltered_segments, desired_order, nwm_flows_df) + end = time.time() + elapsed_time = round(((end-start)/60), 6) + print(f'Finished filtering segments in {elapsed_time} minutes') + #if no segments, write message and exit out + if not segments: + message = f'{lid}:missing nwm segments' + all_messages.append(message) + continue + # For each flood category + for category in flood_categories: + #G et the flow + flow = flows[category] + # If there is a valid flow value, write a flow file. + if flow: + #round flow to nearest hundredth + flow = round(flow,2) + #Create the guts of the flow file. + flow_info = flow_data(segments,flow) + #Define destination path and create folders + output_file = workspace / huc / lid / category / (f'ahps_{lid}_huc_{huc}_flows_{category}.csv') + output_file.parent.mkdir(parents = True, exist_ok = True) + #Write flow file to file + flow_info.to_csv(output_file, index = False) + else: + message = f'{lid}:{category} is missing calculated flow' + all_messages.append(message) + # Get various attributes of the site. + lat = float(metadata['nws_preferred']['latitude']) + lon = float(metadata['nws_preferred']['longitude']) + wfo = metadata['nws_data']['wfo'] + rfc = metadata['nws_data']['rfc'] + state = metadata['nws_data']['state'] + county = metadata['nws_data']['county'] + name = metadata['nws_data']['name'] + flow_source = flows['source'] + stage_source = stages['source'] + wrds_timestamp = stages['wrds_timestamp'] + nrldb_timestamp = metadata['nrldb_timestamp'] + nwis_timestamp = metadata['nwis_timestamp'] + + # Create a csv with same information as shapefile but with each threshold as new record. + csv_df = pd.DataFrame() + for threshold in flood_categories: + line_df = pd.DataFrame({'nws_lid': [lid], 'name':name, 'WFO': wfo, 'rfc':rfc, 'huc':[huc], 'state':state, 'county':county, 'magnitude': threshold, 'q':flows[threshold], 'q_uni':flows['units'], 'q_src':flow_source, 'stage':stages[threshold], 'stage_uni':stages['units'], 's_src':stage_source, 'wrds_time':wrds_timestamp, 'nrldb_time':nrldb_timestamp,'nwis_time':nwis_timestamp, 'lat':[lat], 'lon':[lon]}) + csv_df = csv_df.append(line_df) + #Round flow and stage columns to 2 decimal places. + csv_df = csv_df.round({'q':2,'stage':2}) + + # If a site folder exists (ie a flow file was written) save files containing site attributes. 
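Illustrative sketch (not part of this diff): the flow files written above land in a workspace/huc/lid/category tree. With placeholder identifiers:

from pathlib import Path

workspace = Path('/data/catfim/test/flows')         # placeholder workspace
huc, lid, category = '12090301', 'abct2', 'minor'   # placeholder identifiers

output_file = workspace / huc / lid / category / f'ahps_{lid}_huc_{huc}_flows_{category}.csv'
# -> /data/catfim/test/flows/12090301/abct2/minor/ahps_abct2_huc_12090301_flows_minor.csv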
+ output_dir = workspace / huc / lid + if output_dir.exists(): + #Export DataFrame to csv containing attributes + csv_df.to_csv(os.path.join(attributes_dir, f'{lid}_attributes.csv'), index = False) + message = f'{lid}:flows available' + all_messages.append(message) + else: + message = f'{lid}:missing all calculated flows' + all_messages.append(message) + + # Write all_messages to huc-specific file. + print("Writing message file for huc") + huc_messages_txt_file = os.path.join(huc_messages_dir, str(huc) + '_messages.txt') + with open(huc_messages_txt_file, 'w') as f: + for item in all_messages: + f.write("%s\n" % item) + + +def generate_catfim_flows(workspace, nwm_us_search, nwm_ds_search, stage_based, fim_dir, lid_to_run, attributes_dir="", job_number_huc=1): ''' This will create static flow files for all nws_lids and save to the workspace directory with the following format: @@ -33,7 +142,6 @@ def generate_catfim_flows(workspace, nwm_us_search, nwm_ds_search): This will use the WRDS API to get the nwm segments as well as the flow values for each threshold at each nws_lid and then create the necessary flow file to use for inundation mapping. - Parameters ---------- workspace : STR @@ -48,10 +156,10 @@ def generate_catfim_flows(workspace, nwm_us_search, nwm_ds_search): Returns ------- None. - ''' - - all_start = time.time() + + all_start = datetime.now() + API_BASE_URL, WBD_LAYER = get_env_paths() #Define workspace and wbd_path as a pathlib Path. Convert search distances to integer. workspace = Path(workspace) nwm_us_search = int(nwm_us_search) @@ -60,171 +168,139 @@ def generate_catfim_flows(workspace, nwm_us_search, nwm_ds_search): threshold_url = f'{API_BASE_URL}/nws_threshold' ################################################################### - #Create workspace + # Create workspace workspace.mkdir(parents=True,exist_ok = True) - print('Retrieving metadata...') - #Get metadata for 'CONUS' - conus_list, conus_dataframe = get_metadata(metadata_url, select_by = 'nws_lid', selector = ['all'], must_include = 'nws_data.rfc_forecast_point', upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search ) + # Create HUC message directory to store messages that will be read and joined after multiprocessing + huc_messages_dir = os.path.join(workspace, 'huc_messages') + if not os.path.exists(huc_messages_dir): + os.mkdir(huc_messages_dir) + + # Open NWM flows geopackage + nwm_flows_gpkg = r'/data/inputs/nwm_hydrofabric/nwm_flows.gpkg' + nwm_flows_df = gpd.read_file(nwm_flows_gpkg) - #Get metadata for Islands - islands_list, islands_dataframe = get_metadata(metadata_url, select_by = 'state', selector = ['HI','PR'] , must_include = None, upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search) + print(f'Retrieving metadata for site(s): {lid_to_run}...') + start_dt = datetime.now() + + #Get metadata for 'CONUS' + print(metadata_url) + if lid_to_run != 'all': + all_lists, conus_dataframe = get_metadata(metadata_url, select_by = 'nws_lid', selector = [lid_to_run], must_include = 'nws_data.rfc_forecast_point', upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search) + else: + # Get CONUS metadata + conus_list, conus_dataframe = get_metadata(metadata_url, select_by = 'nws_lid', selector = ['all'], must_include = 'nws_data.rfc_forecast_point', upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search) + # Get metadata for Islands + islands_list, islands_dataframe = get_metadata(metadata_url, select_by = 
'state', selector = ['HI','PR'] , must_include = None, upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search) + # Append the dataframes and lists + all_lists = conus_list + islands_list + print(len(all_lists)) - #Append the dataframes and lists - all_lists = conus_list + islands_list + end_dt = datetime.now() + time_duration = end_dt - start_dt + print(f"Retrieving metadata Duration: {str(time_duration).split('.')[0]}") + print() print('Determining HUC using WBD layer...') - #Assign HUCs to all sites using a spatial join of the FIM 3 HUC layer. - #Get a dictionary of hucs (key) and sites (values) as well as a GeoDataFrame - #of all sites used later in script. - huc_dictionary, out_gdf = aggregate_wbd_hucs(metadata_list = all_lists, wbd_huc8_path = WBD_LAYER) - - #Get all possible mainstem segments - print('Getting list of mainstem segments') - #Import list of evaluated sites - print(EVALUATED_SITES_CSV) - print(os.path.exists(EVALUATED_SITES_CSV)) - list_of_sites = pd.read_csv(EVALUATED_SITES_CSV)['Total_List'].to_list() - #The entire routine to get mainstems is hardcoded in this function. - ms_segs = mainstem_nwm_segs(metadata_url, list_of_sites) - - #Loop through each huc unit, first define message variable and flood categories. - all_messages = [] - flood_categories = ['action', 'minor', 'moderate', 'major', 'record'] - for huc in huc_dictionary: - print(f'Iterating through {huc}') - #Get list of nws_lids - nws_lids = huc_dictionary[huc] - #Loop through each lid in list to create flow file - for lid in nws_lids: - #Convert lid to lower case - lid = lid.lower() - #Get stages and flows for each threshold from the WRDS API. Priority given to USGS calculated flows. - stages, flows = get_thresholds(threshold_url = threshold_url, select_by = 'nws_lid', selector = lid, threshold = 'all') - #Check if stages are supplied, if not write message and exit. - if all(stages.get(category, None)==None for category in flood_categories): - message = f'{lid}:missing threshold stages' - all_messages.append(message) - continue - #Check if calculated flows are supplied, if not write message and exit. - if all(flows.get(category, None) == None for category in flood_categories): - message = f'{lid}:missing calculated flows' - all_messages.append(message) - continue - - #find lid metadata from master list of metadata dictionaries (line 66). - metadata = next((item for item in all_lists if item['identifiers']['nws_lid'] == lid.upper()), False) + start_dt = datetime.now() + + # Assign HUCs to all sites using a spatial join of the FIM 3 HUC layer. + # Get a dictionary of hucs (key) and sites (values) as well as a GeoDataFrame + # of all sites used later in script. + huc_dictionary, out_gdf = aggregate_wbd_hucs(metadata_list = all_lists, wbd_huc8_path = WBD_LAYER, retain_attributes=True) + # Drop list fields if invalid + out_gdf = out_gdf.drop(['downstream_nwm_features'], axis=1, errors='ignore') + out_gdf = out_gdf.drop(['upstream_nwm_features'], axis=1, errors='ignore') + out_gdf = out_gdf.astype({'metadata_sources': str}) - #Get mainstem segments of LID by intersecting LID segments with known mainstem segments. 
- segments = get_nwm_segs(metadata) - site_ms_segs = set(segments).intersection(ms_segs) - segments = list(site_ms_segs) - #if no segments, write message and exit out - if not segments: - print(f'{lid} no segments') - message = f'{lid}:missing nwm segments' - all_messages.append(message) - continue - #For each flood category - for category in flood_categories: - #Get the flow - flow = flows[category] - #If there is a valid flow value, write a flow file. - if flow: - #round flow to nearest hundredth - flow = round(flow,2) - #Create the guts of the flow file. - flow_info = flow_data(segments,flow) - #Define destination path and create folders - output_file = workspace / huc / lid / category / (f'ahps_{lid}_huc_{huc}_flows_{category}.csv') - output_file.parent.mkdir(parents = True, exist_ok = True) - #Write flow file to file - flow_info.to_csv(output_file, index = False) - else: - message = f'{lid}:{category} is missing calculated flow' - all_messages.append(message) + end_dt = datetime.now() + time_duration = end_dt - start_dt + print(f"Determining HUC using WBD layer Duration: {str(time_duration).split('.')[0]}") + print() + + if stage_based: + return huc_dictionary, out_gdf, metadata_url, threshold_url, all_lists, nwm_flows_df - #Get various attributes of the site. - lat = float(metadata['usgs_preferred']['latitude']) - lon = float(metadata['usgs_preferred']['longitude']) - wfo = metadata['nws_data']['wfo'] - rfc = metadata['nws_data']['rfc'] - state = metadata['nws_data']['state'] - county = metadata['nws_data']['county'] - name = metadata['nws_data']['name'] - flow_units = flows['units'] - flow_source = flows['source'] - stage_units = stages['units'] - stage_source = stages['source'] - wrds_timestamp = stages['wrds_timestamp'] - nrldb_timestamp = metadata['nrldb_timestamp'] - nwis_timestamp = metadata['nwis_timestamp'] + print("Generating flows for hucs using " + str(job_number_huc) + " jobs...") + start_dt = datetime.now() + + with ProcessPoolExecutor(max_workers=job_number_huc) as executor: + for huc in huc_dictionary: + executor.submit(process_generate_flows, huc, huc_dictionary, threshold_url, all_lists, workspace, attributes_dir, huc_messages_dir, nwm_flows_df) - #Create a csv with same information as shapefile but with each threshold as new record. - csv_df = pd.DataFrame() - for threshold in flood_categories: - line_df = pd.DataFrame({'nws_lid': [lid], 'name':name, 'WFO': wfo, 'rfc':rfc, 'huc':[huc], 'state':state, 'county':county, 'magnitude': threshold, 'q':flows[threshold], 'q_uni':flows['units'], 'q_src':flow_source, 'stage':stages[threshold], 'stage_uni':stages['units'], 's_src':stage_source, 'wrds_time':wrds_timestamp, 'nrldb_time':nrldb_timestamp,'nwis_time':nwis_timestamp, 'lat':[lat], 'lon':[lon]}) - csv_df = csv_df.append(line_df) - #Round flow and stage columns to 2 decimal places. - csv_df = csv_df.round({'q':2,'stage':2}) - - #If a site folder exists (ie a flow file was written) save files containing site attributes. 
- output_dir = workspace / huc / lid - if output_dir.exists(): - #Export DataFrame to csv containing attributes - csv_df.to_csv(output_dir / f'{lid}_attributes.csv', index = False) - else: - message = f'{lid}:missing all calculated flows' - all_messages.append(message) - - print('wrapping up...') + end_dt = datetime.now() + time_duration = end_dt - start_dt + print(f"Generating flows for hucs Duration: {str(time_duration).split('.')[0]}") + print() + + print('Wrapping up flows generation...') #Recursively find all *_attributes csv files and append - csv_files = list(workspace.rglob('*_attributes.csv')) + csv_files = os.listdir(attributes_dir) all_csv_df = pd.DataFrame() for csv in csv_files: - #Huc has to be read in as string to preserve leading zeros. - temp_df = pd.read_csv(csv, dtype={'huc':str}) + full_csv_path = os.path.join(attributes_dir, csv) + # Huc has to be read in as string to preserve leading zeros. + temp_df = pd.read_csv(full_csv_path, dtype={'huc':str}) all_csv_df = all_csv_df.append(temp_df, ignore_index = True) - #Write to file - all_csv_df.to_csv(workspace / 'nws_lid_attributes.csv', index = False) + # Write to file + all_csv_df.to_csv(os.path.join(workspace, 'nws_lid_attributes.csv'), index = False) - #This section populates a shapefile of all potential sites and details - #whether it was mapped or not (mapped field) and if not, why (status field). + # This section populates a shapefile of all potential sites and details + # whether it was mapped or not (mapped field) and if not, why (status field). - #Preprocess the out_gdf GeoDataFrame. Reproject and reformat fields. + # Preprocess the out_gdf GeoDataFrame. Reproject and reformat fields. viz_out_gdf = out_gdf.to_crs(VIZ_PROJECTION) viz_out_gdf.rename(columns = {'identifiers_nwm_feature_id': 'nwm_seg', 'identifiers_nws_lid':'nws_lid', 'identifiers_usgs_site_code':'usgs_gage'}, inplace = True) viz_out_gdf['nws_lid'] = viz_out_gdf['nws_lid'].str.lower() - #Using list of csv_files, populate DataFrame of all nws_lids that had - #a flow file produced and denote with "mapped" column. - nws_lids = [file.stem.split('_attributes')[0] for file in csv_files] + # Using list of csv_files, populate DataFrame of all nws_lids that had + # a flow file produced and denote with "mapped" column. + nws_lids = [] + for csv_file in csv_files: + nws_lids.append(csv_file.split('_attributes')[0]) lids_df = pd.DataFrame(nws_lids, columns = ['nws_lid']) lids_df['mapped'] = 'yes' - #Identify what lids were mapped by merging with lids_df. Populate - #'mapped' column with 'No' if sites did not map. + # Identify what lids were mapped by merging with lids_df. Populate + # 'mapped' column with 'No' if sites did not map. viz_out_gdf = viz_out_gdf.merge(lids_df, how = 'left', on = 'nws_lid') viz_out_gdf['mapped'] = viz_out_gdf['mapped'].fillna('no') - #Write messages to DataFrame, split into columns, aggregate messages. - messages_df = pd.DataFrame(all_messages, columns = ['message']) + # Read all messages for all HUCs TODO + huc_message_list = [] + huc_messages_dir_list = os.listdir(huc_messages_dir) + for huc_message_file in huc_messages_dir_list: + full_path_file = os.path.join(huc_messages_dir, huc_message_file) + with open(full_path_file, 'r') as f: + if full_path_file.endswith('.txt'): + lines = f.readlines() + for line in lines: + huc_message_list.append(line) + + # Write messages to DataFrame, split into columns, aggregate messages. 
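Illustrative sketch (not part of this diff): every status message follows the '{lid}:{status}' convention, which is what lets the aggregation below split on the first ':' and join multiple statuses per site. With made-up messages:

import pandas as pd

huc_message_list = ['abct2:action is missing calculated flow',
                    'abct2:record is missing calculated flow',
                    'defn1:missing nwm segments']

messages_df = pd.DataFrame(huc_message_list, columns=['message'])
messages_df = messages_df['message'].str.split(':', n=1, expand=True).rename(columns={0: 'nws_lid', 1: 'status'})
status_df = messages_df.groupby(['nws_lid'])['status'].apply(', '.join).reset_index()
# abct2 -> 'action is missing calculated flow, record is missing calculated flow'
# defn1 -> 'missing nwm segments'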
+ messages_df = pd.DataFrame(huc_message_list, columns = ['message']) messages_df = messages_df['message'].str.split(':', n = 1, expand = True).rename(columns={0:'nws_lid', 1:'status'}) status_df = messages_df.groupby(['nws_lid'])['status'].apply(', '.join).reset_index() - #Join messages to populate status field to candidate sites. Assign - #status for null fields. + # Join messages to populate status field to candidate sites. Assign + # status for null fields. viz_out_gdf = viz_out_gdf.merge(status_df, how = 'left', on = 'nws_lid') viz_out_gdf['status'] = viz_out_gdf['status'].fillna('all calculated flows available') - #Filter out columns and write out to file - viz_out_gdf = viz_out_gdf.filter(['nws_lid','usgs_gage','nwm_seg','HUC8','mapped','status','geometry']) - viz_out_gdf.to_file(workspace /'nws_lid_flows_sites.shp') + # Filter out columns and write out to file +# viz_out_gdf = viz_out_gdf.filter(['nws_lid','usgs_gage','nwm_seg','HUC8','mapped','status','geometry']) + nws_lid_layer = os.path.join(workspace, 'nws_lid_sites.gpkg').replace('flows', 'mapping') + + viz_out_gdf.to_file(nws_lid_layer, driver='GPKG') - #time operation - all_end = time.time() - print(f'total time is {round((all_end - all_start)/60),1} minutes') + # time operation + all_end = datetime.now() + all_time_duration = all_end - all_start + print(f"Duration: {str(all_time_duration).split('.')[0]}") + print() + return nws_lid_layer + if __name__ == '__main__': #Parse arguments @@ -232,8 +308,9 @@ def generate_catfim_flows(workspace, nwm_us_search, nwm_ds_search): parser.add_argument('-w', '--workspace', help = 'Workspace where all data will be stored.', required = True) parser.add_argument('-u', '--nwm_us_search', help = 'Walk upstream on NWM network this many miles', required = True) parser.add_argument('-d', '--nwm_ds_search', help = 'Walk downstream on NWM network this many miles', required = True) + parser.add_argument('-a', '--stage_based', help = 'Run stage-based CatFIM instead of flow-based? NOTE: flow-based CatFIM is the default.', required=False, default=False, action='store_true') + parser.add_argument('-f', '--fim-dir', help='Path to FIM outputs directory. 
Only use this option if you are running in alt-catfim mode.',required=False,default="") args = vars(parser.parse_args()) #Run get_env_paths and static_flow_lids - API_BASE_URL, WBD_LAYER = get_env_paths() generate_catfim_flows(**args) diff --git a/tools/generate_categorical_fim_mapping.py b/tools/generate_categorical_fim_mapping.py index 0827d3f08..85a5fe5f6 100755 --- a/tools/generate_categorical_fim_mapping.py +++ b/tools/generate_categorical_fim_mapping.py @@ -2,33 +2,24 @@ import sys import os -from multiprocessing import Pool +from concurrent.futures import ProcessPoolExecutor, as_completed, wait import argparse import traceback import rasterio import geopandas as gpd import pandas as pd -import shutil from rasterio.features import shapes from shapely.geometry.polygon import Polygon from shapely.geometry.multipolygon import MultiPolygon -from inundation import inundate sys.path.append('/foss_fim/src') from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION from utils.shared_functions import getDriver +from mosaic_inundation import Mosaic_inundation +from inundate_gms import Inundate_gms -INPUTS_DIR = r'/data/inputs' -magnitude_list = ['action', 'minor', 'moderate','major', 'record'] -# Define necessary variables for inundation() -hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' -mask_type, catchment_poly = 'huc', '' - - -def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif, log_file): - - no_data_list = [] - procs_list = [] +def generate_categorical_fim(fim_run_dir, source_flow_dir, output_catfim_dir, + job_number_huc, job_number_inundate, depthtif, log_file): source_flow_dir_list = os.listdir(source_flow_dir) output_flow_dir_list = os.listdir(fim_run_dir) @@ -43,36 +34,18 @@ def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, n # Loop through matching huc directories in the source_flow directory matching_hucs = list(set(output_flow_dir_list) & set(source_flow_dir_list)) - for huc in matching_hucs: - - if "." not in huc: + + with ProcessPoolExecutor(max_workers=job_number_huc) as executor: + for huc in matching_hucs: + if "." in huc: + continue # Get list of AHPS site directories ahps_site_dir = os.path.join(source_flow_dir, huc) ahps_site_dir_list = os.listdir(ahps_site_dir) - # Map paths to HAND files needed for inundation() - fim_run_huc_dir = os.path.join(fim_run_dir, huc) - rem = os.path.join(fim_run_huc_dir, 'rem_zeroed_masked.tif') - catchments = os.path.join(fim_run_huc_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') - hydroTable = os.path.join(fim_run_huc_dir, 'hydroTable.csv') - - exit_flag = False # Default to False. 
- - # Check if necessary data exist; set exit_flag to True if they don't exist - for f in [rem, catchments, hydroTable]: - if not os.path.exists(f): - no_data_list.append(f) - exit_flag = True - - # Log missing data - if exit_flag == True: - f = open(log_file, 'a+') - f.write(f"Missing data for: {fim_run_huc_dir}\n") - f.close() - - # Map path to huc directory inside out output_cat_fim_dir - cat_fim_huc_dir = os.path.join(output_cat_fim_dir, huc) + # Map path to huc directory inside out output_catfim_dir + cat_fim_huc_dir = os.path.join(output_catfim_dir, huc) if not os.path.exists(cat_fim_huc_dir): os.mkdir(cat_fim_huc_dir) @@ -89,49 +62,46 @@ def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, n # Loop through thresholds/magnitudes and define inundation output files paths for magnitude in thresholds_dir_list: - - if "." not in magnitude: - - magnitude_flows_csv = os.path.join(ahps_site_parent, magnitude, 'ahps_' + ahps_site + '_huc_' + huc + '_flows_' + magnitude + '.csv') - - if os.path.exists(magnitude_flows_csv): - - output_extent_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.tif') - - if depthtif: - output_depth_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_depth.tif') - else: - output_depth_grid = None - - # Append necessary variables to list for multiprocessing. - procs_list.append([rem, catchments, magnitude_flows_csv, huc, hydroTable, output_extent_grid, output_depth_grid, ahps_site, magnitude, log_file]) - - # Initiate multiprocessing - print(f"Running inundation for {len(procs_list)} sites using {number_of_jobs} jobs") - with Pool(processes=number_of_jobs) as pool: - pool.map(run_inundation, procs_list) - - -def run_inundation(args): - - rem = args[0] - catchments = args[1] - magnitude_flows_csv = args[2] - huc = args[3] - hydroTable = args[4] - output_extent_grid = args[5] - output_depth_grid = args[6] - ahps_site = args[7] - magnitude = args[8] - log_file = args[9] - + if "." 
in magnitude: + continue + magnitude_flows_csv = os.path.join(ahps_site_parent, magnitude, 'ahps_' + ahps_site + '_huc_' + huc + '_flows_' + magnitude + '.csv') + if os.path.exists(magnitude_flows_csv): + output_extent_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.tif') + try: + executor.submit(run_inundation, magnitude_flows_csv, huc, output_extent_grid, ahps_site, magnitude, log_file, fim_run_dir, job_number_inundate) + except Exception: + traceback.print_exc() + sys.exit(1) + + +def run_inundation(magnitude_flows_csv, huc, output_extent_grid, ahps_site, magnitude, log_file, fim_run_dir, job_number_inundate): + + huc_dir = os.path.join(fim_run_dir, huc) try: - inundate(rem,catchments,catchment_poly,hydroTable,magnitude_flows_csv,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, - subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=output_extent_grid,inundation_polygon=None, - depths=output_depth_grid,out_raster_profile=None,out_vector_profile=None,quiet=True - ) - - except: + print("Running Inundate_gms for " + huc) + map_file = Inundate_gms( hydrofabric_dir = fim_run_dir, + forecast = magnitude_flows_csv, + num_workers = job_number_inundate, + hucs = huc, + inundation_raster = output_extent_grid, + inundation_polygon = None, + depths_raster = None, + verbose = False, + log_file = None, + output_fileNames = None ) + print("Mosaicking for " + huc) + Mosaic_inundation( map_file, + mosaic_attribute = 'inundation_rasters', + mosaic_output = output_extent_grid, + mask = os.path.join(huc_dir,'wbd.gpkg'), + unit_attribute_name = 'huc8', + nodata = -9999, + workers = 1, + remove_inputs = False, + subset = None, + verbose = False ) + + except Exception: # Log errors and their tracebacks f = open(log_file, 'a+') f.write(f"{output_extent_grid} - inundation error: {traceback.format_exc()}\n") @@ -147,143 +117,172 @@ def run_inundation(args): f.write('FAILURE_huc_{}:{}:{} map failed to create\n'.format(huc,ahps_site,magnitude)) -def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, nws_lid_attributes_filename, log_file): - +def post_process_huc_level(job_number_tif, ahps_dir_list, huc_dir, attributes_dir, gpkg_dir, fim_version, huc): + + + # Loop through ahps sites + for ahps_lid in ahps_dir_list: + tifs_to_reformat_list = [] + ahps_lid_dir = os.path.join(huc_dir, ahps_lid) + + # Append desired filenames to list. + tif_list = os.listdir(ahps_lid_dir) + for tif in tif_list: + if 'extent.tif' in tif: + tifs_to_reformat_list.append(os.path.join(ahps_lid_dir, tif)) + + # Stage-Based CatFIM uses attributes from individual CSVs instead of the master CSV. 
+ nws_lid_attributes_filename = os.path.join(attributes_dir, ahps_lid + '_attributes.csv') + + print(f"Reformatting TIFs {ahps_lid} for {huc_dir}") + with ProcessPoolExecutor(max_workers=job_number_tif) as executor: + for tif_to_process in tifs_to_reformat_list: + if not os.path.exists(tif_to_process): + continue + try: + magnitude = os.path.split(tif_to_process)[1].split('_')[1] + try: + interval_stage = float((os.path.split(tif_to_process)[1].split('_')[2]).replace('p', '.').replace("ft", "")) + if interval_stage == 'extent': + interval_stage = None + except ValueError: + interval_stage = None + executor.submit(reformat_inundation_maps, ahps_lid, tif_to_process, gpkg_dir, fim_version, huc, magnitude, nws_lid_attributes_filename, interval_stage) + except Exception as ex: + print(f"*** {ex}") + traceback.print_exc() + + +def post_process_cat_fim_for_viz(job_number_huc, job_number_tif, output_catfim_dir, attributes_dir, log_file="", fim_version=""): + + print("In post processing...") # Create workspace - gpkg_dir = os.path.join(output_cat_fim_dir, 'gpkg') + gpkg_dir = os.path.join(output_catfim_dir, 'gpkg') if not os.path.exists(gpkg_dir): os.mkdir(gpkg_dir) # Find the FIM version - fim_version = os.path.basename(output_cat_fim_dir) - merged_layer = os.path.join(output_cat_fim_dir, 'catfim_library.shp') - - if not os.path.exists(merged_layer): # prevents appending to existing output - - huc_ahps_dir_list = os.listdir(output_cat_fim_dir) - skip_list=['errors','logs','gpkg',merged_layer] - - for magnitude in magnitude_list: - - procs_list = [] - - # Loop through all categories + merged_layer = os.path.join(output_catfim_dir, 'catfim_library.gpkg') + if not os.path.exists(merged_layer): # prevents appending to existing output + huc_ahps_dir_list = os.listdir(output_catfim_dir) + skip_list=['errors','logs','gpkg','missing_files.txt','messages',merged_layer] + + # Loop through all categories + print("Building list of TIFs to reformat...") + with ProcessPoolExecutor(max_workers=job_number_huc) as huc_exector: + for huc in huc_ahps_dir_list: - if huc not in skip_list: - - huc_dir = os.path.join(output_cat_fim_dir, huc) + if huc in skip_list: + continue + huc_dir = os.path.join(output_catfim_dir, huc) + try: ahps_dir_list = os.listdir(huc_dir) - - # Loop through ahps sites - for ahps_lid in ahps_dir_list: - ahps_lid_dir = os.path.join(huc_dir, ahps_lid) - - extent_grid = os.path.join(ahps_lid_dir, ahps_lid + '_' + magnitude + '_extent_' + huc + '.tif') - - if os.path.exists(extent_grid): - procs_list.append([ahps_lid, extent_grid, gpkg_dir, fim_version, huc, magnitude, nws_lid_attributes_filename]) - - else: - try: - f = open(log_file, 'a+') - f.write(f"Missing layers: {extent_gpkg}\n") - f.close() - except: - pass - - # Multiprocess with instructions - with Pool(processes=number_of_jobs) as pool: - pool.map(reformat_inundation_maps, procs_list) - + except NotADirectoryError: + continue + # If there's no mapping for a HUC, delete the HUC directory. 
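Illustrative sketch (not part of this diff): post_process_huc_level above recovers the magnitude and any interval stage straight from the extent-TIF file name. With made-up file names:

import os

for tif_to_process in ['abct2_minor_extent.tif', 'abct2_action_4p5ft_extent.tif']:
    file_name = os.path.split(tif_to_process)[1]
    magnitude = file_name.split('_')[1]
    try:
        # Interval grids carry the stage in the third token ('4p5ft' -> 4.5).
        interval_stage = float(file_name.split('_')[2].replace('p', '.').replace('ft', ''))
    except ValueError:
        # Plain category grids ('..._minor_extent.tif') have no interval token.
        interval_stage = None
    # ('minor', None) for the first file, ('action', 4.5) for the second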
+ if ahps_dir_list == []: + os.rmdir(huc_dir) + continue + + huc_exector.submit(post_process_huc_level, job_number_tif, ahps_dir_list, huc_dir, attributes_dir, gpkg_dir, fim_version, huc) + # Merge all layers print(f"Merging {len(os.listdir(gpkg_dir))} layers...") - for layer in os.listdir(gpkg_dir): - + # Open dissolved extent layers diss_extent_filename = os.path.join(gpkg_dir, layer) - - # Open diss_extent diss_extent = gpd.read_file(diss_extent_filename) diss_extent['viz'] = 'yes' - + # Write/append aggregate diss_extent + print(f"Merging layer: {layer}") if os.path.isfile(merged_layer): diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False, mode='a') else: diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False) - del diss_extent - - shutil.rmtree(gpkg_dir) + + #shutil.rmtree(gpkg_dir) # TODO else: print(f"{merged_layer} already exists.") -def reformat_inundation_maps(args): +def reformat_inundation_maps(ahps_lid, extent_grid, gpkg_dir, fim_version, huc, magnitude, nws_lid_attributes_filename, interval_stage=None): try: - lid = args[0] - grid_path = args[1] - gpkg_dir = args[2] - fim_version = args[3] - huc = args[4] - magnitude = args[5] - nws_lid_attributes_filename = args[6] - # Convert raster to to shapes - with rasterio.open(grid_path) as src: + with rasterio.open(extent_grid) as src: image = src.read(1) mask = image > 0 - + # Aggregate shapes results = ({'properties': {'extent': 1}, 'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=mask,transform=src.transform))) # Convert list of shapes to polygon - extent_poly = gpd.GeoDataFrame.from_features(list(results), crs=PREP_PROJECTION) - + extent_poly = gpd.GeoDataFrame.from_features(list(results), crs=PREP_PROJECTION) # Dissolve polygons extent_poly_diss = extent_poly.dissolve(by='extent') # Update attributes extent_poly_diss = extent_poly_diss.reset_index(drop=True) - extent_poly_diss['ahps_lid'] = lid + extent_poly_diss['ahps_lid'] = ahps_lid extent_poly_diss['magnitude'] = magnitude extent_poly_diss['version'] = fim_version extent_poly_diss['huc'] = huc - + extent_poly_diss['interval_stage'] = interval_stage # Project to Web Mercator extent_poly_diss = extent_poly_diss.to_crs(VIZ_PROJECTION) # Join attributes nws_lid_attributes_table = pd.read_csv(nws_lid_attributes_filename, dtype={'huc':str}) - nws_lid_attributes_table = nws_lid_attributes_table.loc[(nws_lid_attributes_table.magnitude==magnitude) & (nws_lid_attributes_table.nws_lid==lid)] - - + nws_lid_attributes_table = nws_lid_attributes_table.loc[(nws_lid_attributes_table.magnitude==magnitude) & (nws_lid_attributes_table.nws_lid==ahps_lid)] extent_poly_diss = extent_poly_diss.merge(nws_lid_attributes_table, left_on=['ahps_lid','magnitude','huc'], right_on=['nws_lid','magnitude','huc']) - extent_poly_diss = extent_poly_diss.drop(columns='nws_lid') - # Save dissolved multipolygon - handle = os.path.split(grid_path)[1].replace('.tif', '') - - diss_extent_filename = os.path.join(gpkg_dir, handle + "_dissolved.gpkg") - + handle = os.path.split(extent_grid)[1].replace('.tif', '') + diss_extent_filename = os.path.join(gpkg_dir, f"{handle}_{huc}_dissolved.gpkg") extent_poly_diss["geometry"] = [MultiPolygon([feature]) if type(feature) == Polygon else feature for feature in extent_poly_diss["geometry"]] - + if not extent_poly_diss.empty: - extent_poly_diss.to_file(diss_extent_filename,driver=getDriver(diss_extent_filename),index=False) - except Exception as e: + except Exception: + pass # Log and clean out the gdb so it's not merged 
in later - try: - f = open(log_file, 'a+') - f.write(str(diss_extent_filename) + " - dissolve error: " + str(e)) - f.close() - except: - pass +# try: +# print(e) +## f = open(log_file, 'a+') +## f.write(str(diss_extent_filename) + " - dissolve error: " + str(e)) +## f.close() +# except: +# pass + + +def manage_catfim_mapping(fim_run_dir, source_flow_dir, output_catfim_dir, attributes_dir, job_number_huc, job_number_inundate, overwrite, depthtif): + + # Create output directory + if not os.path.exists(output_catfim_dir): + os.mkdir(output_catfim_dir) + + # Create log directory + log_dir = os.path.join(output_catfim_dir, 'logs') + if not os.path.exists(log_dir): + os.mkdir(log_dir) + + # Create error log path + log_file = os.path.join(log_dir, 'errors.log') + + job_number_tif = job_number_inundate + + print("Generating Categorical FIM") + generate_categorical_fim(fim_run_dir, source_flow_dir, output_catfim_dir, job_number_huc, job_number_inundate, depthtif, log_file) + + print("Aggregating Categorical FIM") + # Get fim_version. + fim_version = os.path.basename(os.path.normpath(fim_run_dir)).replace('fim_','').replace('_ms_c', '').replace('_', '.') + post_process_cat_fim_for_viz(job_number_huc, job_number_tif, output_catfim_dir, attributes_dir, log_file, fim_version) if __name__ == '__main__': @@ -292,7 +291,7 @@ def reformat_inundation_maps(args): parser = argparse.ArgumentParser(description='Categorical inundation mapping for FOSS FIM.') parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) parser.add_argument('-s', '--source-flow-dir',help='Path to directory containing flow CSVs to use to generate categorical FIM.',required=True, default="") - parser.add_argument('-o', '--output-cat-fim-dir',help='Path to directory where categorical FIM outputs will be written.',required=True, default="") + parser.add_argument('-o', '--output-catfim-dir',help='Path to directory where categorical FIM outputs will be written.',required=True, default="") parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. 
Default is 1.',required=False, default="1",type=int) parser.add_argument('-depthtif','--write-depth-tiff',help='Using this option will write depth TIFFs.',required=False, action='store_true') @@ -300,27 +299,8 @@ def reformat_inundation_maps(args): fim_run_dir = args['fim_run_dir'] source_flow_dir = args['source_flow_dir'] - output_cat_fim_dir = args['output_cat_fim_dir'] + output_catfim_dir = args['output_catfim_dir'] number_of_jobs = int(args['number_of_jobs']) depthtif = args['write_depth_tiff'] - - # Create output directory - if not os.path.exists(output_cat_fim_dir): - os.mkdir(output_cat_fim_dir) - - # Create log directory - log_dir = os.path.join(output_cat_fim_dir, 'logs') - if not os.path.exists(log_dir): - os.mkdir(log_dir) - - # Create error log path - log_file = os.path.join(log_dir, 'errors.log') - - # Map path to points with attributes - nws_lid_attributes_filename = os.path.join(source_flow_dir, 'nws_lid_attributes.csv') - - print("Generating Categorical FIM") - generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif,log_file) - - print("Aggregating Categorical FIM") - post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir,nws_lid_attributes_filename,log_file) + + manage_catfim_mapping(fim_run_dir, source_flow_dir, output_catfim_dir, number_of_jobs, depthtif) diff --git a/tools/gms_tools/mosaic_inundation.py b/tools/gms_tools/mosaic_inundation.py deleted file mode 100644 index ea24cb75f..000000000 --- a/tools/gms_tools/mosaic_inundation.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from glob import glob -from gms_tools.overlapping_inundation import OverlapWindowMerge -import argparse -import os -import pandas as pd -from tqdm import tqdm -from tools_shared_variables import elev_raster_ndv - -def Mosaic_inundation( - map_file,mosaic_attribute='inundation_rasters',mosaic_output=None, - mask=None,unit_attribute_name='huc8', - nodata=elev_raster_ndv,workers=4, - remove_inputs=False, - subset=None,verbose=True - ): - - # check input - if mosaic_attribute not in ('inundation_rasters','depths_rasters'): - raise ValueError('Pass inundation or depths for mosaic_attribute argument') - - # load file - if isinstance(map_file,pd.DataFrame): - inundation_maps_df = map_file - del map_file - elif isinstance(map_file,str): - inundation_maps_df = pd.read_csv(map_file, - dtype={unit_attribute_name:str,'branchID':str} - ) - else: - raise TypeError('Pass Pandas Dataframe or file path string to csv for map_file argument') - - # remove NaNs - inundation_maps_df.dropna(axis=0,how='all',inplace=True) - - # subset - if subset is not None: - subset_mask = inundation_maps_df.loc[:,unit_attribute_name].isin(subset) - inundation_maps_df = inundation_maps_df.loc[subset_mask,:] - - # unique aggregation units - aggregation_units = inundation_maps_df.loc[:,unit_attribute_name].unique() - - inundation_maps_df.set_index(unit_attribute_name,drop=True,inplace=True) - - # decide upon wheter to display - if verbose & len(aggregation_units) == 1: - tqdm_disable = False - elif verbose: - tqdm_disable = False - else: - tqdm_disable = True - - for ag in tqdm(aggregation_units,disable=tqdm_disable,desc='Compositing MS and FR maps'): - - try: - inundation_maps_list = inundation_maps_df.loc[ag,mosaic_attribute].tolist() - except AttributeError: - inundation_maps_list = [ inundation_maps_df.loc[ag,mosaic_attribute] ] - - ag_mosaic_output = __append_id_to_file_name(mosaic_output,ag) - #try: - 
mosaic_by_unit(inundation_maps_list,ag_mosaic_output,nodata, - workers=1,remove_inputs=remove_inputs,mask=mask,verbose=verbose) - #except Exception as exc: - # print(ag,exc) - - - # inundation maps - inundation_maps_df.reset_index(drop=True) - - - -def mosaic_by_unit(inundation_maps_list,mosaic_output,nodata=elev_raster_ndv, - workers=1,remove_inputs=False,mask=None,verbose=False): - - - # overlap object instance - overlap = OverlapWindowMerge( inundation_maps_list, (30, 30) ) - - # mosaic - #if verbose: - # print("Mosaicing ...") - - if mosaic_output is not None: - if workers > 1: - threaded = True - else: - threaded= False - - overlap.merge_rasters(mosaic_output, threaded=threaded, workers=workers,nodata=nodata) - - if mask: - #if verbose: - # print("Masking ...") - overlap.mask_mosaic(mosaic_output,mask,outfile=mosaic_output) - - if remove_inputs: - #if verbose: - # print("Removing inputs ...") - - for inun_map in inundation_maps_list: - if inun_map is not None: - if os.path.isfile(inun_map): - os.remove(inun_map) - - -def __append_id_to_file_name(file_name,identifier): - - - if file_name is not None: - - root,extension = os.path.splitext(file_name) - - if isinstance(identifier,list): - for i in identifier: - out_file_name = root + "_{}".format(i) - out_file_name += extension - else: - out_file_name = root + "_{}".format(identifier) + extension - - else: - out_file_name = None - - return(out_file_name) - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Mosaic GMS Inundation Rasters') - parser.add_argument('-i','--map-file', help='List of file paths to inundation/depth maps to mosaic', required=True) - parser.add_argument('-a','--mask', help='File path to vector polygon mask to clip mosaic too', required=False,default=None) - parser.add_argument('-s','--subset', help='Subset units', required=False,default=None,type=str,nargs='+') - parser.add_argument('-n','--nodata', help='Inundation Maps', required=False,default=elev_raster_ndv) - parser.add_argument('-w','--workers', help='Number of Workers', required=False,default=4,type=int) - parser.add_argument('-t','--mosaic-attribute', help='Mosaiced inundation Maps', required=False,default=None) - parser.add_argument('-m','--mosaic-output', help='Mosaiced inundation Maps', required=False,default=None) - parser.add_argument('-r','--remove-inputs', help='Remove original input inundation Maps', required=False,default=False,action='store_true') - parser.add_argument('-v','--verbose', help='Remove original input inundation Maps', required=False,default=False,action='store_true') - - args = vars(parser.parse_args()) - - Mosaic_inundation(**args) \ No newline at end of file diff --git a/tools/inundate_events.py b/tools/inundate_events.py new file mode 100755 index 000000000..c3a42efc3 --- /dev/null +++ b/tools/inundate_events.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 + +from shapely.geometry import box +from concurrent.futures import ThreadPoolExecutor,as_completed +from tqdm import tqdm +import pandas as pd +import geopandas as gpd +import argparse +from inundation import inundate +import os + + +def inundate_events(hydrofabric_dir,forecast_file,inundation_file,inundation_polygon=None,jobs=1): + + forecast_df = pd.read_csv(forecast_file,infer_datetime_format=True,dtype={'huc':str},parse_dates=['date_time']) + + #list_of_hucs_to_run = { '09020318','09020317' } + #forecast_df = forecast_df.loc[ forecast_df.huc.isin(list_of_hucs_to_run),:] + + inputs = 
build_fim3_inputs(hydrofabric_dir,forecast_df,inundation_file,inundation_polygon) + + executor = ThreadPoolExecutor(max_workers=jobs) + + results = {executor.submit(inundate,**kwargs) : (kwargs['hydro_table'],kwargs['forecast']) for kwargs in inputs} + #rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=None,hucs_layerName=None, + #subset_hucs=None,num_workers=1,aggregate=False,inundation_raster=None,inundation_polygon=None, + #depths=None,out_raster_profile=None,out_vector_profile=None,quiet=False + + for future in tqdm(as_completed(results),total=len(forecast_df)): + + try: + future.result() + except Exception as exc: + print(exc,results[future]) + + executor.shutdown(wait=True) + + + +def build_fim3_inputs(hydrofabric_dir,forecast_df,inundation_file=None,inundation_polygons=None): + + + for idx,row in forecast_df.iterrows(): + + huc = row['huc'] + rem = os.path.join(hydrofabric_dir,huc,'rem_zeroed_masked.tif') + catchments_raster = os.path.join(hydrofabric_dir,huc,'gw_catchments_reaches_filtered_addedAttributes.tif') + catchment_poly = os.path.join(hydrofabric_dir,huc,'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') + hydrotable = os.path.join(hydrofabric_dir,huc,'hydroTable.csv') + + # inundation raster, inundation poly add HUC,date,time to filename + inundation_file_for_row = append_meta_data_to_file_names(inundation_file,row) + inundation_poly_for_row = append_meta_data_to_file_names(inundation_polygons,row) + + kwargs = { 'rem' : rem, + 'catchments' : catchments_raster, + 'catchment_poly' : catchment_poly , + 'hydro_table': hydrotable, + 'mask_type' : 'huc', + 'forecast' : row['forecast_file'], + 'inundation_raster' : inundation_file_for_row, + 'inundation_polygon' : inundation_poly_for_row, + 'quiet' : True + } + + yield(kwargs) + + +def append_meta_data_to_file_names(file_name,row): + + if file_name is None: + return(file_name) + + base_file_path, extension = os.path.splitext(file_name) + + hucCode = row['huc'] + site_name = row['Name'] + date_time = str(row['date_time'].year) + str(row['date_time'].month).zfill(2) + str(row['date_time'].day).zfill(2) + '_' +str(row['date_time'].hour).zfill(2) + str(row['date_time'].minute).zfill(2) + 'Z' + + + appended_file_path = f"{base_file_path}_{site_name}_{hucCode}_{date_time}{extension}" + + return(appended_file_path) + + + +if __name__ == '__main__': + + # parse arguments + parser = argparse.ArgumentParser(description='Find hucs for bounding boxes') + parser.add_argument('-y','--hydrofabric-dir', help='Bounding box file', required=True) + parser.add_argument('-f','--forecast-file', help='WBD file', required=True) + parser.add_argument('-i','--inundation-file', help='WBD file', required=False,default=None) + parser.add_argument('-j','--jobs', help='WBD file', required=False,default=None,type=int) + + args=vars(parser.parse_args()) + + inundate_events(**args) diff --git a/tools/gms_tools/inundate_gms.py b/tools/inundate_gms.py old mode 100644 new mode 100755 similarity index 61% rename from tools/gms_tools/inundate_gms.py rename to tools/inundate_gms.py index 0148da336..e1a90fa67 --- a/tools/gms_tools/inundate_gms.py +++ b/tools/inundate_gms.py @@ -1,26 +1,24 @@ #!/usr/bin/env python3 -import numpy as np -import pandas as pd -from inundation import inundate import os -from tqdm import tqdm import argparse +# import logging + +import pandas as pd + +from tqdm import tqdm +from inundation import inundate from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor,as_completed from inundation import 
hydroTableHasOnlyLakes, NoForecastFound -import traceback -import logging +from utils.shared_functions import FIM_Helpers as fh - -def Inundate_gms( - hydrofabric_dir, forecast, num_workers=1, - hucs=None, - inundation_raster=None, - inundation_polygon=None, depths_raster=None, - verbose=False, - log_file=None, - output_fileNames=None - ): +def Inundate_gms( hydrofabric_dir, forecast, num_workers = 1, + hucs = None, + inundation_raster = None, + inundation_polygon = None, depths_raster = None, + verbose = False, + log_file = None, + output_fileNames = None ): # input handling if hucs is not None: @@ -38,17 +36,17 @@ def Inundate_gms( if log_file is not None: if os.path.exists(log_file): os.remove(log_file) - - print('HUC8,BranchID,Exception',file=open(log_file,'w')) + + if verbose : + print('HUC8,BranchID,Exception',file=open(log_file,'w')) #if log_file: #logging.basicConfig(filename=log_file, level=logging.INFO) #logging.info('HUC8,BranchID,Exception') - # load gms inputs - hucs_branches = pd.read_csv( os.path.join(hydrofabric_dir,'gms_inputs.csv'), + # load fim inputs + hucs_branches = pd.read_csv( os.path.join(hydrofabric_dir,'fim_inputs.csv'), header=None, - dtype= {0:str,1:str} - ) + dtype= {0:str,1:str} ) if hucs is not None: hucs = set(hucs) @@ -59,19 +57,18 @@ def Inundate_gms( number_of_branches = len(hucs_branches) # make inundate generator - inundate_input_generator = __inundate_gms_generator( - hucs_branches,number_of_branches, - hydrofabric_dir, - inundation_raster, - inundation_polygon, - depths_raster, - forecast, - verbose=False - ) + inundate_input_generator = __inundate_gms_generator( hucs_branches, + number_of_branches, + hydrofabric_dir, + inundation_raster, + inundation_polygon, + depths_raster, + forecast, + verbose=False ) # start up process pool # better results with Process pool - executor = ProcessPoolExecutor(max_workers=num_workers) + executor = ProcessPoolExecutor(max_workers = num_workers) # collect output filenames inundation_raster_fileNames = [None] * number_of_branches @@ -80,17 +77,14 @@ def Inundate_gms( hucCodes = [None] * number_of_branches branch_ids = [None] * number_of_branches - executor_generator = { executor.submit(inundate,**inp) : ids for inp,ids in inundate_input_generator - } - + } idx = 0 for future in tqdm(as_completed(executor_generator), total=len(executor_generator), - disable=(not verbose), - desc="Inundating branches with {} workers".format(num_workers) - ): + desc="Inundating branches with {} workers".format(num_workers), + disable=(not verbose) ): hucCode, branch_id = executor_generator[future] @@ -123,7 +117,7 @@ def Inundate_gms( branch_ids[idx] = branch_id try: - #print(hucCode,branch_id,future.result()[0][0]) + #print(hucCode,branch_id,future.result()[0][0]) inundation_raster_fileNames[idx] = future.result()[0][0] except TypeError: pass @@ -144,13 +138,11 @@ def Inundate_gms( executor.shutdown(wait=True) # make filename dataframe - output_fileNames_df = pd.DataFrame( { - 'huc8' : hucCodes, + output_fileNames_df = pd.DataFrame( { 'huc8' : hucCodes, 'branchID' : branch_ids, 'inundation_rasters' : inundation_raster_fileNames, 'depths_rasters' : depths_raster_fileNames, - 'inundation_polygons' : inundation_polygon_fileNames } - ) + 'inundation_polygons' : inundation_polygon_fileNames } ) if output_fileNames is not None: output_fileNames_df.to_csv(output_fileNames,index=False) @@ -158,16 +150,13 @@ def Inundate_gms( return(output_fileNames_df) - - -def __inundate_gms_generator( - hucs_branches,number_of_branches, +def 
__inundate_gms_generator( hucs_branches, + number_of_branches, hydrofabric_dir, inundation_raster, inundation_polygon, depths_raster, - forecast,verbose=False - ): + forecast, verbose = False ): # iterate over branches for idx,row in hucs_branches.iterrows(): @@ -175,74 +164,67 @@ def __inundate_gms_generator( huc = str(row[0]) branch_id = str(row[1]) - gms_dir = os.path.join(hydrofabric_dir,huc,'branches') + huc_dir = os.path.join(hydrofabric_dir, huc, 'branches') - rem_branch = os.path.join( gms_dir,branch_id,'rem_zeroed_masked_{}.tif'.format(branch_id) ) - catchments_branch = os.path.join( gms_dir,branch_id, - f'gw_catchments_reaches_filtered_addedAttributes_{branch_id}.tif' ) - hydroTable_branch = os.path.join( gms_dir,branch_id,'hydroTable_{}.csv'.format(branch_id) ) - catchment_poly = os.path.join( gms_dir, branch_id, - f'gw_catchments_reaches_filtered_addedAttributes_crosswalked_{branch_id}.gpkg' ) + rem_file_name = 'rem_zeroed_masked_{}.tif'.format(branch_id) + rem_branch = os.path.join( huc_dir, branch_id, rem_file_name ) + + catchments_file_name = f'gw_catchments_reaches_filtered_addedAttributes_{branch_id}.tif' + catchments_branch = os.path.join( huc_dir, branch_id, catchments_file_name ) + + hydroTable_branch = os.path.join( huc_dir, branch_id, 'hydroTable_{}.csv'.format(branch_id) ) + + xwalked_file_name = f'gw_catchments_reaches_filtered_addedAttributes_crosswalked_{branch_id}.gpkg' + catchment_poly = os.path.join( huc_dir, branch_id, xwalked_file_name ) # branch output - inundation_branch_raster = __append_id_to_file_name(inundation_raster,[huc,branch_id]) - inundation_branch_polygon = __append_id_to_file_name(inundation_polygon,[huc,branch_id]) - depths_branch_raster = __append_id_to_file_name(depths_raster,[huc,branch_id]) + # Some other functions that call in here already added a huc, so only add it if not yet there + if (inundation_raster is not None) and (huc not in inundation_raster): + inundation_branch_raster = fh.append_id_to_file_name(inundation_raster,[huc,branch_id]) + else: + inundation_branch_raster = fh.append_id_to_file_name(inundation_raster,branch_id) + + if (inundation_polygon is not None) and (huc not in inundation_polygon): + inundation_branch_polygon = fh.append_id_to_file_name(inundation_polygon,[huc,branch_id]) + else: + inundation_branch_polygon = fh.append_id_to_file_name(inundation_polygon,branch_id) + + if (depths_raster is not None) and (huc not in depths_raster): + depths_branch_raster = fh.append_id_to_file_name(depths_raster,[huc,branch_id]) + else: + depths_branch_raster = fh.append_id_to_file_name(depths_raster,branch_id) # identifiers identifiers = (huc,branch_id) + #print(f"inundation_branch_raster is {inundation_branch_raster}") + # inundate input - inundate_input = { - 'rem' : rem_branch, 'catchments' : catchments_branch, 'catchment_poly' : catchment_poly, - 'hydro_table' : hydroTable_branch,'forecast' : forecast, - 'mask_type' : None, + inundate_input = { 'rem' : rem_branch, + 'catchments' : catchments_branch, + 'catchment_poly' : catchment_poly, + 'hydro_table' : hydroTable_branch, + 'forecast' : forecast, + 'mask_type' : 'filter', 'hucs' : None, 'hucs_layerName' : None, - 'subset_hucs' : None, 'num_workers' : 1, + 'subset_hucs' : None, + 'num_workers' : 1, 'aggregate' : False, 'inundation_raster' : inundation_branch_raster, 'inundation_polygon' : inundation_branch_polygon, 'depths' : depths_branch_raster, 'out_raster_profile' : None, 'out_vector_profile' : None, - 'quiet' : not verbose - } - - yield (inundate_input,identifiers) - - - 
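# A minimal sketch of the file-naming convention assumed above: fh.append_id_to_file_name is
# taken to append each identifier to the file root ahead of the extension, mirroring the
# intent of the helper removed below. The example name and IDs are hypothetical.
import os

def append_ids_sketch(file_name, identifiers):
    # append every identifier, in order, before the file extension
    root, extension = os.path.splitext(file_name)
    return root + "".join("_{}".format(i) for i in identifiers) + extension

print(append_ids_sketch('inundation.tif', ['12090301', '0']))  # -> inundation_12090301_0.tif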
-def __append_id_to_file_name(file_name,identifier): - - - if file_name is not None: - - root,extension = os.path.splitext(file_name) - - if isinstance(identifier,list): - for i in identifier: - out_file_name = root + "_{}".format(i) - out_file_name += extension - else: - out_file_name = root + "_{}".format(identifier) + extension - - else: - out_file_name = None - - return(out_file_name) - - -def __vprint(message,verbose): - if verbose: - print(message) - + 'quiet' : not verbose } + + yield (inundate_input, identifiers) if __name__ == '__main__': # parse arguments - parser = argparse.ArgumentParser(description='Inundate GMS') + parser = argparse.ArgumentParser(description='Inundate FIM') parser.add_argument('-y','--hydrofabric_dir', help='Directory path to FIM hydrofabric by processing unit', required=True) parser.add_argument('-u','--hucs',help='List of HUCS to run',required=False,default=None,type=str,nargs='+') parser.add_argument('-f','--forecast',help='Forecast discharges in CMS as CSV file',required=True) @@ -257,5 +239,3 @@ def __vprint(message,verbose): # extract to dictionary and run Inundate_gms( **vars(parser.parse_args()) ) - - \ No newline at end of file diff --git a/tools/inundate_nation.py b/tools/inundate_nation.py index 6fbcdf146..efcf0e503 100644 --- a/tools/inundate_nation.py +++ b/tools/inundate_nation.py @@ -1,102 +1,287 @@ import argparse +import logging +import multiprocessing import os +import rasterio +import re +import shutil +import sys -from inundation import inundate from multiprocessing import Pool +from rasterio.merge import merge +from osgeo import gdal, ogr + +from datetime import datetime +from mosaic_inundation import Mosaic_inundation +from inundate_gms import Inundate_gms +from inundation import inundate + +# setting path +sys.path.append('/foss_fim/src') +from utils.shared_variables import elev_raster_ndv, PREP_PROJECTION +from utils.shared_functions import FIM_Helpers as fh + + +#INUN_REVIEW_DIR = r'/data/inundation_review/inundation_nwm_recurr/' +#INUN_OUTPUT_DIR = r'/data/inundation_review/inundate_nation/' +#INPUTS_DIR = r'/data/inputs' +#OUTPUT_BOOL_PARENT_DIR = '/data/inundation_review/inundate_nation/bool_temp/ +#DEFAULT_OUTPUT_DIR = '/data/inundation_review/inundate_nation/mosaic_output/' + + +def inundate_nation(fim_run_dir, output_dir, magnitude_key, + flow_file, inc_mosaic, job_number): + + + assert os.path.isdir(fim_run_dir), f'ERROR: could not find the input fim_dir location: {fim_run_dir}' + + assert os.path.exists(flow_file), f'ERROR: could not find the flow file: {flow_file}' + + if job_number > available_cores: + job_number = available_cores - 1 + print("Provided job number exceeds the number of available cores. 
" + str(job_number) + " max jobs will be used instead.") + + fim_version = os.path.basename(os.path.normpath(fim_run_dir)) + logging.info(f'Using fim version: {fim_version}') + output_base_file_name = magnitude_key + "_" + fim_version + #print(output_base_file_name) + + __setup_logger(output_dir, output_base_file_name ) + + start_dt = datetime.now() + + logging.info(f'Input FIM Directory: {fim_run_dir}') + logging.info(f'output_dir: {output_dir}') + logging.info(f'magnitude_key: {magnitude_key}') + logging.info(f'flow_file: {flow_file}') + logging.info(f'inc_mosaic: {str(inc_mosaic)}') -INUN_REVIEW_DIR = r'/data/inundation_review/inundation_nwm_recurr/' -INPUTS_DIR = r'/data/inputs' + print("Preparing to generate inundation outputs for magnitude: " + magnitude_key) + print("Input flow file: " + flow_file) + + magnitude_output_dir = os.path.join(output_dir, output_base_file_name) + + if not os.path.exists(magnitude_output_dir): + print('Creating new output directory for raw mosaic files: ' + magnitude_output_dir) + os.mkdir(magnitude_output_dir) + else: + # we need to empty it. we will kill it and remake it (using rmtree to force it) + shutil.rmtree(magnitude_output_dir, ignore_errors=True) + os.mkdir(magnitude_output_dir) + + huc_list = [] + for huc in os.listdir(fim_run_dir): + + #if huc != 'logs' and huc != 'branch_errors'and huc != 'unit_errors' and os.path.isdir(os.path.join(fim_run_dir, huc)): + if re.match('\d{8}', huc): + huc_list.append(huc) + + print('Inundation raw mosaic outputs here: ' + magnitude_output_dir) + + run_inundation([fim_run_dir, huc_list, magnitude_key, + magnitude_output_dir, flow_file, job_number]) + + # Perform mosaic operation + if inc_mosaic: + fh.print_current_date_time() + logging.info(datetime.now().strftime("%Y_%m_%d-%H_%M_%S")) + print("Performing bool mosaic process...") + logging.info("Performing bool mosaic process...") + + output_bool_dir = os.path.join(output_dir, "bool_temp") + if not os.path.exists(output_bool_dir): + os.mkdir(output_bool_dir) + else: + # we need to empty it. we will kill it and remake it (using rmtree to force it) + shutil.rmtree(output_bool_dir, ignore_errors=True) + os.mkdir(output_bool_dir) + + procs_list = [] + for rasfile in os.listdir(magnitude_output_dir): + if rasfile.endswith('.tif') and "extent" in rasfile: + #p = magnitude_output_dir + rasfile + procs_list.append([magnitude_output_dir, rasfile, output_bool_dir]) + + # Multiprocess --> create boolean inundation rasters for all hucs + if len(procs_list) > 0: + with Pool(processes=job_number) as pool: + pool.map(create_bool_rasters, procs_list) + else: + msg = f'Did not find any valid FIM extent rasters: {magnitude_output_dir}' + print(msg) + logging.info(msg) + + # Perform VRT creation and final mosaic using boolean rasters + vrt_raster_mosaic(output_bool_dir, output_dir, output_base_file_name) + + # now cleanup the raw mosiac directories + shutil.rmtree(output_bool_dir, ignore_errors=True) + + # now cleanup the raw mosiac directories + shutil.rmtree(magnitude_output_dir, ignore_errors=True) + + fh.print_current_date_time() + logging.info(logging.info(datetime.now().strftime("%Y_%m_%d-%H_%M_%S"))) + end_time = datetime.now() + logging.info(fh.print_date_time_duration(start_dt, end_time)) def run_inundation(args): """ - This script is basically a wrapper for the inundate function and is designed for multiprocessing. + This script is a wrapper for the inundate function and is designed for multiprocessing. 
Args: - args (list): [fim_run_dir (str), huc (str), magnitude (str), magnitude_output_dir (str), config (str)] + args (list): [fim_run_dir (str), huc_list (list), magnitude (str), magnitude_output_dir (str), forecast (str), job_number (int)] """ - fim_run_dir = args[0] - huc = args[1] + fim_run_dir = args[0] + huc_list = args[1] magnitude = args[2] magnitude_output_dir = args[3] - config = args[4] - + forecast = args[4] + job_number = args[5] + # Define file paths for use in inundate(). - fim_run_parent = os.path.join(fim_run_dir, huc) - rem = os.path.join(fim_run_parent, 'rem_zeroed_masked.tif') - catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes.tif') - mask_type = 'huc' - catchment_poly = '' - hydro_table = os.path.join(fim_run_parent, 'hydroTable.csv') - catchment_poly = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') - inundation_raster = os.path.join(magnitude_output_dir, magnitude + '_' + config + '_inund_extent.tif') - depth_raster = os.path.join(magnitude_output_dir, magnitude + '_' + config + '_inund_depth.tif') - forecast = os.path.join(INUN_REVIEW_DIR, 'nwm_recurr_flow_data', 'recurr_' + magnitude + '_cms.csv') - hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' - - # Run inundate() once for depth and once for extent. - if not os.path.exists(depth_raster): - print("Running the NWM recurrence intervals for HUC: " + huc + ", " + magnitude + "...") - inundate( - rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, - subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=None,inundation_polygon=None, - depths=depth_raster,out_raster_profile=None,out_vector_profile=None,quiet=True - ) - - if not os.path.exists(inundation_raster): - inundate( - rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, - subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, - depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True - ) - -if __name__ == '__main__': + inundation_raster = os.path.join(magnitude_output_dir, magnitude + '_inund_extent.tif') - # Parse arguments. - parser = argparse.ArgumentParser(description='Inundation mapping for FOSS FIM using streamflow recurrence interflow data. Inundation outputs are stored in the /inundation_review/inundation_nwm_recurr/ directory.') - parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh (e.g. data/ouputs/dev_abc/12345678_dev_test)',required=True) - parser.add_argument('-o', '--output-dir',help='The path to a directory to write the outputs. 
If not used, the inundation_review directory is used by default -> type=str',required=False, default="") - parser.add_argument('-j', '--job-number',help='The number of jobs',required=False,default=1) - - args = vars(parser.parse_args()) + print("Running the NWM recurrence intervals for HUC inundation (extent) for magnitude: " + str(magnitude)) - fim_run_dir = args['fim_run_dir'] - output_dir = args['output_dir'] - magnitude_list = ['1_5'] + map_file = Inundate_gms( hydrofabric_dir = fim_run_dir, + forecast = forecast, + num_workers = job_number, + hucs = huc_list, + inundation_raster = inundation_raster, + inundation_polygon = None, + depths_raster = None, + verbose = True, + log_file = None, + output_fileNames = None ) - job_number = int(args['job_number']) + Mosaic_inundation( map_file, + mosaic_attribute = 'inundation_rasters', + mosaic_output = inundation_raster, + #mask = os.path.join(fim_run_dir,huc8,'wbd.gpkg'), + mask = None, + unit_attribute_name = 'huc8', + nodata = elev_raster_ndv, + workers = 1, + remove_inputs = True, + subset = None, + verbose = True, + is_mosaic_for_branches = True ) - huc_list = os.listdir(fim_run_dir) - - fim_version = os.path.split(fim_run_dir)[1] +def create_bool_rasters(args): + in_raster_dir = args[0] + rasfile = args[1] + output_bool_dir = args[2] + + print("Calculating boolean inundate raster: " + rasfile) + p = in_raster_dir + os.sep + rasfile + raster = rasterio.open(p) + profile = raster.profile + array = raster.read() + del raster + array[array>0] = 1 + array[array<=0] = 0 + # And then change the band count to 1, set the + # dtype to uint8, and specify LZW compression. + profile.update(driver="GTiff", + height=array.shape[1], + width=array.shape[2], + tiled=True, + nodata=0, + blockxsize=512, + blockysize=512, + dtype='int8', + compress='lzw') + with rasterio.open(output_bool_dir + os.sep + "bool_" + rasfile, 'w', **profile) as dst: + dst.write(array.astype(rasterio.int8)) + + +def vrt_raster_mosaic(output_bool_dir, output_dir, fim_version_tag): - if output_dir == "": - output_dir = os.path.join(INUN_REVIEW_DIR, fim_version) - - if not os.path.exists(output_dir): - os.mkdir(output_dir) + + # NOTE: Oct 2022.. we no longer need the VRT, only the large mosaic'd raster. + # this code is about to be deprecated, so we will leave it as is. 
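# The reclassification in create_bool_rasters above amounts to "any value above zero is wet";
# a compact, self-contained sketch of the same idea (the sample array is hypothetical):
import numpy as np

array = np.array([[-9999.0, 0.0], [0.25, 3.2]])      # stand-in for raster.read() output
wet_dry = np.where(array > 0, 1, 0).astype(np.int8)  # -> [[0, 0], [1, 1]]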
- if 'ms' in fim_version: - config = 'ms' - if 'fr' in fim_version: - config = 'fr' - - procs_list = [] + rasters_to_mosaic = [] + for rasfile in os.listdir(output_bool_dir): + if rasfile.endswith('.tif') and "extent" in rasfile: + p = output_bool_dir + os.sep + rasfile + print("Processing: " + p) + rasters_to_mosaic.append(p) + + logging.info(fh.print_current_date_time()) + output_mosiac_vrt = os.path.join(output_bool_dir, fim_version_tag + "_merged.vrt") + print("Creating virtual raster: " + output_mosiac_vrt) + logging.info("Creating virtual raster: " + output_mosiac_vrt) + vrt = gdal.BuildVRT(output_mosiac_vrt, rasters_to_mosaic) + + output_mosiac_raster = os.path.join(output_dir, fim_version_tag + "_mosaic.tif") + print("Building raster mosaic: " + output_mosiac_raster) + logging.info("Building raster mosaic: " + output_mosiac_raster) + print("This can take a number of hours, watch 'docker stats' cpu value to ensure the process"\ + "to ensure the process is still working") + gdal.Translate(output_mosiac_raster, vrt, xRes = 10, yRes = -10, creationOptions = ['COMPRESS=LZW','TILED=YES','PREDICTOR=2']) + vrt = None + + +def __setup_logger(output_folder_path, log_file_name_key): + + start_time = datetime.now() + file_dt_string = start_time.strftime("%Y_%m_%d-%H_%M_%S") + log_file_name = f"{log_file_name_key}-{file_dt_string}.log" + + log_file_path = os.path.join(output_folder_path, log_file_name) + + logging.basicConfig(filename=log_file_path, + level=logging.DEBUG, + format='%(message)s') + + # yes.. this can do console logs as well, but it can be a bit unstable and ugly - for huc in huc_list: - if huc != 'logs': - for magnitude in magnitude_list: - magnitude_output_dir = os.path.join(output_dir, magnitude + '_' + config) - if not os.path.exists(magnitude_output_dir): - os.mkdir(magnitude_output_dir) - print(magnitude_output_dir) - procs_list.append([fim_run_dir, huc, magnitude, magnitude_output_dir, config]) - - # Multiprocess. - if job_number > 1: - with Pool(processes=job_number) as pool: - pool.map(run_inundation, procs_list) + logging.info(f'Started : {start_time.strftime("%m/%d/%Y %H:%M:%S")}') + logging.info("----------------") + + +if __name__ == '__main__': + + # Sample usage: python3 /foss_fim/tools/inundate_nation.py -r /outputs/fim_4_0_9_2 -m 100_0 -f /data/inundation_review/inundation_nwm_recurr/nwm_recurr_flow_data/nwm21_17C_recurr_100_0_cms.csv -s -j 10 + # outputs become /data/inundation_review/inundate_nation/100_0_fim_4_0_9_2_mosiac.tif (.log, etc) + + # Sample usage: python3 /foss_fim/tools/inundate_nation.py -r /outputs/fim_4_0_9_2 -m hw -f /data/inundation_review/inundation_nwm_recurr/nwm_recurr_flow_data/nwm_high_water_threshold_cms.csv -s -j 10 + # outputs become /data/inundation_review/inundate_nation/hw_fim_4_0_9_2_mosiac.tif (.log, etc) + + # if run on UCS2, you can map docker as -v /dev_fim_share... etc/:/data -v /local....outputs:/outputs -v foss_fim as normal. + + available_cores = multiprocessing.cpu_count() + + # Parse arguments. + parser = argparse.ArgumentParser(description='Inundation mapping for FOSS FIM using streamflow '\ + 'recurrence interflow data. Inundation outputs are stored in the '\ + '/inundation_review/inundation_nwm_recurr/ directory.') + + parser.add_argument('-r', '--fim-run-dir', help='Name of directory containing outputs '\ + 'of fim_run.sh (e.g. data/ouputs/dev_abc/12345678_dev_test)', required=True) + + parser.add_argument('-o', '--output-dir', help='Optional: The path to a directory to write the '\ + 'outputs. 
If not used, the inundation_nation directory is used by default '\ + 'ie) /data/inundation_review/inundate_nation/', + default='/data/inundation_review/inundate_nation/', required=False) + + parser.add_argument('-m', '--magnitude_key', help = 'used in output folders names and temp files, '\ + 'added to output_file_name_key ie 100_0, 2_0, hw, etc)', required = True) + + parser.add_argument('-f', '--flow_file', help = 'the path and flow file to be used. '\ + 'ie /data/inundation_review/inundation_nwm_recurr/nwm_recurr_flow_data/nwm_high_water_threshold_cms.csv', required = True) + + parser.add_argument('-s', '--inc_mosaic',help='Optional flag to produce mosaic of FIM extent rasters', + action='store_true') + + parser.add_argument('-j', '--job-number', help='The number of jobs', required=False, default=1, type=int) + + args = vars(parser.parse_args()) + inundate_nation(**args) \ No newline at end of file diff --git a/tools/inundation.py b/tools/inundation.py index 2b5a4599c..188f0bce0 100755 --- a/tools/inundation.py +++ b/tools/inundation.py @@ -11,12 +11,13 @@ from shapely.geometry import shape from rasterio.mask import mask from rasterio.io import DatasetReader,DatasetWriter -from rasterio.features import shapes from collections import OrderedDict import argparse from warnings import warn from gdal import BuildVRT import geopandas as gpd +import sys +import xarray as xr class hydroTableHasOnlyLakes(Exception): """ Raised when a Hydro-Table only has lakes """ @@ -27,11 +28,21 @@ class NoForecastFound(Exception): """ Raised when no forecast is available for a given Hydro-Table """ pass -def inundate( - rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=None,hucs_layerName=None, - subset_hucs=None,num_workers=1,aggregate=False,inundation_raster=None,inundation_polygon=None, - depths=None,out_raster_profile=None,out_vector_profile=None,src_table=None,quiet=False - ): +class hydroTableHasOnlyLakes(Exception): + """ Raised when a Hydro-Table only has lakes """ + pass + + +class NoForecastFound(Exception): + """ Raised when no forecast is available for a given Hydro-Table """ + pass + +def inundate(rem, catchments, catchment_poly, hydro_table, forecast, + mask_type, hucs = None, hucs_layerName = None, + subset_hucs = None, num_workers = 1, aggregate = False, + inundation_raster = None, inundation_polygon = None, + depths = None, out_raster_profile = None, out_vector_profile = None, + src_table = None, quiet = False): """ Run inundation on FIM >=3.0 outputs at job-level scale or aggregated scale @@ -92,10 +103,6 @@ def inundate( ----- - Specifying a subset of the domain in rem or catchments to inundate on is achieved by the HUCs file or the forecast file. 
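# A minimal usage sketch based on the signature and return value shown in this hunk; the
# file paths are hypothetical, and mask_type='filter' follows the per-branch call built in
# inundate_gms.py.
from inundation import inundate

inundation_rasters, depth_rasters, inundation_polys = inundate(
    rem='rem_zeroed_masked_0.tif',
    catchments='gw_catchments_reaches_filtered_addedAttributes_0.tif',
    catchment_poly='gw_catchments_reaches_filtered_addedAttributes_crosswalked_0.gpkg',
    hydro_table='hydroTable_0.csv',
    forecast='forecast_discharges_cms.csv',
    mask_type='filter',
    inundation_raster='inundation_0.tif',
    quiet=True)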
- Examples - -------- - >>> import inundation - >>> inundation.inundate(rem,catchments,hydro_table,forecast,inundation_raster) """ # check for num_workers @@ -163,13 +170,17 @@ def inundate( else: raise TypeError("Pass hydro table csv") + if catchmentStagesDict is not None: if src_table is not None: create_src_subset_csv(hydro_table,catchmentStagesDict,src_table) # make windows generator - window_gen = __make_windows_generator(rem,catchments,catchment_poly,mask_type,catchmentStagesDict,inundation_raster,inundation_polygon, - depths,out_raster_profile,out_vector_profile,quiet,hucs=hucs,hucSet=hucSet) + window_gen = __make_windows_generator(rem, catchments, catchment_poly, + mask_type, catchmentStagesDict, inundation_raster, + inundation_polygon, depths, out_raster_profile, + out_vector_profile, quiet, + hucs = hucs, hucSet = hucSet) # start up thread pool executor = ThreadPoolExecutor(max_workers=num_workers) @@ -184,7 +195,6 @@ def inundate( except Exception as exc: __vprint("Exception {} for {}".format(exc,results[future]),not quiet) else: - if results[future] is not None: __vprint("... {} complete".format(results[future]),not quiet) else: @@ -197,31 +207,12 @@ def inundate( # power down pool executor.shutdown(wait=True) - # optional aggregation - if (aggregate) & (hucs is not None): - # inun grid vrt - if inundation_raster is not None: - inun_vrt = BuildVRT(splitext(inundation_raster)[0]+'.vrt',inundation_rasters) - inun_vrt = None - #_ = run('gdalbuildvrt -q -overwrite {} {}'.format(splitext(inundation_raster)[0]+'.vrt'," ".join(inundation_rasters)),shell=True) - # depths vrt - if depths is not None: - depths_vrt = BuildVRT(splitext(depths)[0]+'.vrt',depth_rasters,resampleAlg='bilinear') - depths_vrt = None - #_ = run('gdalbuildvrt -q -overwrite -r bilinear {} {}'.format(splitext(depths)[0]+'.vrt'," ".join(depth_rasters)),shell=True) - - # concat inun poly - if inundation_polygon is not None: - _ = run('ogrmerge.py -o {} {} -f GPKG -single -overwrite_ds'.format(inundation_polygon," ".join(inundation_polys)),shell=True) - - # close datasets - rem.close() - catchments.close() - - return(0) + # close datasets + rem.close() + catchments.close() + + return(inundation_rasters,depth_rasters,inundation_polys) - else: - return(1) def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profile,catchments_profile,hucCode, catchmentStagesDict,depths,inundation_raster,inundation_polygon, @@ -279,10 +270,8 @@ def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profil if out_vector_profile is None: out_vector_profile = {'crs' : crs , 'driver' : 'GPKG'} - out_vector_profile['schema'] = { - 'geometry' : 'Polygon', - 'properties' : OrderedDict([('HydroID' , 'int')]) - } + out_vector_profile['schema'] = { 'geometry' : 'Polygon', + 'properties' : OrderedDict([('HydroID' , 'int')]) } # open output inundation polygons if isinstance(inundation_polygon,str): @@ -326,7 +315,7 @@ def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profil # make generator for inundation polygons inundation_polygon_generator = shapes(inundation_array,mask=inundation_array>0,connectivity=8,transform=window_transform) - + # generate records records = [] for i,(g,h) in enumerate(inundation_polygon_generator): @@ -334,7 +323,7 @@ def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profil record['geometry'] = g record['properties'] = {'HydroID' : int(h)} records += [record] - + # write out inundation_polygon.writerecords(records) @@ -358,6 +347,13 @@ 
def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profil ip_name = inundation_polygon.path except AttributeError: ip_name = None + + #print(ir_name) + #yield(ir_name,d_name,ip_name) + + if isinstance(depths,DatasetWriter): depths.close() + if isinstance(inundation_raster,DatasetWriter): inundation_raster.close() + if isinstance(inundation_polygon,fiona.Collection): inundation_polygon.close() return(ir_name,d_name,ip_name) @@ -367,9 +363,11 @@ def __go_fast_mapping(rem,catchments,catchmentStagesDict,inundation,depths): for i,(r,cm) in enumerate(zip(rem,catchments)): if cm in catchmentStagesDict: - - depth = catchmentStagesDict[cm] - r - depths[i] = max(depth,0) # set negative depths to 0 + if r >= 0: + depth = catchmentStagesDict[cm] - r + depths[i] = max(depth,0) # set negative depths to 0 + else: + depths[i] = 0 if depths[i] > 0: # set positive depths to positive inundation[i] *= -1 @@ -379,9 +377,21 @@ def __go_fast_mapping(rem,catchments,catchmentStagesDict,inundation,depths): return(inundation,depths) -def __make_windows_generator(rem,catchments,catchment_poly,mask_type,catchmentStagesDict,inundation_raster,inundation_polygon, - depths,out_raster_profile,out_vector_profile,quiet,hucs=None,hucSet=None): - +def __make_windows_generator(rem, + catchments, + catchment_poly, + mask_type, + catchmentStagesDict, + inundation_raster, + inundation_polygon, + depths, + out_raster_profile, + out_vector_profile, + quiet, + hucs = None, + hucSet = None): + + if hucs is not None: # get attribute name for HUC column @@ -429,6 +439,8 @@ def __return_huc_in_hucSet(hucCode,hucSet): rem_array,window_transform = mask(rem,catchment_poly['geometry'],crop=True,indexes=1) catchments_array,_ = mask(catchments,catchment_poly['geometry'],crop=True,indexes=1) del catchment_poly + elif mask_type is None: + pass else: print ("invalid mask type. Options are 'huc' or 'filter'") except ValueError: # shape doesn't overlap raster @@ -436,10 +448,10 @@ def __return_huc_in_hucSet(hucCode,hucSet): hucCode = huc['properties'][hucColName] - yield (rem_array,catchments_array,rem.crs.wkt, - window_transform,rem.profile,catchments.profile,hucCode, - catchmentStagesDict,depths,inundation_raster, - inundation_polygon,out_raster_profile,out_vector_profile,quiet) + yield (rem_array, catchments_array, rem.crs.wkt, + window_transform, rem.profile, catchments.profile, hucCode, + catchmentStagesDict, depths, inundation_raster, + inundation_polygon, out_raster_profile, out_vector_profile, quiet) else: hucCode = None @@ -480,6 +492,41 @@ def __subset_hydroTable_to_forecast(hydroTable,forecast,subset_hucs=None): hydroTable = hydroTable[hydroTable["LakeID"] == -999] # Subset hydroTable to include only non-lake catchments. 
+ # raises error if hydroTable is empty due to all segments being lakes + if hydroTable.empty: + raise hydroTableHasOnlyLakes("All stream segments in HUC are within lake boundaries.") + + + if isinstance(forecast,str): + + try: + forecast = pd.read_csv( + forecast, + dtype={'feature_id' : str , 'discharge' : float} + ) + forecast.set_index('feature_id',inplace=True) + except UnicodeDecodeError: + forecast = read_nwm_forecast_file(forecast) + + elif isinstance(forecast,pd.DataFrame): + pass # consider checking for dtypes, indices, and columns + else: + raise TypeError("Pass path to forecast file csv or Pandas DataFrame") + + # susbset hucs if passed + if subset_hucs is not None: + if isinstance(subset_hucs,list): + if len(subset_hucs) == 1: + try: + subset_hucs = open(subset_hucs[0]).read().split('\n') + except FileNotFoundError: + pass + elif isinstance(subset_hucs,str): + try: + subset_hucs = open(subset_hucs).read().split('\n') + except FileNotFoundError: + subset_hucs = [subset_hucs] + if not hydroTable.empty: if isinstance(forecast,str): @@ -516,36 +563,58 @@ def __subset_hydroTable_to_forecast(hydroTable,forecast,subset_hucs=None): hydroTable = hydroTable[np.in1d(hydroTable.index.get_level_values('HUC'), subset_hucs)] - # join tables - try: - hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') + # join tables + try: + hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') + except AttributeError: + #print("FORECAST ERROR") + raise NoForecastFound("No forecast value found for the passed feature_ids in the Hydro-Table") + else: - # initialize dictionary - catchmentStagesDict = typed.Dict.empty(types.int32,types.float64) + # initialize dictionary + catchmentStagesDict = typed.Dict.empty(types.int32,types.float64) - # interpolate stages - for hid,sub_table in hydroTable.groupby(level='HydroID'): + # interpolate stages + for hid,sub_table in hydroTable.groupby(level='HydroID'): - interpolated_stage = np.interp(sub_table.loc[:,'discharge'].unique(),sub_table.loc[:,'discharge_cms'],sub_table.loc[:,'stage']) + interpolated_stage = np.interp(sub_table.loc[:,'discharge'].unique(),sub_table.loc[:,'discharge_cms'],sub_table.loc[:,'stage']) - # add this interpolated stage to catchment stages dict - h = round(interpolated_stage[0],4) + # add this interpolated stage to catchment stages dict + h = round(interpolated_stage[0],4) - hid = types.int32(hid) ; h = types.float32(h) - catchmentStagesDict[hid] = h + hid = types.int32(hid) ; h = types.float32(h) + catchmentStagesDict[hid] = h - # huc set - hucSet = [str(i) for i in hydroTable.index.get_level_values('HUC').unique().to_list()] + # huc set + hucSet = [str(i) for i in hydroTable.index.get_level_values('HUC').unique().to_list()] - return(catchmentStagesDict,hucSet) + return(catchmentStagesDict,hucSet) + + +def read_nwm_forecast_file(forecast_file,rename_headers=True): + + """ Reads NWM netcdf comp files and converts to forecast data frame """ + + flows_nc = xr.open_dataset(forecast_file,decode_cf='feature_id',engine='netcdf4') + + flows_df = flows_nc.to_dataframe() + flows_df.reset_index(inplace=True) + + flows_df = flows_df[['streamflow','feature_id']] + + if rename_headers: + flows_df = flows_df.rename(columns={"streamflow": "discharge"}) + + convert_dict = {'feature_id': str,'discharge': float} + flows_df = flows_df.astype(convert_dict) + + flows_df.set_index('feature_id',inplace=True,drop=True) + + flows_df.dropna(inplace=True) + + return(flows_df) - except AttributeError: - print (f"No matching feature IDs 
between forecast and hydrotable for HUC(s): {subset_hucs}") - return(None,None) - else: - print(f"All stream segments in HUC(s): {huc_error} are within lake boundaries.") - return(None,None) def __vprint(message,verbose): if verbose: @@ -566,22 +635,53 @@ def create_src_subset_csv(hydro_table,catchmentStagesDict,src_table): # parse arguments parser = argparse.ArgumentParser(description='Rapid inundation mapping for FOSS FIM. Operates in single-HUC and batch modes.') - parser.add_argument('-r','--rem', help='REM raster at job level or mosaic vrt. Must match catchments CRS.', required=True) - parser.add_argument('-c','--catchments',help='Catchments raster at job level or mosaic VRT. Must match rem CRS.',required=True) - parser.add_argument('-b','--catchment-poly',help='catchment_vector',required=True) - parser.add_argument('-t','--hydro-table',help='Hydro-table in csv file format',required=True) - parser.add_argument('-f','--forecast',help='Forecast discharges in CMS as CSV file',required=True) - parser.add_argument('-u','--hucs',help='Batch mode only: HUCs file to process at. Must match CRS of input rasters',required=False,default=None) - parser.add_argument('-l','--hucs-layerName',help='Batch mode only. Layer name in HUCs file to use',required=False,default=None) - parser.add_argument('-j','--num-workers',help='Batch mode only. Number of concurrent processes',required=False,default=1,type=int) - parser.add_argument('-s','--subset-hucs',help='Batch mode only. HUC code, series of HUC codes (no quotes required), or line delimited of HUCs to run within the hucs file that is passed',required=False,default=None,nargs='+') - parser.add_argument('-m', '--mask-type', help='Specify huc (FIM < 3) or filter (FIM >= 3) masking method', required=False,default="huc") - parser.add_argument('-a','--aggregate',help='Batch mode only. Aggregate outputs to VRT files. Currently, raises warning and sets to false if used.',required=False,action='store_true') - parser.add_argument('-i','--inundation-raster',help='Inundation Raster output. Only writes if designated. Appends HUC code in batch mode.',required=False,default=None) - parser.add_argument('-p','--inundation-polygon',help='Inundation polygon output. Only writes if designated. Appends HUC code in batch mode.',required=False,default=None) - parser.add_argument('-d','--depths',help='Depths raster output. Only writes if designated. Appends HUC code in batch mode.',required=False,default=None) - parser.add_argument('-n','--src-table',help='Output table with the SRC lookup/interpolation. Only writes if designated. Appends HUC code in batch mode.',required=False,default=None) - parser.add_argument('-q','--quiet',help='Quiet terminal output',required=False,default=False,action='store_true') + parser.add_argument('-r', '--rem', + help='REM raster at job level or mosaic vrt. Must match catchments CRS.', + required=True) + parser.add_argument('-c', '--catchments', + help='Catchments raster at job level or mosaic VRT. Must match rem CRS.', + required=True) + parser.add_argument('-b', '--catchment-poly', help='catchment_vector', + required=True) + parser.add_argument('-t', '--hydro-table', help='Hydro-table in csv file format', + required=True) + parser.add_argument('-f', '--forecast', help='Forecast discharges in CMS as CSV file', + required=True) + parser.add_argument('-u', '--hucs', + help='Batch mode only: HUCs file to process at. Must match CRS of input rasters',required=False, default=None) + parser.add_argument('-l', '--hucs-layerName', help='Batch mode only. 
Layer name in HUCs file to use', + required=False, default=None) + parser.add_argument('-j', '--num-workers',help='Batch mode only. Number of concurrent processes', + required=False, default=1, type=int) + parser.add_argument('-s', '--subset-hucs', help = """Batch mode only. HUC code, + series of HUC codes (no quotes required), or line delimited of HUCs to run within + the hucs file that is passed""", + required=False, default=None, nargs='+') + parser.add_argument('-m', '--mask-type', + help='Specify huc (FIM < 3) or filter (FIM >= 3) masking method', + required=False, default="huc") + parser.add_argument('-a', '--aggregate', + help="""Batch mode only. Aggregate outputs to VRT files. + Currently, raises warning and sets to false if used.""", + required=False, action='store_true') + parser.add_argument('-i', '--inundation-raster', + help="""Inundation Raster output. Only writes if designated. + Appends HUC code in batch mode.""", + required=False, default=None) + parser.add_argument('-p', '--inundation-polygon', + help="""Inundation polygon output. Only writes if designated. + Appends HUC code in batch mode.""", + required=False, default=None) + parser.add_argument('-d', '--depths', + help="""Depths raster output. Only writes if designated. + Appends HUC code in batch mode.""", + required=False, default=None) + parser.add_argument('-n', '--src-table', + help="""Output table with the SRC lookup/interpolation. + Only writes if designated. Appends HUC code in batch mode.""", + required=False, default=None) + parser.add_argument('-q','--quiet', help='Quiet terminal output', + required=False, default=False, action='store_true') # extract to dictionary args = vars(parser.parse_args()) diff --git a/tools/make_boxes_from_bounds.py b/tools/make_boxes_from_bounds.py new file mode 100755 index 000000000..69b9b5a8b --- /dev/null +++ b/tools/make_boxes_from_bounds.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 + + +from shapely.geometry import box +import pandas as pd +import geopandas as gpd +import argparse +from datetime import datetime +from foss_fim.src.utils.shared_functions import getDriver + + +def find_hucs_of_bounding_boxes(bounding_boxes_file, wbd=None, projection_of_boxes='EPSG:4329', + wbd_layer='WBDHU8', huc_output_file=None, + forecast_output_file=None, bounding_boxes_outfile=None): + + + # load bounding box file + bounding_boxes = pd.read_csv(bounding_boxes_file, + dtype={'minx':float,'miny':float,'maxx':float,'maxy':float}, + comment='#') + + + make_box_geom = lambda df : box(df['minx'],df['miny'],df['maxx'],df['maxy']) + + bounding_boxes['geometry'] = bounding_boxes.apply(make_box_geom,axis=1) + + bounding_boxes = gpd.GeoDataFrame(bounding_boxes,crs=projection_of_boxes) + + wbd_proj = gpd.read_file(wbd,layer=wbd_layer,rows=1).crs + + bounding_boxes = bounding_boxes.to_crs(wbd_proj) + + if bounding_boxes_outfile is not None: + bounding_boxes.to_file(bounding_boxes_outfile,driver=getDriver(bounding_boxes_outfile),index=False) + + wbdcol_name = 'HUC'+wbd_layer[-1] + + get_intersections = lambda bbdf : gpd.read_file(wbd,layer=wbd_layer,mask=bbdf.geometry)[wbdcol_name] + + hucs = bounding_boxes.apply(get_intersections,axis=1) + + bounding_boxes.drop(columns=['geometry','minx','miny','maxx','maxy'],inplace=True) + + hucs_columns = hucs.columns + bb_columns = bounding_boxes.columns + bounding_boxes = hucs.join(bounding_boxes) + bounding_boxes = pd.melt(bounding_boxes,id_vars=bb_columns,value_vars=hucs_columns,value_name='HUC8') + bounding_boxes.drop(columns=['variable'],inplace=True) + 
bounding_boxes.dropna(inplace=True) + bounding_boxes.reset_index(drop=True,inplace=True) + + hucs_series = pd.Series(hucs.stack().reset_index(drop=True).unique()) + + if huc_output_file is not None: + hucs_series.to_csv(huc_output_file,sep='\n',index=False,header=False) + + if forecast_output_file is not None: + bounding_boxes.to_csv(forecast_output_file,index=False,date_format='%Y-%m-%d %H:%M:%S%Z') + + return(hucs_series,bounding_boxes) + + +if __name__ == '__main__': + + # parse arguments + parser = argparse.ArgumentParser(description='Find hucs for bounding boxes') + parser.add_argument('-b','--bounding-boxes-file', help='Bounding box file', required=True) + parser.add_argument('-w','--wbd', help='WBD file', required=True) + parser.add_argument('-p','--projection-of-boxes', help='Projection', required=False,default='EPSG:4329') + parser.add_argument('-o','--huc-output-file', help='Output file of HUCS', required=False,default=None) + parser.add_argument('-f','--forecast-output-file', help='Forecast file', required=False,default=None) + parser.add_argument('-u','--bounding-boxes-outfile', help='Bounding boxes outfile', required=False,default=None) + + args=vars(parser.parse_args()) + + find_hucs_of_bounding_boxes(**args) diff --git a/tools/mosaic_inundation.py b/tools/mosaic_inundation.py new file mode 100755 index 000000000..215e6511c --- /dev/null +++ b/tools/mosaic_inundation.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python +# coding: utf-8 + +import argparse +import os +import pandas as pd +import sys + +from glob import glob +from overlapping_inundation import OverlapWindowMerge +from tqdm import tqdm +from utils.shared_variables import elev_raster_ndv +from utils.shared_functions import FIM_Helpers as fh + +def Mosaic_inundation( map_file, + mosaic_attribute = 'inundation_rasters', + mosaic_output = None, + mask = None, + unit_attribute_name = 'huc8', + nodata = elev_raster_ndv, + workers = 1, + remove_inputs = False, + subset = None, + verbose = True, + is_mosaic_for_branches = False ): + + # Notes: + # - If is_mosaic_for_branches is true, the mosaic output name + # will add the HUC into the output name for overwrite resons. 
+
+    # check input
+    if mosaic_attribute not in ('inundation_rasters','depths_rasters'):
+        raise ValueError('Pass inundation or depths for mosaic_attribute argument')
+
+    # load file
+    if isinstance(map_file,pd.DataFrame):
+        inundation_maps_df = map_file
+        del map_file
+    elif isinstance(map_file,str):
+        inundation_maps_df = pd.read_csv(map_file,
+                                         dtype={unit_attribute_name:str,'branchID':str})
+    else:
+        raise TypeError('Pass Pandas DataFrame or file path string to csv for map_file argument')
+
+    # remove NaNs
+    inundation_maps_df.dropna(axis=0, how='all', inplace=True)
+
+    # subset
+    if subset is not None:
+        subset_mask = inundation_maps_df.loc[:,unit_attribute_name].isin(subset)
+        inundation_maps_df = inundation_maps_df.loc[subset_mask,:]
+
+    # unique aggregation units
+    aggregation_units = inundation_maps_df.loc[:,unit_attribute_name].unique()
+
+    inundation_maps_df.set_index(unit_attribute_name, drop=True, inplace=True)
+
+    # decide whether to display the progress bar
+    if verbose and len(aggregation_units) == 1:
+        tqdm_disable = False
+    elif verbose:
+        tqdm_disable = False
+    else:
+        tqdm_disable = True
+
+    ag_mosaic_output = ""
+
+    for ag in tqdm(aggregation_units, disable = tqdm_disable, desc = 'Mosaicing FIMs'):
+
+        try:
+            inundation_maps_list = inundation_maps_df.loc[ag,mosaic_attribute].tolist()
+        except AttributeError:
+            inundation_maps_list = [ inundation_maps_df.loc[ag,mosaic_attribute] ]
+
+        # Some processes may have already added the ag value (if it is a huc) to
+        # the file name, so don't re-add it.
+        # Only add the huc into the name if branches are being processed, as
+        # sometimes the mosaic is not for gms branches but may be a mosaic of an
+        # fr set with a gms composite map.
+
+        ag_mosaic_output = mosaic_output
+        if (is_mosaic_for_branches) and (ag not in mosaic_output):
+            ag_mosaic_output = fh.append_id_to_file_name(mosaic_output, ag) # change it
+
+        mosaic_by_unit(inundation_maps_list,
+                       ag_mosaic_output,
+                       nodata,
+                       workers = workers,
+                       remove_inputs = remove_inputs,
+                       mask = mask,
+                       verbose = verbose)
+
+
+    # inundation maps
+    inundation_maps_df.reset_index(drop=True, inplace=True)
+
+    # Return file name and path of the final mosaic output file.
+    # Might be empty.
+    return ag_mosaic_output
+
+
+# Note: This uses threading and not processes. If the number of workers is more than
+# the number of possible threads, no results will be returned. But it is usually
+# pretty fast anyway. This needs to be fixed.
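# Until the threading caveat above is resolved, one defensive pattern is to cap the worker
# count before merging; this sketch assumes os.cpu_count() is an acceptable proxy for the
# available thread budget and is not part of the original code.
import os

requested_workers = 4                                    # hypothetical requested value
workers = min(requested_workers, os.cpu_count() or 1)    # never ask for more threads than exist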
+def mosaic_by_unit(inundation_maps_list,
+                   mosaic_output,
+                   nodata = elev_raster_ndv,
+                   workers = 1,
+                   remove_inputs = False,
+                   mask = None,
+                   verbose = False):
+
+    # overlap object instance
+    overlap = OverlapWindowMerge( inundation_maps_list, (30, 30) )
+
+    if mosaic_output is not None:
+        if workers > 1:
+            threaded = True
+        else:
+            threaded = False
+
+        overlap.merge_rasters(mosaic_output, threaded=threaded, workers=workers, nodata=nodata)
+
+        if mask:
+            fh.vprint("Masking ...", verbose)
+            overlap.mask_mosaic(mosaic_output, mask, outfile=mosaic_output)
+
+    if remove_inputs:
+        fh.vprint("Removing inputs ...", verbose)
+
+        for inun_map in inundation_maps_list:
+            if inun_map is not None:
+                if os.path.isfile(inun_map):
+                    os.remove(inun_map)
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description='Mosaic GMS Inundation Rasters')
+    parser.add_argument('-i','--map-file',
+                        help='List of file paths to inundation/depth maps to mosaic',
+                        required=True)
+    parser.add_argument('-a','--mask',
+                        help='File path to vector polygon mask to clip mosaic to',
+                        required=False, default=None)
+    parser.add_argument('-s','--subset', help='Subset units',
+                        required=False, default=None, type=str, nargs='+')
+    parser.add_argument('-n','--nodata', help='Nodata value for the mosaicked raster',
+                        required=False, default=elev_raster_ndv)
+    parser.add_argument('-w','--workers', help='Number of Workers',
+                        required=False, default=4, type=int)
+    parser.add_argument('-t','--mosaic-attribute', help='Attribute to mosaic: inundation_rasters or depths_rasters',
+                        required=False, default=None)
+    parser.add_argument('-m','--mosaic-output', help='Mosaicked inundation map file name',
+                        required=False, default=None)
+    parser.add_argument('-r','--remove-inputs', help='Remove original input inundation maps',
+                        required=False, default=False, action='store_true')
+    parser.add_argument('-v','--verbose', help='Verbose printing',
+                        required=False, default=False, action='store_true')
+    parser.add_argument('-g','--is-mosaic-for-branches',
+                        help='If the mosaic is for branches, include this arg',
+                        required=False, default=False, action='store_true')
+
+    args = vars(parser.parse_args())
+
+    Mosaic_inundation(**args)
diff --git a/tools/nesdis_preprocessing.py b/tools/nesdis_preprocessing.py
new file mode 100755
index 000000000..45be46247
--- /dev/null
+++ b/tools/nesdis_preprocessing.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+
+from shapely.geometry import box
+import pandas as pd
+import geopandas as gpd
+import argparse
+from datetime import datetime
+from make_boxes_from_bounds import find_hucs_of_bounding_boxes
+import requests
+from concurrent.futures import ThreadPoolExecutor,as_completed
+import os
+from tqdm import tqdm
+
+##################################
+##
+## Likely Deprecated: File appears to be no longer used. Noticed Jan 16, 2023
+## Might want to be kept for possible re-use at a later time?
+## +################################## + + +def nesdis_preprocessing(bounding_boxes_file,wbd=None,projection_of_boxes='EPSG:4329',wbd_layer='WBDHU8',forecast_output_file=None,retrieve=True,workers=6,download_directory=None): + + _, bounding_boxes = find_hucs_of_bounding_boxes(bounding_boxes_file,wbd=wbd,projection_of_boxes=projection_of_boxes,wbd_layer=wbd_layer) + + # load bounding box file + bounding_boxes['event_start'] = pd.to_datetime(bounding_boxes['event_start'],utc=True) + bounding_boxes['event_end'] = pd.to_datetime(bounding_boxes['event_end'],utc=True) + bounding_boxes['additional_date'] = pd.to_datetime(bounding_boxes['additional_date'],utc=True) + + bounding_boxes.reset_index(drop=True,inplace=True) + + wbdcol_name = 'HUC'+wbd_layer[-1] + + # expand dates + datetime_indices = bounding_boxes.apply(lambda df:pd.date_range(df['event_start'],df['event_end'],closed=None,freq='D',tz='UTC'),axis=1) + + datetime_indices.name = 'date_time' + datetime_indices=pd.DataFrame(datetime_indices) + datetime_indices = datetime_indices.join(bounding_boxes[['Name',wbdcol_name]]) + + # append columns to expanded dates + forecast_df = pd.DataFrame() + for idx,row in datetime_indices.iterrows(): + dt_df = row['date_time'].to_frame(index=False,name='date_time') + + row = row.drop('date_time') + + dt_df = dt_df.join(pd.concat([pd.DataFrame(row).T]*len(dt_df),ignore_index=True)) + + forecast_df = pd.concat((forecast_df,dt_df),ignore_index=True) + + # add extra dry date + additional_date_df = forecast_df[['Name',wbdcol_name]].merge(bounding_boxes[['additional_date',wbdcol_name]],left_on=wbdcol_name,right_on=wbdcol_name).drop_duplicates(ignore_index=True) + + forecast_df = pd.concat((forecast_df,additional_date_df.rename(columns={'additional_date':'date_time'})),ignore_index=True) + + forecast_df = forecast_df.sort_values(['Name',wbdcol_name],ignore_index=True) + + + forecast_df['date_time'] = forecast_df.apply(lambda df : df['date_time'].replace(hour=18,minute=0),axis=1) + + forecast_df = forecast_df.rename(columns={wbdcol_name:'huc'}) + + forecast_df = construct_nwm_forecast_filenames_and_retrieve(forecast_df,download_directory,retrieve=retrieve,workers=workers) + + if forecast_output_file is not None: + forecast_df.to_csv(forecast_output_file,index=False,date_format='%Y-%m-%d %H:%M:%S%Z') + + + + +def construct_nwm_forecast_filenames_and_retrieve(forecast_df,download_directory,retrieve=True,workers=1): + # make forecast file names for NWM and retrieve + + #construct url + #url = f'{year}/{year}{month}{day}{time}.CHRTOUT_DOMAIN1.comp' + + make_url = lambda df: "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/model_output/" + \ + str(df['date_time'].year) + '/' + str(df['date_time'].year) + \ + str(df['date_time'].month).zfill(2) + \ + str(df['date_time'].day).zfill(2) + \ + str(df['date_time'].hour).zfill(2) + \ + str(df['date_time'].minute).zfill(2) +\ + ".CHRTOUT_DOMAIN1.comp" + + def make_file_names(df,download_directory): + # assumes that the last segment after the / represents the file name + url = df['forecast_url'] + file_name_start_pos = url.rfind("/") + 1 + file_name = url[file_name_start_pos:] + + file_name = os.path.join(download_directory,file_name) + + return(file_name) + + + urls = forecast_df.apply(make_url,axis=1) + forecast_df['forecast_url'] = urls + + file_names = forecast_df.apply(lambda df: make_file_names(df,download_directory),axis=1) + forecast_df['forecast_file'] = file_names + + if not retrieve: + return(forecast_df) + + download_df = 
forecast_df[['forecast_url','forecast_file']].drop_duplicates() + + + def download_url(url,file_name): + r = requests.get(url, stream=True) + if r.status_code == requests.codes.ok: + with open(file_name, 'wb') as f: + for data in r: + f.write(data) + return url + + pool = ThreadPoolExecutor(max_workers=workers) + + results = { pool.submit(download_url,*(url,file_name)) : (url,file_name) for idx,(url,file_name) in download_df.iterrows() } + + for future in tqdm(as_completed(results),total=len(download_df)): + + url,file_name = results[future] + + try: + future.result() + except Exception as exc: + print('error',exc,url) + + pool.shutdown(wait=True) + + return(forecast_df) + + +if __name__ == '__main__': + + ################################## + ## + ## Likely Deprecated: File appears to be no longer used. Noticed Jan 16, 2023 + ## Might want to be kept for possible re-use at a later time? + ## + ################################## + + + # parse arguments + parser = argparse.ArgumentParser(description='Find hucs for bounding boxes') + parser.add_argument('-b','--bounding-boxes-file', help='Bounding box file', required=True) + parser.add_argument('-w','--wbd', help='WBD file', required=True) + parser.add_argument('-f','--forecast-output-file', help='Forecast file', required=False,default=None) + parser.add_argument('-r','--retrieve', help='Forecast file', required=False,default=False,action='store_true') + parser.add_argument('-j','--workers', help='Forecast file', required=False,default=1,type=int) + parser.add_argument('-d','--download_directory', help='Forecast file', required=False,default=1) + + args=vars(parser.parse_args()) + + nesdis_preprocessing(**args) diff --git a/tools/gms_tools/overlapping_inundation.py b/tools/overlapping_inundation.py similarity index 99% rename from tools/gms_tools/overlapping_inundation.py rename to tools/overlapping_inundation.py index 5dab3da96..a99abdeec 100644 --- a/tools/gms_tools/overlapping_inundation.py +++ b/tools/overlapping_inundation.py @@ -442,10 +442,12 @@ def merge_data(rst_data, data] del data + with warnings.catch_warnings(): # This `with` block supresses the RuntimeWarning thrown by numpy when aggregating nan values warnings.simplefilter("ignore", category=RuntimeWarning) window_data[row_slice, col_slice] = agg_function(merge) + window_data[np.isnan(window_data)] = nodata del merge @@ -502,4 +504,4 @@ def merge_data(rst_data, (1, 1)) overlap.merge_rasters(project_path + '/merged_final5.tif', threaded=False, workers=4) - print('end', time.localtime()) \ No newline at end of file + print('end', time.localtime()) diff --git a/tools/pixel_counter.py b/tools/pixel_counter.py new file mode 100644 index 000000000..b1d3afabb --- /dev/null +++ b/tools/pixel_counter.py @@ -0,0 +1,298 @@ +'''Created on 02/21/2022. +Written by: +Anuska Narayanan (The University of Alabama Department of Geography, anarayanan1@crimson.ua.edu; +Sophie Williams (The University of Alabama Department of Geography, scwilliams8@crimson.ua.edu; and +Brad Bates (NOAA, Lynker, and the National Water Center, bradford.bates@noaa.gov) + +Derived from a Python version of a zonal statistics function written by Matthew Perry (@perrygeo). + +Description: This script isolates the number of pixels per class of a raster within the outlines of +one or more polygons and displays them in a table. It accomplishes this by rasterizing the vector file, +masking out the desired areas of both rasters, and then summarizing them in a dataframe. 
It makes use +of the gdal, numpy, and pandas function libraries. +Inputs: one raster file with at least one set of attributes; one vector file containing one or more polygon +boundaries +Output: a dataframe table with rows displayed by each polygon within the vector file, and columns +displaying the pixel count of each raster attribute class in the polygon +''' + +# Import raster and vector function libraries +#from types import NoneType +from osgeo import gdal, ogr +from osgeo.gdalconst import * +# Import numerical data library +import numpy as np +# Import file management library +import sys +import os +# Import data analysis library +import pandas as pd +import argparse +from pandas import DataFrame +import copy +import pathlib +import tempfile + +from pixel_counter_functions import (get_nlcd_counts, get_levee_counts, get_bridge_counts, get_nlcd_counts_inside_flood,get_mask_value_counts) + + +# Set up error handler +gdal.PushErrorHandler('CPLQuietErrorHandler') + +# Function to pologonize flood extent +def make_flood_extent_polygon(flood_extent): + flood_extent_dataset = gdal.Open(flood_extent) + cols = flood_extent_dataset.RasterXSize + rows = flood_extent_dataset.RasterYSize + + # Get some metadata to filter out NaN values + flood_extent_raster = flood_extent_dataset.GetRasterBand(1) + noDataVal = flood_extent_raster.GetNoDataValue() # no data value + scaleFactor = flood_extent_raster.GetScale() # scale factor + + # Assign flood_extent No Data Values to NaN + flood_extent_array = flood_extent_dataset.GetRasterBand(1).ReadAsArray(0, 0, cols, rows).astype(np.float) + flood_extent_array[flood_extent_array == int(noDataVal)] = np.nan + flood_extent_array = flood_extent_array / scaleFactor + + # Assign flood_extent Negative Values to NaN + flood_extent_nonzero_array = copy.copy(flood_extent_array) + flood_extent_nonzero_array[flood_extent_array < 0] = np.nan + + # make temporary output file + mem_drv = gdal.GetDriverByName('MEM') + target = mem_drv.Create('temp_tif', cols, rows, 1, gdal.GDT_Float32) + target.GetRasterBand(1).WriteArray(flood_extent_nonzero_array) + + # Add GeoTranform and Projection + geotrans = flood_extent_dataset.GetGeoTransform() + proj = flood_extent_dataset.GetProjection() + target.SetGeoTransform(geotrans) + target.SetProjection(proj) + target.FlushCache() + + # set up inputs for converting flood extent raster to polygon + band = target.GetRasterBand(1) + band.ReadAsArray() + + outshape_location = tempfile.gettempdir() + outshape_location_path = os.path.abspath(outshape_location) + outShapefile = outshape_location_path + "/" + "polygonized.shp" + driver = ogr.GetDriverByName("ESRI Shapefile") + outDatasource = driver.CreateDataSource(outShapefile) + outLayer = outDatasource.CreateLayer("buffalo", srs=None) + + # Add the DN field + newField = ogr.FieldDefn('HydroID', ogr.OFTInteger) + outLayer.CreateField(newField) + + # Polygonize + gdal.Polygonize(band, None, outLayer, 0, [], callback=None) + outDatasource.Destroy() + sourceRaster = None + + fullpath = os.path.abspath(outShapefile) + print(fullpath) + print(type(fullpath)) + + return fullpath + + +# Function that transforms vector dataset to raster +def bbox_to_pixel_offsets(gt, bbox): + originX = gt[0] + originY = gt[3] + pixel_width = gt[1] + pixel_height = gt[5] + x1 = int((bbox[0] - originX) / pixel_width) + x2 = int((bbox[1] - originX) / pixel_width) + 1 + + y1 = int((bbox[3] - originY) / pixel_height) + y2 = int((bbox[2] - originY) / pixel_height) + 1 + + xsize = x2 - x1 + ysize = y2 - y1 + return (x1, y1, 
xsize, ysize) + + +# Main function that determines zonal statistics of raster classes in a polygon area +def zonal_stats(vector_path, raster_path_dict, nodata_value=None, global_src_extent=False): + + stats = [] + + # Loop through different raster paths in the raster_path_dict and + # perform zonal statistics on the files. + for layer in raster_path_dict: + raster_path = raster_path_dict[layer] + if raster_path == "": # Only process if a raster path is provided + continue + if layer == 'flood_extent' and raster_path_dict["nlcd"]!= "": + vector_path = make_flood_extent_polygon(flood_extent) + raster_path = raster_path_dict["nlcd"] + + + # Opens raster file and sets path + rds = gdal.Open(raster_path) + + assert rds + rb = rds.GetRasterBand(1) + rgt = rds.GetGeoTransform() + + if nodata_value: + nodata_value = float(nodata_value) + rb.SetNoDataValue(nodata_value) + if vector_path == "": + print('No vector path provided. Continuing to next layer.') + continue + # Opens vector file and sets path + + try: + vds = ogr.Open(vector_path) + vlyr = vds.GetLayer(0) + except: + continue + + # Creates an in-memory numpy array of the source raster data covering the whole extent of the vector layer + if global_src_extent: + # use global source extent + # useful only when disk IO or raster scanning inefficiencies are your limiting factor + # advantage: reads raster data in one pass + # disadvantage: large vector extents may have big memory requirements + src_offset = bbox_to_pixel_offsets(rgt, vlyr.GetExtent()) + src_array = rb.ReadAsArray(*src_offset) + + # calculate new geotransform of the layer subset + new_gt = ( + (rgt[0] + (src_offset[0] * rgt[1])), + rgt[1], + 0.0, + (rgt[3] + (src_offset[1] * rgt[5])), + 0.0, + rgt[5] + ) + + mem_drv = ogr.GetDriverByName('Memory') + driver = gdal.GetDriverByName('MEM') + + # Loop through vectors, as many as exist in file + # Creates new list to contain their stats + + feat = vlyr.GetNextFeature() + while feat is not None: + + if not global_src_extent: + # use local source extent + # fastest option when you have fast disks and well indexed raster (ie tiled Geotiff) + # advantage: each feature uses the smallest raster chunk + # disadvantage: lots of reads on the source raster + src_offset = bbox_to_pixel_offsets(rgt, feat.geometry().GetEnvelope()) + src_array = rb.ReadAsArray(*src_offset) + + # calculate new geotransform of the feature subset + new_gt = ( + (rgt[0] + (src_offset[0] * rgt[1])), + rgt[1], + 0.0, + (rgt[3] + (src_offset[1] * rgt[5])), + 0.0, + rgt[5] + ) + + # Create a temporary vector layer in memory + mem_ds = mem_drv.CreateDataSource('out') + mem_layer = mem_ds.CreateLayer('poly', None, ogr.wkbPolygon) + mem_layer.CreateFeature(feat.Clone()) + + # Rasterize temporary vector layer + rvds = driver.Create('', src_offset[2], src_offset[3], 1, gdal.GDT_Byte) + rvds.SetGeoTransform(new_gt) + gdal.RasterizeLayer(rvds, [1], mem_layer, burn_values=[1]) + rv_array = rvds.ReadAsArray() + + # Mask the source data array with our current feature and get statistics (pixel count) of masked areas + # we take the logical_not to flip 0<->1 to get the correct mask effect + # we also mask out nodata values explictly + if src_array is None: + feat = vlyr.GetNextFeature() + continue + masked = np.ma.MaskedArray( + src_array, + mask=np.logical_or( + src_array == nodata_value, + np.logical_not(rv_array) + ) + ) + + # Call different counter functions depending on the raster's source. 
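
For reference, a minimal sketch of what bbox_to_pixel_offsets computes, using a hypothetical geotransform and envelope (not values from this PR): an OGR envelope (minX, maxX, minY, maxY) is turned into the (xoff, yoff, xsize, ysize) window that rb.ReadAsArray(*src_offset) reads.

    # Hypothetical 10 m raster whose top-left corner sits at (500000, 4200000)
    gt = (500000.0, 10.0, 0.0, 4200000.0, 0.0, -10.0)      # GDAL geotransform
    envelope = (500120.0, 500200.0, 4199900.0, 4199960.0)  # OGR GetEnvelope(): minX, maxX, minY, maxY

    x1 = int((envelope[0] - gt[0]) / gt[1])                # 12, column of minX
    x2 = int((envelope[1] - gt[0]) / gt[1]) + 1            # 21
    y1 = int((envelope[3] - gt[3]) / gt[5])                # 4, row of maxY (gt[5] is negative)
    y2 = int((envelope[2] - gt[3]) / gt[5]) + 1            # 11

    print((x1, y1, x2 - x1, y2 - y1))                      # (12, 4, 9, 7) == (xoff, yoff, xsize, ysize)
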
+ if layer == "nlcd": + feature_stats = get_nlcd_counts(feat, masked) + if layer == "agreement_raster": + feature_stats = get_mask_value_counts(feat, masked) + if layer == "levees": + feature_stats = get_levee_counts(feat, masked) + if layer == "bridges": + feature_stats = get_bridge_counts(feat, masked) + if layer == "flood_extent": + feature_stats = get_nlcd_counts_inside_flood(feat, masked) + + stats.append(feature_stats) + + rvds = None + mem_ds = None + feat = vlyr.GetNextFeature() + + vds = None + rds = None + if stats != []: + return stats + else: + return [] + + +# Creates and prints dataframe containing desired statistics +if __name__ == "__main__": + # opts = {'VECTOR': sys.argv[1:], 'RASTER': sys.argv[2:]} + # stats = zonal_stats(opts['VECTOR'], opts['RASTER']) + + parser = argparse.ArgumentParser(description='Computes pixel counts for raster classes within a vector area.') + parser.add_argument('-v', '--vector', + help='Path to vector file.', + required=False, + default="") + parser.add_argument('-n', '--nlcd', + help='Path to National Land Cover Database raster file.', + required=False, + default="") + parser.add_argument('-l', '--levees', + help='Path to levees raster file.', + required=False, + default="") + parser.add_argument('-b', '--bridges', + help='Path to bridges file.', + required=False, + default="") + parser.add_argument('-f', '--flood_extent', + help='Path to flood extent file.', + required=False, + default="") + parser.add_argument('-c', '--csv', + help='Path to export csv file.', + required=True) + # Assign variables from arguments. + args = vars(parser.parse_args()) + vector = args['vector'] + nlcd = args['nlcd'] + levees = args['levees'] + bridges = args['bridges'] + flood_extent = args['flood_extent'] + + csv = args['csv'] + + raster_path_dict = {'nlcd': nlcd, 'levees': levees, 'bridges': bridges, 'flood_extent': flood_extent} + stats = zonal_stats(vector, raster_path_dict) + + # Export CSV + df = pd.DataFrame(stats) + result = df[(df >= 0).all(axis=1)] + df2 = pd.DataFrame(result) + df2.to_csv(csv, index=False) diff --git a/tools/pixel_counter_functions.py b/tools/pixel_counter_functions.py new file mode 100644 index 000000000..63add9861 --- /dev/null +++ b/tools/pixel_counter_functions.py @@ -0,0 +1,110 @@ +import numpy as np +def get_nlcd_counts_inside_flood(feat,masked): + # Gets NLCD Counds inside flood extent + feature_stats = { + 'f_HydroID': feat.GetField('HydroID'), + 'f_TotalPixels': int(masked.count()), + 'f_lulc_11': np.count_nonzero((masked == [11])), + 'f_lulc_12': np.count_nonzero((masked == [12])), + 'f_lulc_21': np.count_nonzero((masked == [21])), + 'f_lulc_22': np.count_nonzero((masked == [22])), + 'f_lulc_23': np.count_nonzero((masked == [23])), + 'f_lulc_24': np.count_nonzero((masked == [24])), + 'f_lulc_31': np.count_nonzero((masked == [31])), + 'f_lulc_41': np.count_nonzero((masked == [41])), + 'f_lulc_42': np.count_nonzero((masked == [42])), + 'f_lulc_43': np.count_nonzero((masked == [43])), + 'f_lulc_51': np.count_nonzero((masked == [51])), + 'f_lulc_52': np.count_nonzero((masked == [52])), + 'f_lulc_71': np.count_nonzero((masked == [71])), + 'f_lulc_72': np.count_nonzero((masked == [72])), + 'f_lulc_73': np.count_nonzero((masked == [73])), + 'f_lulc_74': np.count_nonzero((masked == [74])), + 'f_lulc_81': np.count_nonzero((masked == [81])), + 'f_lulc_82': np.count_nonzero((masked == [82])), + 'f_lulc_90': np.count_nonzero((masked == [90])), + 'f_lulc_95': np.count_nonzero((masked == [95])), + 'f_lulc_1': np.count_nonzero((masked == 
[11])) + np.count_nonzero((masked == [12])), + 'f_lulc_2': np.count_nonzero((masked == [21])) + np.count_nonzero((masked == [22])) + + np.count_nonzero((masked == [23])) + np.count_nonzero((masked == [24])), + 'f_lulc_3': np.count_nonzero((masked == [31])), + 'f_lulc_4': np.count_nonzero((masked == [41])) + np.count_nonzero((masked == [42])) + + np.count_nonzero((masked == [43])), + 'f_lulc_5': np.count_nonzero((masked == [51])) + np.count_nonzero((masked == [52])), + 'f_lulc_7': np.count_nonzero((masked == [71])) + np.count_nonzero((masked == [72])) + + np.count_nonzero((masked == [73])) + np.count_nonzero((masked == [74])), + 'f_lulc_8': np.count_nonzero((masked == [81])) + np.count_nonzero((masked == [82])), + 'f_lulc_9': np.count_nonzero((masked == [90])) + np.count_nonzero((masked == [95])) + + } + return feature_stats + +def get_nlcd_counts(feat, masked): + # Acquires information for table on each raster attribute per poly feature + feature_stats = { + 'FID': int(feat.GetFID()), + 'HydroID': feat.GetField('HydroID'), + 'TotalPixels': int(masked.count()), + 'lulc_11': np.count_nonzero((masked == [11])), + 'lulc_12': np.count_nonzero((masked == [12])), + 'lulc_21': np.count_nonzero((masked == [21])), + 'lulc_22': np.count_nonzero((masked == [22])), + 'lulc_23': np.count_nonzero((masked == [23])), + 'lulc_24': np.count_nonzero((masked == [24])), + 'lulc_31': np.count_nonzero((masked == [31])), + 'lulc_41': np.count_nonzero((masked == [41])), + 'lulc_42': np.count_nonzero((masked == [42])), + 'lulc_43': np.count_nonzero((masked == [43])), + 'lulc_51': np.count_nonzero((masked == [51])), + 'lulc_52': np.count_nonzero((masked == [52])), + 'lulc_71': np.count_nonzero((masked == [71])), + 'lulc_72': np.count_nonzero((masked == [72])), + 'lulc_73': np.count_nonzero((masked == [73])), + 'lulc_74': np.count_nonzero((masked == [74])), + 'lulc_81': np.count_nonzero((masked == [81])), + 'lulc_82': np.count_nonzero((masked == [82])), + 'lulc_90': np.count_nonzero((masked == [90])), + 'lulc_95': np.count_nonzero((masked == [95])), + 'lulc_1': np.count_nonzero((masked == [11])) + np.count_nonzero((masked == [12])), + 'lulc_2': np.count_nonzero((masked == [21])) + np.count_nonzero((masked == [22])) + + np.count_nonzero((masked == [23])) + np.count_nonzero((masked == [24])), + 'lulc_3': np.count_nonzero((masked == [31])), + 'lulc_4': np.count_nonzero((masked == [41])) + np.count_nonzero((masked == [42])) + + np.count_nonzero((masked == [43])), + 'lulc_5': np.count_nonzero((masked == [51])) + np.count_nonzero((masked == [52])), + 'lulc_7': np.count_nonzero((masked == [71])) + np.count_nonzero((masked == [72])) + + np.count_nonzero((masked == [73])) + np.count_nonzero((masked == [74])), + 'lulc_8': np.count_nonzero((masked == [81])) + np.count_nonzero((masked == [82])), + 'lulc_9': np.count_nonzero((masked == [90])) + np.count_nonzero((masked == [95])) + + } + return feature_stats + + +def get_levee_counts(feat, masked): + + # Acquires information for table on each levee attribute per catchment + feature_stats = { + 'HydroID': feat.GetField('HydroID'), + 'TotalLeveePixels': int(masked.count()), + } + return feature_stats + +def get_bridge_counts(feat_masked): + pass + + +def get_mask_value_counts(feat, masked): + # Acquires information for table on each raster attribute per poly feature + feature_stats = { + 'FID': int(feat.GetFID()), + 'HydroID': feat.GetField('HydroID'), + 'TotalPixels': int(masked.count()), + 'tn': np.count_nonzero((masked == [0])), + 'fn': np.count_nonzero((masked == [1])), + 'fp': 
np.count_nonzero((masked == [2])), + 'tp': np.count_nonzero((masked == [3])), + 'mp': np.count_nonzero((masked == [4])) + } + + return feature_stats diff --git a/tools/pixel_counter_wrapper.py b/tools/pixel_counter_wrapper.py new file mode 100644 index 000000000..575486646 --- /dev/null +++ b/tools/pixel_counter_wrapper.py @@ -0,0 +1,102 @@ +import argparse +import pandas as pd +import os +from multiprocessing import Pool +import traceback + +from pixel_counter import zonal_stats + + +def queue_zonal_stats(fim_run_dir, raster_path_dict, output_dir, job_number): + """ + This function sets up multiprocessing of the process_zonal_stats() function. + + """ + + # Make output directory if it doesn't exist + if not os.path.exists(output_dir): + os.mkdir(output_dir) + + fim_version = os.path.split(fim_run_dir)[1] # Parse FIM Version + + fim_run_dir_list = os.listdir(fim_run_dir) # List all HUCs in FIM run dir + + # Define variables to pass into process_zonal_stats() + procs_list = [] + for huc in fim_run_dir_list: + vector = os.path.join(fim_run_dir, huc, 'demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg') + csv = os.path.join(output_dir, fim_version + '_' + huc + '_pixel_counts.csv') + print(csv) + procs_list.append([vector, csv, raster_path_dict]) + + # Initiate multiprocessing + with Pool(processes=job_number) as pool: + pool.map(process_zonal_stats, procs_list) + + +def process_zonal_stats(args): + """ + This function calls zonal_stats() in multiprocessing mode. + + """ + # Extract variables from args + vector = args[0] + csv = args[1] + raster = args[2] + + # Do the zonal stats + try: + stats = zonal_stats(vector,raster) # Call function + + # Export CSV + df = pd.DataFrame(stats) + df.to_csv(csv) + print("Finished writing: " + csv) + + # Save traceback to error file if error is encountered. + except Exception: + error_file = csv.replace('.csv', '_error.txt') + with open(error_file, 'w+') as f: + traceback.print_exc(file=f) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Computes pixel counts for raster classes within a vector area.') + parser.add_argument('-d', '--fim-run-dir', + help='Path to vector file.', + required=True) + parser.add_argument('-n', '--nlcd', + help='Path to National Land Cover Database raster file.', + required=False, + default="") + parser.add_argument('-l', '--levees', + help='Path to levees raster file.', + required=False, + default="") + parser.add_argument('-b', '--bridges', + help='Path to bridges file.', + required=False, + default="") + parser.add_argument('-o', '--output-dir', + help='Path to output directory where CSV files will be written.', + required=False, + default="") + parser.add_argument('-j', '--job-number', + help='Number of jobs to use.', + required=False, + default="") + + # Assign variables from arguments. 
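
The wrapper's fan-out is the standard multiprocessing Pool.map pattern: build one argument list per HUC, then let each worker unpack its own job. A self-contained toy of that bookkeeping, with placeholder paths and a stub worker standing in for zonal_stats:

    from multiprocessing import Pool

    def process_job(args):                                   # same calling shape as process_zonal_stats(args)
        vector, csv_out, raster_dict = args
        return f"{csv_out} <- zonal_stats({vector}, {sorted(raster_dict)})"

    if __name__ == "__main__":
        jobs = [[f"/data/huc0{n}/catchments.gpkg",           # placeholder vector path
                 f"/out/fim_x_huc0{n}_pixel_counts.csv",     # placeholder output csv
                 {"nlcd": "/data/nlcd.tif"}]                 # placeholder raster dict
                for n in (1, 2, 3)]
        with Pool(processes=2) as pool:
            for line in pool.map(process_job, jobs):
                print(line)
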
+ args = vars(parser.parse_args()) + nlcd = args['nlcd'] + levees = args['levees'] + bridges = args['bridges'] + + raster_path_dict = {'nlcd': nlcd, 'levees': levees, 'bridges': bridges} + + args = vars(parser.parse_args()) + + raster_path_dict = {'nlcd': args['nlcd']} + + queue_zonal_stats(args['fim_run_dir'], raster_path_dict, args['output_dir'], int(args['job_number'])) diff --git a/tools/plots.py b/tools/plots.py new file mode 100755 index 000000000..80a1b4fcb --- /dev/null +++ b/tools/plots.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 + +import seaborn as sns +import matplotlib.pyplot as plt +from matplotlib.ticker import FixedLocator, FixedFormatter, NullFormatter, FormatStrFormatter +import argparse +import pandas as pd +import numpy as np +from statsmodels.robust.robust_linear_model import RLM +from itertools import product + + + +def gms_box_plots(metrics_table,output_fig=None): + + if isinstance(metrics_table,pd.DataFrame): + pass + elif isinstance(metrics_table,str): + metrics_table = pd.read_csv(metrics_table) + elif isinstance(metrics_table,list): + metrics_table = pd.concat([pd.read_csv(mt) for mt in metrics_table],ignore_index=True) + else: + ValueError("Pass metrics_table as DataFrame or path to CSV") + + metrics_table = preparing_data_for_plotting(metrics_table) + + facetgrid = sns.catplot( + data=metrics_table,x='Model',y='Metric Value', + hue='Magnitude',inner='quartile',split=True, + order=['FR','MS','GMS'], + cut=2, + kind='violin', + col='Mannings N', + row='Metric', + margin_titles=False, + sharex=False, + sharey=False, + despine=True, + scale='width', + height=2.75, + aspect=1, + linewidth=1.5, + legend=False, + saturation=0.55 + ) + + facetgrid.map_dataframe( sns.regplot, + data=metrics_table, + x='model_integer_encodings', + y='Metric Value', + scatter=False, + ci=None, + robust=True, + color= 'darkgreen', + truncate=False, + line_kws= {'linewidth' : 2}, + label='Trend Line' + ) + + #facetgrid.fig.set_size_inches(10,15) + + # legend + handles, labels = facetgrid.axes[0,0].get_legend_handles_labels() + facetgrid.fig.legend(loc='lower left',ncol=3,bbox_to_anchor=(0.22,-0.06),handles=handles,labels=labels) + + # xlabel + plt.annotate('Model',xy=(0.52,0.054),xycoords='figure fraction') + + # override axes params + facetgrid = set_axes(facetgrid) + + facetgrid.fig.subplots_adjust( wspace=0, + hspace=0.1, + ) + + # set margins to tight + #plt.tight_layout() + + # set rlm metrics + metrics_table = robust_linear_model(metrics_table) + facetgrid = annotate_rlm(facetgrid,metrics_table) + + # set text sizes + facetgrid = set_text_sizes(facetgrid) + + if output_fig is not None: + plt.savefig(output_fig,dpi=300,format='jpg',bbox_inches='tight') + + plt.show() + + +def set_axes(facetgrid): + + range_by_row = ( (0.45,0.75,0.05), + (0.50,0.90,0.05), + (0.00,0.40,0.05) + ) + + nrows,ncols = facetgrid.axes.shape + + for rowIdx in range(nrows): + + all_ticks = np.arange(*range_by_row[rowIdx]) + major_ticks = all_ticks[1:] + #minor_ticks = (all_ticks[0] , all_ticks[-1]+range_by_row[rowIdx][-1]) + minor_ticks = np.arange( range_by_row[rowIdx][0], + range_by_row[rowIdx][1]+range_by_row[rowIdx][2], + range_by_row[rowIdx][2]*2 + ) + major_ticks = np.arange( range_by_row[rowIdx][0]+range_by_row[rowIdx][2], + range_by_row[rowIdx][1], + range_by_row[rowIdx][2]*2 + ) + + + for colIdx in range(ncols): + + facetgrid.axes[rowIdx,colIdx].set_ylim(range_by_row[rowIdx][:-1]) + facetgrid.axes[rowIdx,colIdx].yaxis.set_major_locator(FixedLocator(major_ticks)) + 
facetgrid.axes[rowIdx,colIdx].yaxis.set_major_formatter(FormatStrFormatter('%.2f')) + facetgrid.axes[rowIdx,colIdx].yaxis.set_minor_locator(FixedLocator(minor_ticks)) + facetgrid.axes[rowIdx,colIdx].yaxis.set_minor_formatter(NullFormatter()) + facetgrid.axes[rowIdx,colIdx].tick_params(labelrotation=45,axis='y') + + # set axis titles + current_title = facetgrid.axes[rowIdx,colIdx].get_title() + facetgrid.axes[rowIdx,colIdx].set_title('') + values = [val.strip() for val in current_title.split('|')] + metric = values[0][-3:] + mannings = values[1][-4:] + + # axis border + facetgrid.axes[rowIdx,colIdx].axhline(range_by_row[rowIdx][1],color='black') + + if colIdx == 1: + facetgrid.axes[rowIdx,colIdx].axvline(x=facetgrid.axes[rowIdx,colIdx].get_xlim()[1],color='black') + + if rowIdx == 0: + facetgrid.axes[rowIdx,colIdx].set_title(f'Manning\'s N = {mannings}',pad=25) + + if colIdx == 0: + facetgrid.axes[rowIdx,colIdx].set_ylabel(metric) + + # removes x ticks from everything but bottom row + if rowIdx < (nrows - 1): + facetgrid.axes[rowIdx,colIdx].set_xticks([]) + + # remove ticks from 2nd column + facetgrid.axes[rowIdx,1].set(yticklabels=[]) + + return(facetgrid) + + +def preparing_data_for_plotting(metrics_table): + + metrics_table.rename(columns= {'extent_config':'Model', + 'magnitude': 'Magnitude', + 'TPR' : 'POD' + }, + inplace=True) + + set_mannings = lambda df : 0.06 if 'n_6' in df['version'] else 0.12 + + metrics_table['Mannings N'] = metrics_table.apply(set_mannings,axis=1) + + metrics_table = pd.melt(metrics_table, id_vars=('Model','Magnitude','Mannings N'), + value_vars=('CSI','POD','FAR'), + var_name='Metric', + value_name='Metric Value' + ) + + model_to_value_dict = {'FR' : 0, 'MS': 1, 'GMS': 2} + + set_model_integer_encodings = lambda df : model_to_value_dict[ df['Model'] ] + + metrics_table['model_integer_encodings'] = metrics_table.apply(set_model_integer_encodings, axis=1) + + return(metrics_table) + + +def robust_linear_model(metrics_table): + + metrics = metrics_table.loc[:,'Metric'].unique() + mannings = metrics_table.loc[:,'Mannings N'].unique() + + metric_indices = { m : metrics_table.loc[:,'Metric'] == m for m in metrics } + mannings_indices = { m : metrics_table.loc[:,'Mannings N'] == m for m in mannings } + + metrics_table.set_index(['Metric','Mannings N'],inplace=True,drop=False) + metrics_table.sort_index(inplace=True) + for met,man in product(metrics,mannings): + + y = metrics_table.loc[(met,man),'Metric Value'].to_numpy() + + X = metrics_table.loc[(met,man),'model_integer_encodings'].to_numpy() + + X = np.stack((np.ones(X.shape),X),axis=1) + + model = RLM(y,X) + results = model.fit() + + beta1 = results.params[1] + pval_beta1=results.pvalues[1]/2 # two tailed to one tail + + metrics_table.loc[(met,man),'beta1'] = beta1 + metrics_table.loc[(met,man),'beta1_pvalue'] = pval_beta1 + + metrics_table.reset_index(drop=True,inplace=True) + + return(metrics_table) + + +def annotate_rlm(facetgrid,metrics_table): + + metrics_table.set_index(['Metric','Mannings N'],inplace=True,drop=False) + metrics_table.sort_index(inplace=True) + + metric_index_dict = {'CSI':0,'POD':1,'FAR':2} + mannings_index_dict = {0.06:0 , 0.12:1} + + for met,man in metrics_table.index.unique(): + + rowIdx = metric_index_dict[met] + colIdx = mannings_index_dict[man] + + beta1 = metrics_table.loc[(met,man),'beta1'].unique()[0] + beta1_pvalue = metrics_table.loc[(met,man),'beta1_pvalue'].unique()[0] + + facetgrid.axes[rowIdx,colIdx].annotate( r'$\beta_1$ = {:.4f} | p-value = {:.3f}'.format(beta1,beta1_pvalue), 
+ xy=(0.01,1.025), + xycoords='axes fraction', + color='darkgreen') + + return(facetgrid) + + +def set_text_sizes(facetgrid): + + plt.rc('font',size=12) + + return(facetgrid) + + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') + parser.add_argument('-m','--metrics-table',help='Metrics table',required=True,nargs='+') + parser.add_argument('-o','--output-fig',help='Output figure',required=False,default=None) + + args = vars(parser.parse_args()) + + gms_box_plots(**args) diff --git a/tools/plots/eval_plots.py b/tools/plots/eval_plots.py new file mode 100755 index 000000000..9c29087e1 --- /dev/null +++ b/tools/plots/eval_plots.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python3 +import pandas as pd +from pathlib import Path +import argparse +from natsort import natsorted +import geopandas as gpd +from utils.shared_functions import filter_dataframe, boxplot, scatterplot, barplot +def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False): + + ''' + Creates plots and summary statistics using metrics compiled from + synthesize_test_cases. Required inputs are metrics_csv and workspace. + Outputs include: + aggregate__.csv: this csv + contains the aggregated total statistics (i.e. CSI, FAR, POD) + using the summed area_sq_km fields + __common_sites.csv: this csv + contains the unique sites (e.g usgs/nws: nws_lid; ble: huc08) + considered for aggregation/plots for each magnitude. The selected + sites occur in all versions analyzed. For example, if FIM 1, + FIM 2, FIM 3.0.0.3 were versions analyzed, the common sites + would be those that had data for ALL versions. This + analysis is then redone for each magnitude. As such, the number + of sites may vary with magnitude. The number of sites for each + magnitude is annotated on generated plots. + __analyzed_data.csv: this is the + dataset used to create plots and aggregate statistics. It is + a subset of the input metrics file and consists of the common + sites. + csi_aggr__.png: bar plot of the + aggregated CSI scores. Number of common sites is annotated + (see list of sites listed in *_*_common_sites.csv). + csi__.png: box plot of CSI scores + (sites weighted equally). Number of common sites is annotated + (see list of sites listed in *_*_common_sites.csv). + far__*.png: box plot of FAR scores + (sites weighted equally). Number of common sites is annotated + (see list of sites listed in *_*_common_sites.csv). + tpr__*.png: box plot of TPR/POD + scores (sites weighted equally). Number of common sites is + annotated (see list of sites listed in *_*_common_sites.csv). + csi_scatter__*.png: scatter plot comparing + two versions for a given magnitude. This is only generated if + there are exactly two versions analyzed. + + Parameters + ---------- + metrics_csv : STRING + Path to csv produced as part of synthesize_test_cases containing + all metrics across all versions. + workspace : STRING + Path to the output workspace. Subdirectories will be created + reflecting the evaluation datasets. + versions: LIST + A list of versions to be aggregated/plotted. Uses the "startswith" + approach. Versions should be supplied in the order they are to + be plotted. For example: ['fim_', 'fb']; This will evaluate all + versions that start with fim_ (e.g. fim_1, fim_2, fim_3) and any + feature branch that starts with "fb". 
To esbalish version order, + the fim versions are naturally sorted and then fb versions + (naturally sorted) are appended. These versions are also used to + filter the input metric csv as only these versions are retained + for analysis. + stats: LIST + A list of statistics to be plotted. Must be identical to column + field in metrics_csv. CSI, POD, TPR are currently calculated, if + additional statistics are desired formulas would need to be coded. + alternate_ahps_query : STRING, optional + The default is false. Currently the default ahps query is same + as done for apg goals. If a different query is desired it can be + supplied and it will supercede the default query. + spatial_ahps : DICTIONARY, optional + The default is false. A dictionary with keys as follows: + 'static': Path to AHPS point file created during creation of + FIM 3 static libraries. + 'evaluated': Path to extent file created during the creation + of the NWS/USGS AHPS preprocessing. + 'metadata': Path to previously created file that contains + metadata about each site (feature_id, wfo, rfc and etc). + No spatial layers will be created if set to False, if a dictionary + is supplied then a spatial layer is produced. + fim_1_ms: BOOL + Default is false. If True then fim_1 rows are duplicated with + extent_config set to MS. This allows for FIM 1 to be included + in MS plots/stats (helpful for nws/usgs ahps comparisons). + + Returns + ------- + all_datasets : DICT + Dictionary containing all datasets generated. + Keys: (benchmark_source, extent_config), + Values: (filtered dataframe, common sites) + + ''' + + #Import metrics csv as DataFrame and initialize all_datasets dictionary + csv_df = pd.read_csv(metrics_csv) + + #fim_1_ms flag enables FIM 1 to be shown on MS plots/stats + if fim_1_ms: + #Query FIM 1 rows based on version beginning with "fim_1" + fim_1_rows = csv_df.query('version.str.startswith("fim_1")').copy() + #Set extent configuration to MS (instead of FR) + fim_1_rows['extent_config'] = 'MS' + #Append duplicate FIM 1 rows to original dataframe + csv_df = csv_df.append(fim_1_rows, ignore_index = True) + + #If versions are supplied then filter out + if versions: + #Filter out versions based on supplied version list + metrics = csv_df.query('version.str.startswith(tuple(@versions))') + else: + metrics = csv_df + + #Group by benchmark source + benchmark_by_source = metrics.groupby(['benchmark_source', 'extent_config']) + + #Iterate through benchmark_by_source. Pre-filter metrics dataframe + #as needed (e.g. usgs/nws filter query). Then further filtering to + #discard all hucs/nws_lid that are not present across all analyzed + #versions for a given magnitude. The final filtered dataset is written + #to a dictionary with the key (benchmark source, extent config) + #and values (filtered dataframe, common sites). + all_datasets = {} + for (benchmark_source, extent_configuration), benchmark_metrics in benchmark_by_source: + + #If source is usgs/nws define the base resolution and query + #(use alternate query if passed). Append filtered datasets to + #all_datasets dictionary. + if benchmark_source in ['usgs','nws']: + + #Set the base processing unit for the ahps runs. + base_resolution = 'nws_lid' + + #Default query (used for APG) it could be that bad_sites should be modified. 
If so pass an alternate query using the "alternate_ahps_query" + bad_sites = ['grfi2','ksdm7','hohn4','rwdn4'] + query = "not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" + + #If alternate ahps evaluation query argument is passed, use that. + if alternate_ahps_query: + query = alternate_ahps_query + + #Filter the dataset based on query + ahps_metrics = benchmark_metrics.query(query) + + #Filter out all instances where the base_resolution doesn't + #exist across all desired fim versions for a given magnitude. + all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(ahps_metrics, base_resolution) + + #If source is 'ble', set base_resolution and append ble dataset + #to all_datasets dictionary + elif benchmark_source == 'ble': + + #Set the base processing unit for ble runs + base_resolution = 'huc' + + #Filter out all instances where base_resolution doesn't exist + #across all desired fim versions for a given magnitude. + all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(benchmark_metrics, base_resolution) + + #For each dataset in all_datasets, generate plots and aggregate statistics. + for (dataset_name,configuration), (dataset, sites) in all_datasets.items(): + + #Define and create the output workspace as a subfolder within + #the supplied workspace + output_workspace = Path(workspace) / dataset_name / configuration.lower() + output_workspace.mkdir(parents = True, exist_ok = True) + + #Write out the filtered dataset and common sites to file + dataset.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_analyzed_data.csv'), index = False) + sites_pd = pd.DataFrame.from_dict(sites, orient = 'index').transpose() + sites_pd.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_common_sites.csv'), index = False) + + #set the order of the magnitudes and define base resolution. + if dataset_name == 'ble': + magnitude_order = ['100yr', '500yr'] + base_resolution = 'huc' + elif dataset_name in ['usgs','nws']: + magnitude_order = ['action','minor','moderate','major'] + base_resolution = 'nws_lid' + + #Calculate aggregated metrics based on total_sq_km fields. + dataset_sums = dataset.groupby(['version', 'magnitude'])[['TP_area_km2','FP_area_km2','FN_area_km2']].sum() + dataset_sums['csi'] = dataset_sums['TP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FP_area_km2'] + dataset_sums['FN_area_km2']) + dataset_sums['far'] = dataset_sums['FP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FP_area_km2']) + dataset_sums['pod'] = dataset_sums['TP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FN_area_km2']) + dataset_sums = dataset_sums.reset_index() + + #Write aggregated metrics to file. + dataset_sums.to_csv(output_workspace / f'aggregate_{dataset_name}_{configuration.lower()}.csv', index = False ) + + #This section naturally orders analyzed versions which defines + #the hue order for the generated plots. + #Get all versions in dataset + all_versions = list(dataset.version.unique()) + version_order = [] + #If versions are not specified then use all available versions + #and assign to versions_list + if not versions: + versions_list = all_versions + #if versions are supplied assign to versions_list + else: + versions_list = versions + #For each version supplied by the user + for version in versions_list: + #Select all the versions that start with the supplied version. 
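
The aggregate statistics computed above are plain area ratios over the summed areas; a toy example with made-up areas (km2) shows the groupby-sum followed by CSI = TP/(TP+FP+FN), FAR = FP/(TP+FP) and POD = TP/(TP+FN):

    import pandas as pd

    df = pd.DataFrame({                                      # made-up per-site areas in km2
        "version":     ["fim_3_0_0_0", "fim_3_0_0_0", "fim_4_0_0_0", "fim_4_0_0_0"],
        "magnitude":   ["100yr", "100yr", "100yr", "100yr"],
        "TP_area_km2": [40.0, 60.0, 45.0, 65.0],
        "FP_area_km2": [10.0, 20.0,  8.0, 15.0],
        "FN_area_km2": [ 5.0, 15.0,  4.0, 10.0],
    })

    sums = df.groupby(["version", "magnitude"])[["TP_area_km2", "FP_area_km2", "FN_area_km2"]].sum()
    sums["csi"] = sums.TP_area_km2 / (sums.TP_area_km2 + sums.FP_area_km2 + sums.FN_area_km2)
    sums["far"] = sums.FP_area_km2 / (sums.TP_area_km2 + sums.FP_area_km2)
    sums["pod"] = sums.TP_area_km2 / (sums.TP_area_km2 + sums.FN_area_km2)
    print(sums.reset_index().round(3))
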
+ selected_versions = [sel_version for sel_version in all_versions if sel_version.startswith(version)] + #Naturally sort selected_versions + selected_versions = natsorted(selected_versions) + #Populate version order based on the sorted subsets. + version_order.extend(selected_versions) + + #Define textbox which will contain the counts of each magnitude. + textbox = [] + for magnitude in sites: + count = len(sites[magnitude]) + line_text = f'{magnitude.title()} Sites = {count}' + textbox.append(line_text) + textbox = '\n'.join(textbox) + + #Create aggregate barplot + aggregate_file = output_workspace / (f'csi_aggr_{dataset_name}_{configuration.lower()}.png') + barplot(dataframe = dataset_sums, x_field = 'magnitude', x_order = magnitude_order, y_field = 'csi', hue_field = 'version', ordered_hue = version_order, title_text = f'Aggregate {dataset_name.upper()} FIM Scores', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = aggregate_file) + + #Create box plots for each metric in supplied stats. + for stat in stats: + output_file = output_workspace / (f'{stat.lower()}_{dataset_name}_{configuration.lower()}.png') + boxplot(dataframe = dataset, x_field = 'magnitude', x_order = magnitude_order, y_field = stat, hue_field = 'version', ordered_hue = version_order, title_text = f'{dataset_name.upper()} FIM Sites', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = output_file) + + #Get the last 2 versions from the version order for scatter plot. + if len(version_order) == 2: + x_version, y_version = version_order + for magnitude in magnitude_order: + #Scatterplot comparison between last 2 versions. + x_csi = dataset.query(f'version == "{x_version}" & magnitude == "{magnitude}"')[[base_resolution, 'CSI']] + y_csi = dataset.query(f'version == "{y_version}" & magnitude == "{magnitude}"')[[base_resolution, 'CSI']] + plotdf = pd.merge(x_csi, y_csi, on = base_resolution, suffixes = (f"_{x_version}",f"_{y_version}")) + #Define arguments for scatterplot function. + title_text = f'CSI {magnitude}' + dest_file = output_workspace / f'csi_scatter_{magnitude}_{configuration.lower()}.png' + scatterplot(dataframe = plotdf, x_field = f'CSI_{x_version}', y_field = f'CSI_{y_version}', title_text = title_text, annotate = False, dest_file = dest_file) + + + ####################################################################### + #Create spatial layers with threshold and mapping information + ######################################################################## + if spatial_ahps: + + #Read in supplied shapefile layers + #Layer containing metadata for each site (feature_id, wfo, etc). + #Convert nws_lid to lower case. + ahps_metadata = gpd.read_file(spatial_ahps['metadata']) + ahps_metadata['nws_lid'] = ahps_metadata['nws_lid'].str.lower() + metadata_crs = ahps_metadata.crs + + #Extent layer generated from preprocessing NWS/USGS datasets + evaluated_ahps_extent = gpd.read_file(spatial_ahps['evaluated']) + + #Extent layer generated from static ahps library preprocessing + static_library = gpd.read_file(spatial_ahps['static']) + + #Fields to keep + #Get list of fields to keep in merge + preserved_static_library_fields = ['nws_lid'] + [i for i in static_library.columns if i.startswith(('Q','S'))] + #Get list of fields to keep in merge. 
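
As a quick illustration of the resulting hue order (hypothetical version strings, not from this PR): the "startswith" filter plus natsorted yields the fim_ versions first, naturally sorted, then the feature branches.

    from natsort import natsorted

    all_versions = ["fim_3_0_21_0", "fim_3_0_3_0", "fb_dev_a", "fim_2_3_3", "fb_dev_b"]
    requested    = ["fim_", "fb"]                            # same semantics as the --versions argument

    version_order = []
    for prefix in requested:
        version_order.extend(natsorted(v for v in all_versions if v.startswith(prefix)))

    print(version_order)
    # ['fim_2_3_3', 'fim_3_0_3_0', 'fim_3_0_21_0', 'fb_dev_a', 'fb_dev_b']
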
+ preserved_evaluated_ahps_fields = ['nws_lid', 'source', 'geometry'] + [i for i in evaluated_ahps_extent.columns if i.startswith(('action','minor','moderate','major'))] + + #Join tables to evaluated_ahps_extent + evaluated_ahps_extent = evaluated_ahps_extent[preserved_evaluated_ahps_fields] + evaluated_ahps_extent = evaluated_ahps_extent.merge(ahps_metadata, on = 'nws_lid') + evaluated_ahps_extent['geometry'] = evaluated_ahps_extent['geometry_y'] + evaluated_ahps_extent.drop(columns = ['geometry_y','geometry_x'], inplace = True) + evaluated_ahps_extent = evaluated_ahps_extent.merge(static_library[preserved_static_library_fields], on = 'nws_lid') + + #Join dataset metrics to evaluated_ahps_extent data. + final_join = pd.DataFrame() + for (dataset_name, configuration), (dataset, sites) in all_datasets.items(): + #Only select ahps from dataset if config is MS + if dataset_name in ['usgs','nws'] and configuration == 'MS': + #Select records from evaluated_ahps_extent that match the dataset name + subset = evaluated_ahps_extent.query(f'source == "{dataset_name}"') + #Join to dataset + dataset_with_subset = dataset.merge(subset, on = 'nws_lid') + #Append rows to final_join dataframe + final_join = final_join.append(dataset_with_subset) + + #Modify version field + final_join['version'] = final_join.version.str.split('_nws|_usgs').str[0] + + #Write geodataframe to file + gdf = gpd.GeoDataFrame(final_join, geometry = final_join['geometry'], crs = metadata_crs) + output_shapefile = Path(workspace) / 'nws_usgs_site_info.shp' + gdf.to_file(output_shapefile) + + + +####################################################################### +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Plot and aggregate statistics for benchmark datasets (BLE/AHPS libraries)') + parser.add_argument('-m','--metrics_csv', help = 'Metrics csv created from synthesize test cases.', required = True) + parser.add_argument('-w', '--workspace', help = 'Output workspace', required = True) + parser.add_argument('-v', '--versions', help = 'List of versions to be plotted/aggregated. Versions are filtered using the "startswith" approach. For example, ["fim_","fb1"] would retain all versions that began with "fim_" (e.g. fim_1..., fim_2..., fim_3...) as well as any feature branch that began with "fb". An other example ["fim_3","fb"] would result in all fim_3 versions being plotted along with the fb.', nargs = '+', default = []) + parser.add_argument('-s', '--stats', help = 'List of statistics (abbrev to 3 letters) to be plotted/aggregated', nargs = '+', default = ['CSI','TPR','FAR'], required = False) + parser.add_argument('-q', '--alternate_ahps_query',help = 'Alternate filter query for AHPS. Default is: "not nws_lid.isnull() & not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" where bad_sites are (grfi2,ksdm7,hohn4,rwdn4)', default = False, required = False) + parser.add_argument('-sp', '--spatial_ahps', help = 'If spatial point layer is desired, supply a csv with 3 lines of the following format: metadata, path/to/metadata/shapefile\nevaluated, path/to/evaluated/shapefile\nstatic, path/to/static/shapefile.', default = False, required = False) + parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False) + + #Extract to dictionary and assign to variables. 
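
The column selections above lean on str.startswith accepting a tuple of prefixes; with a made-up column list:

    columns = ["nws_lid", "Q100", "S_major", "geometry", "notes"]          # made-up column names
    keep = ["nws_lid"] + [c for c in columns if c.startswith(("Q", "S"))]
    print(keep)                                                            # ['nws_lid', 'Q100', 'S_major']
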
+ args = vars(parser.parse_args()) + + #If errors occur reassign error to True + error = False + #Create dictionary if file specified for spatial_ahps + if args['spatial_ahps']: + #Create dictionary + spatial_dict = {} + with open(args['spatial_ahps']) as file: + for line in file: + key, value = line.strip('\n').split(',') + spatial_dict[key] = Path(value) + args['spatial_ahps'] = spatial_dict + #Check that all required keys are present and overwrite args with spatial_dict + required_keys = set(['metadata', 'evaluated', 'static']) + if required_keys - spatial_dict.keys(): + print('\n Required keys are: metadata, evaluated, static') + error = True + else: + args['spatial_ahps'] = spatial_dict + + + #Finalize Variables + m = args['metrics_csv'] + w = args['workspace'] + v = args['versions'] + s = args['stats'] + q = args['alternate_ahps_query'] + sp= args['spatial_ahps'] + f = args['fim_1_ms'] + + #Run eval_plots function + if not error: + eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f) \ No newline at end of file diff --git a/tools/plots/utils/__init__.py b/tools/plots/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tools/plots/utils/shared_functions.py b/tools/plots/utils/shared_functions.py new file mode 100644 index 000000000..60342059e --- /dev/null +++ b/tools/plots/utils/shared_functions.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +import re +######################################################################### +#Create boxplot +######################################################################### +def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, dest_file = False): + ''' + Create boxplots. + + Parameters + ---------- + dataframe : DataFrame + Pandas dataframe data to be plotted. + x_field : STR + Field to use for x-axis + x_order : List + Order to arrange the x-axis. + y_field : STR + Field to use for the y-axis + hue_field : STR + Field to use for hue (typically FIM version) + title_text : STR + Text for plot title. + fim_configuration: STR + Configuration of FIM (FR or MS or Composite). + simplify_legend : BOOL, optional + If True, it will simplify legend to FIM 1, FIM 2, FIM 3. + The default is False. + dest_file : STR or BOOL, optional + If STR provide the full path to the figure to be saved. If False + no plot is saved to disk. The default is False. + + Returns + ------- + fig : MATPLOTLIB + Plot. + + ''' + + #initialize plot + fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) + #Use seaborn to plot the boxplot + axes=sns.boxplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright') + #set title of plot + axes.set_title(f'{title_text} ({y_field})',fontsize=20, weight = 'bold') + #Set yticks and background horizontal line. + axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) + for index,ytick in enumerate(axes.get_yticks()): + plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1) + #Define y axis label and x axis label. + axes.set_ylabel(f'{y_field}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel('',fontsize=0,weight = 'bold') + #Set sizes of ticks and legend. 
+ axes.tick_params(labelsize = 'xx-large') + axes.legend(markerscale = 2, fontsize =20, loc = 'lower left') + + #If simple legend desired + if simplify_legend: + #trim labels to FIM 1, FIM 2, and the FIM 3 version + handles, org_labels = axes.get_legend_handles_labels() + label_dict = {} + for label in org_labels: + if 'fim_1' in label: + label_dict[label] = 'FIM 1' + elif 'fim_2' in label: + label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() + elif 'fim_3' in label: + label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() + if label.endswith('_c'): + label_dict[label] = label_dict[label] + ' c' + else: + label_dict[label] = label + ' ' + fim_configuration.lower() + #Define simplified labels as a list. + new_labels = [label_dict[label] for label in org_labels] + #Define legend location. FAR needs to be in different location than CSI/POD. + if y_field == 'FAR': + legend_location = 'upper right' + else: + legend_location = 'lower left' + #rename legend labels to the simplified labels. + axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = legend_location, ncol = int(np.ceil(len(new_labels)/7))) + #Print textbox if supplied + if textbox_str: + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=14, verticalalignment='top', bbox=box_props) + + #If figure to be saved to disk, then do so, otherwise return figure + if dest_file: + fig.savefig(dest_file) + plt.close(fig) + else: + return fig + +######################################################################### +#Create scatter plot +######################################################################### +def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annotate = False, dest_file = False): + ''' + Create boxplots. + + Parameters + ---------- + dataframe : DataFrame + Pandas dataframe data to be plotted. + x_field : STR + Field to use for x-axis (Assumes FIM 2) + y_field : STR + Field to use for the y-axis (Assumes FIM 3) + title_text : STR + Text for plot title. + stats_text : STR or BOOL + Text for stats to place on chart. Default is false (no stats printed) + dest_file : STR or BOOL, optional + If STR provide the full path to the figure to be saved. If False + no plot is saved to disk. The default is False. + + Returnsy + ------- + fig : MATPLOTLIB + Plot. + + ''' + + #initialize plot + fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) + + #Use seaborn to plot the boxplot + axes=sns.scatterplot(data=dataframe, x=x_field, y=y_field, color = 'black', s = 150) + + #Set xticks and yticks and background horizontal line. + axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) + axes.set(xlim=(0.0,1.0),xticks = np.arange(0,1.1,0.1)) + axes.grid(b=True, which='major', axis='both') + + #Set sizes of ticks and legend. + axes.tick_params(labelsize = 'xx-large') + + #Define y axis label and x axis label. 
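
The legend simplification above is easiest to follow with a concrete, hypothetical version label: the regex strips the _fr/_ms suffix, underscores become dots, and the configuration plus any trailing "_c" flag are appended.

    import re

    label = "fim_3_0_21_0_ms_c"                 # hypothetical FIM 3 version label
    fim_configuration = "MS"

    simplified = (re.split('_fr|_ms', label)[0].replace('_', '.').replace('fim.', 'FIM ')
                  + ' ' + fim_configuration.lower())
    if label.endswith('_c'):
        simplified += ' c'

    print(simplified)                           # FIM 3.0.21.0 ms c
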
+ axes.set_ylabel(f'{y_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel(f'{x_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold') + + #Plot diagonal line + diag_range = [0,1] + axes.plot(diag_range, diag_range, color='gray', transform=axes.transAxes) + + + #set title of plot + axes.set_title(f'{title_text}',fontsize=20, weight = 'bold') + + if annotate: + #Set text for labels + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + textbox_str = 'Target Better' + axes.text(0.3, 0.6, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor') + textbox_str = 'Baseline Better' + axes.text(0.5, 0.2, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor') + + if stats_text: + #Add statistics textbox + axes.text(0.01, 0.80, stats_text, transform=axes.transAxes, fontsize=24, verticalalignment='top', bbox=box_props) + + #If figure to be saved to disk, then do so, otherwise return fig + if dest_file: + fig.savefig(dest_file) + plt.close(fig) + else: + return fig +######################################################################### +#Create barplot +######################################################################### +def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, display_values = False, dest_file = False): + ''' + Create barplots. + + Parameters + ---------- + dataframe : DataFrame + Pandas dataframe data to be plotted. + x_field : STR + Field to use for x-axis + x_order : List + Order to arrange the x-axis. + y_field : STR + Field to use for the y-axis + hue_field : STR + Field to use for hue (typically FIM version) + title_text : STR + Text for plot title. + fim_configuration: STR + Configuration of FIM (FR or MS or Composite). + simplify_legend : BOOL, optional + If True, it will simplify legend to FIM 1, FIM 2, FIM 3. + Default is False. + display_values : BOOL, optional + If True, Y values will be displayed above bars. + Default is False. + dest_file : STR or BOOL, optional + If STR provide the full path to the figure to be saved. If False + no plot is saved to disk. Default is False. + + Returns + ------- + fig : MATPLOTLIB + Plot. + + ''' + + #initialize plot + fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) + #Use seaborn to plot the boxplot + axes=sns.barplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright') + #set title of plot + axes.set_title(f'{title_text}',fontsize=20, weight = 'bold') + #Set yticks and background horizontal line. + axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) + for index,ytick in enumerate(axes.get_yticks()): + plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1) + #Define y axis label and x axis label. + axes.set_ylabel(f'{y_field.upper()}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel('',fontsize=0,weight = 'bold') + #Set sizes of ticks and legend. 
+ axes.tick_params(labelsize = 'xx-large') + axes.legend(markerscale = 2, fontsize =20, loc = 'upper right') + #If simple legend desired + if simplify_legend: + #trim labels to FIM 1, FIM 2, FIM 3 + handles, org_labels = axes.get_legend_handles_labels() + label_dict = {} + for label in org_labels: + if 'fim_1' in label: + label_dict[label] = 'FIM 1' + elif 'fim_2' in label: + label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() + elif 'fim_3' in label: + label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() + if label.endswith('_c'): + label_dict[label] = label_dict[label] + ' c' + else: + label_dict[label] = label + ' ' + fim_configuration.lower() + #Define simplified labels as a list. + new_labels = [label_dict[label] for label in org_labels] + #rename legend labels to the simplified labels. + axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = 'upper right', ncol = int(np.ceil(len(new_labels)/7))) + #Add Textbox + if textbox_str: + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=18, verticalalignment='top', bbox=box_props) + + #Display Y values above bars + if display_values: + #Add values of bars directly above bar. + for patch in axes.patches: + value = round(patch.get_height(),3) + axes.text(patch.get_x()+patch.get_width()/2., + patch.get_height(), + '{:1.3f}'.format(value), + ha="center", fontsize=18) + + #If figure to be saved to disk, then do so, otherwise return fig + if dest_file: + fig.savefig(dest_file) + plt.close(fig) + else: + return fig +####################################################################### +#Filter dataframe generated from csv file from run_test_case aggregation +######################################################################## +def filter_dataframe(dataframe, unique_field): + ''' + + This script will filter out the sites (or hucs) which are not consistently + found for all versions for a given magnitude. For example, an AHPS + lid site must have output for all 3 versions (fim1, fim2, fim3) for + a given magnitude (eg action) otherwise that lid is filtered out. + Likewise for a BLE a huc must have output for all 3 versions + (fim1, fim2, fim3) for a given magnitude (eg 100yr) otherwise it is + filtered out. + + Parameters + ---------- + dataframe : Pandas DataFrame + Containing the input metrics originating from synthesize_test_cases + unique_field : STR + base resolution for each benchmark source: 'nws'/'usgs' (nws_lid) + ble (huc). + + Returns + ------- + final_filtered_dataframe : Pandas Dataframe + Filtered dataframe that contains only common sites (lids or hucs) between versions for each magnitude. For example, for AHPS all sites which were run for each version for a given magnitude will be kept or for ble, all hucs which ran for all versions for a given magnitude. + unique_sites: DICT + The sites that were included in the dataframe for each magnitude. + + ''' + + #Get lists of sites for each magnitude/version + unique_sites = dataframe.groupby(['magnitude','version'])[unique_field].agg('unique') + #Get unique magnitudes + magnitudes = dataframe.magnitude.unique() + #Create new dataframe to hold metrics for the common sites as well as the actual lists of common sites. 
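
The common-site filtering that follows is a per-magnitude set intersection; a toy example with made-up lids shows what survives:

    sites_per_version = [
        {"abcd1", "efgh2", "ijkl3"},            # fim_1 lids at 'action' (made up)
        {"abcd1", "ijkl3", "mnop4"},            # fim_2
        {"abcd1", "ijkl3"},                     # fim_3
    ]
    common = set.intersection(*sites_per_version)
    print(sorted(common))                       # ['abcd1', 'ijkl3']: only these lids are kept for 'action'
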
+ final_filtered_dataframe = pd.DataFrame() + all_unique_sites = {} + #Cycle through each magnitude + for magnitude in magnitudes: + #Compile a list of sets containing unique lids pertaining to each threshold. List contains 3 unique sets [{fim1:unique lids},{fim2: unique lids},{fim3: unique lids}] + sites_per_magnitude=[set(a) for a in unique_sites[magnitude]] + #Intersect the sets to get the common lids per threshold then convert to list. + common_sites_per_magnitude = list(set.intersection(*sites_per_magnitude)) + #Write common sites to dataframe + all_unique_sites[magnitude] = common_sites_per_magnitude + #Query filtered dataframe and only include data associated with the common sites for that magnitude + filtered_common_sites = dataframe.query(f'magnitude == "{magnitude}" & {unique_field} in @common_sites_per_magnitude') + #Append the data for each magnitude to a final dataframe that will contain data for all common sites for all magnitudes. + final_filtered_dataframe = final_filtered_dataframe.append(filtered_common_sites, ignore_index = True) + + return final_filtered_dataframe, all_unique_sites + diff --git a/tools/preprocess_fimx.py b/tools/preprocess_fimx.py deleted file mode 100755 index cad6058d0..000000000 --- a/tools/preprocess_fimx.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python3 - -import rasterio -from rasterio.warp import calculate_default_transform, reproject, Resampling -from rasterio import features -import numpy as np -import geopandas as gpd -import os -import argparse - -def fimx_to_fim3(catchments_path, raster_value_field, hand_raster_path, template_raster, out_hand_path = None, out_catchment_path = None): - ''' - This function will produce a hand and catchment grid from fim1/fim2 for use in the fim3 inundation.py script. To accomplish this it: - 1) reprojects the hand raster to the template raster crs. - 2) It reprojects the catchment polygons to the template raster crs and then converts the polygons to a raster. The reprojected HAND raster properties are applied (extent, resolution) - 3) Performs the intersection of the two rasters (HAND/catchments) and applies NODATA if either dataset has NODATA - 4) Writes out the preprocessed HAND and Catchments raster if a path is specified. - - Parameters - ---------- - catchments_path : STRING - The path to the catchments vector data (assumes it is in a database). - raster_value_field : STRING - Attribute field in catchments layer whos values will be used for raster creation. - hand_raster_path : STRING - The path to the HAND raster dataset (ARC GRID is acceptable). - template_raster : STRING - Path to the template raster so that a CRS can be applied to output. - out_hand_path : STRING, optional - Path to the output HAND grid (Gtif format). The default is None. - out_catchment_path : STRING, optional - Path to the output catchment grid (Gtif format). The default is None. - - Returns - ------- - hand_masked : Numpy Array - Preprocessed HAND raster array. - hand_profile : LIST - Preprocessed HAND profile. - catchment_masked : Numpy Array - Preprocessed Catchment raster array. - catchment_profile : LIST - Preprocessed catchment raster profile. 
- - ''' - - - # Read in template raster as band object - reference = rasterio.open(template_raster) - - ## Step 1: Convert HAND grid - # Read in the hand raster - hand = rasterio.open(hand_raster_path) - hand_arr = hand.read(1) - #Determine the new transform and dimensions of reprojected raster (CRS = reference raster) - new_transform, new_width, new_height = calculate_default_transform(hand.crs, reference.crs, hand.width, hand.height, *hand.bounds) - # Define an empty array that is same dimensions as output by the "calculate_default_transform" command - hand_proj = np.empty((new_height,new_width), dtype=np.float) - # Reproject to target dataset (resample method is bilinear due to elevation type data) - hand_nodata_value = -2147483648 - reproject(hand_arr, - destination = hand_proj, - src_transform = hand.transform, - src_crs = hand.crs, - src_nodata = hand.nodata, - dst_transform = new_transform, - dst_crs = reference.crs, - dst_nodata = hand_nodata_value, - dst_resolution = hand.res, - resampling = Resampling.bilinear) - - # Update profile data type and no data value - hand_profile = reference.profile - hand_profile.update(dtype = rasterio.float32) - hand_profile.update(nodata = hand_nodata_value) - hand_profile.update(width = new_width) - hand_profile.update(height = new_height) - hand_profile.update(transform = new_transform) - - ## Step 2: Catchments to Polygons (same extent as the HAND raster) - # Read in the catchment layer to geopandas dataframe and convert to same CRS as reference raster - gdbpath, layername = os.path.split(catchments_path) - gdb_layer=gpd.read_file(gdbpath, driver='FileGDB', layer=layername) - proj_gdb_layer = gdb_layer.to_crs(reference.crs) - # Prepare vector data to be written to raster - shapes = list(zip(proj_gdb_layer['geometry'],proj_gdb_layer[raster_value_field].astype('int32'))) - # Write vector data to raster image. Fill raster with zeros for areas that do not have data. We will set nodata to be zero later - catchment_proj = features.rasterize(((geometry, value) for geometry, value in shapes), fill = 0, out_shape=hand_proj.shape, transform=hand_profile['transform'], dtype = 'int32' ) - # Save raster image to in-memory dataset. Reset dtype and nodata values. - catchment_profile = hand_profile.copy() - catchment_profile.update(dtype = 'int32') - catchment_profile.update(nodata=0) - - ## Step 3: Union of NODATA locations applied to both HAND and Catchment grids - catchment_masked = np.where(np.logical_or(hand_proj == hand_profile['nodata'], catchment_proj == catchment_profile['nodata']), catchment_profile['nodata'],catchment_proj) - # Assign NODATA to hand where both catchment and hand have NODATA else assign hand values. 
- hand_masked = np.where(np.logical_or(hand_proj == hand_profile['nodata'], catchment_proj == catchment_profile['nodata']), hand_profile['nodata'],hand_proj) - - ## Step 4: Write out hand and catchment rasters to file if path is specified - if out_hand_path is not None: - os.makedirs(os.path.split(out_hand_path)[0], exist_ok = True) - with rasterio.Env(): - with rasterio.open(out_hand_path, 'w', **hand_profile) as hnd_dst: - hnd_dst.write(hand_masked.astype('float32'),1) - if out_catchment_path is not None: - os.makedirs(os.path.split(out_catchment_path)[0], exist_ok = True) - with rasterio.Env(): - with rasterio.open(out_catchment_path, 'w', **catchment_profile) as cat_dst: - cat_dst.write(catchment_masked.astype('int32'),1) - - return hand_masked, hand_profile, catchment_masked, catchment_profile - -if __name__ == '__main__': - # Parse arguments - parser = argparse.ArgumentParser(description = 'Preprocess FIM 1 and FIM 2 HAND and Catchment grids to be compatible with FIM 3.') - parser.add_argument('-c','--catchments-path', help = 'Path to catchments vector file', required = True) - parser.add_argument('-f', '--raster-value-field', help = 'Attribute ID field from which raster values will be assigned. Typically this will be "HydroID" for FIM2 and "feature_ID" for fim 1.', required = True) - parser.add_argument('-ha', '--hand-raster-path', help = 'Path to HAND raster (can be in ESRI GRID format)', required = True) - parser.add_argument('-t', '--template-raster', help = 'Path to a template raster. Properties (CRS, resolution) of the template raster will be used to preprocess HAND and Catchments grids', required = True) - parser.add_argument('-oh', '--out-hand-path', help = 'Path to the output HAND raster. Raster must be named "rem_clipped_zeroed_masked.tif', required = True) - parser.add_argument('-oc', '--out-catchment-path', help = 'Path to the output Catchment raster. 
Raster must be named "gw_catchments_reaches_clipped_addedAttributes.tif"', required = True) - # Extract to dictionary and assign to variables - args = vars(parser.parse_args()) - # Run fimx to fim3 function - fimx_to_fim3(**args) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index d2adeba08..53ed5c97c 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -2,7 +2,8 @@ import os import sys -import pandas as pd +import re +import pandas as pd, geopandas as gpd import numpy as np import argparse import matplotlib.pyplot as plt @@ -11,10 +12,16 @@ from multiprocessing import Pool from os.path import isfile, join import shutil +import traceback +import logging import warnings from pathlib import Path import time warnings.simplefilter(action='ignore', category=FutureWarning) +import rasterio +from rasterio import features as riofeatures +from rasterio import plot as rioplot +from shapely.geometry import Polygon """ Plot Rating Curves and Compare to USGS Gages @@ -62,7 +69,7 @@ def check_file_age(file): def generate_rating_curve_metrics(args): elev_table_filename = args[0] - hydrotable_filename = args[1] + branches_folder = args[1] usgs_gages_filename = args[2] usgs_recurr_stats_filename = args[3] nwm_recurr_data_filename = args[4] @@ -70,162 +77,212 @@ def generate_rating_curve_metrics(args): nwm_flow_dir = args[6] catfim_flows_filename = args[7] huc = args[8] + alt_plot = args[9] + single_plot = args[10] + + logging.info("Generating rating curve metrics for huc: " + str(huc)) + elev_table = pd.read_csv(elev_table_filename,dtype={'location_id': object, 'feature_id':object,'HydroID':object, 'levpa_id':object}) + + # Filter out null and non-integer location_id entries (the crosswalk steps tries to fill AHPS only sites with the nws_lid) + elev_table.dropna(subset=['location_id'], inplace=True) + elev_table = elev_table[elev_table['location_id'].apply(lambda x: str(x).isdigit())] + + # Read in the USGS gages rating curve database csv + usgs_gages = pd.read_csv(usgs_gages_filename,dtype={'location_id': object, 'feature_id':object}) + + # Aggregate FIM4 hydroTables + if not elev_table.empty: + hydrotable = pd.DataFrame() + for branch in elev_table.levpa_id.unique(): + branch_elev_table = elev_table.loc[elev_table.levpa_id == branch].copy() + #branch_elev_table = elev_table.loc[(elev_table.levpa_id == branch) & (elev_table.location_id.notnull())].copy() + branch_hydrotable = pd.read_csv(join(branches_folder, str(branch), f'hydroTable_{branch}.csv'),dtype={'HydroID':object,'feature_id':object,'obs_source':object,'last_updated':object,'submitter':object}) + # Only pull SRC for hydroids that are in this branch + branch_hydrotable = branch_hydrotable.loc[branch_hydrotable.HydroID.isin(branch_elev_table.HydroID)] + branch_hydrotable.drop(columns=['order_'], inplace=True) + # Join SRC with elevation data + branch_elev_table.rename(columns={'feature_id':'fim_feature_id'}, inplace=True) + branch_hydrotable = branch_hydrotable.merge(branch_elev_table, on="HydroID") + # Append to full rating curve dataframe + if hydrotable.empty: + hydrotable = branch_hydrotable + else: + hydrotable = hydrotable.append(branch_hydrotable) - elev_table = pd.read_csv(elev_table_filename,dtype={'location_id': str}) - hydrotable = pd.read_csv(hydrotable_filename,dtype={'HUC': str,'feature_id': str}) - usgs_gages = pd.read_csv(usgs_gages_filename,dtype={'location_id': str}) - - # Join rating curves with elevation data - hydrotable = hydrotable.merge(elev_table, 
on="HydroID") - relevant_gages = list(hydrotable.location_id.unique()) - usgs_gages = usgs_gages[usgs_gages['location_id'].isin(relevant_gages)] - usgs_gages = usgs_gages.reset_index(drop=True) - - if len(usgs_gages) > 0: - - # Adjust rating curve to elevation - hydrotable['elevation_ft'] = (hydrotable.stage + hydrotable.dem_adj_elevation) * 3.28084 # convert from m to ft - # hydrotable['raw_elevation_ft'] = (hydrotable.stage + hydrotable.dem_elevation) * 3.28084 # convert from m to ft - hydrotable['discharge_cfs'] = hydrotable.discharge_cms * 35.3147 - usgs_gages = usgs_gages.rename(columns={"flow": "discharge_cfs", "elevation_navd88": "elevation_ft"}) - - hydrotable['source'] = "FIM" - usgs_gages['source'] = "USGS" - limited_hydrotable = hydrotable.filter(items=['location_id','elevation_ft','discharge_cfs','source']) - select_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation_ft', 'discharge_cfs','source']) - - rating_curves = limited_hydrotable.append(select_usgs_gages) - - # Add stream order - stream_orders = hydrotable.filter(items=['location_id','str_order']).drop_duplicates() - rating_curves = rating_curves.merge(stream_orders, on='location_id') - rating_curves['str_order'] = rating_curves['str_order'].astype('int') - - # plot rating curves - generate_facet_plot(rating_curves, rc_comparison_plot_filename) - - # NWM recurr intervals - recurr_1_5_yr_filename = join(nwm_flow_dir,'recurr_1_5_cms.csv') - recurr_5_yr_filename = join(nwm_flow_dir,'recurr_5_0_cms.csv') - recurr_10_yr_filename = join(nwm_flow_dir,'recurr_10_0_cms.csv') - - # Update column names - recurr_1_5_yr = pd.read_csv(recurr_1_5_yr_filename,dtype={'feature_id': str}) - recurr_1_5_yr = recurr_1_5_yr.rename(columns={"discharge": "1.5"}) - recurr_5_yr = pd.read_csv(recurr_5_yr_filename,dtype={'feature_id': str}) - recurr_5_yr = recurr_5_yr.rename(columns={"discharge": "5.0"}) - recurr_10_yr = pd.read_csv(recurr_10_yr_filename,dtype={'feature_id': str}) - recurr_10_yr = recurr_10_yr.rename(columns={"discharge": "10.0"}) - - # Merge NWM recurr intervals into a single layer - nwm_recurr_intervals_all = reduce(lambda x,y: pd.merge(x,y, on='feature_id', how='outer'), [recurr_1_5_yr, recurr_5_yr, recurr_10_yr]) - nwm_recurr_intervals_all = pd.melt(nwm_recurr_intervals_all, id_vars=['feature_id'], value_vars=['1.5','5.0','10.0'], var_name='recurr_interval', value_name='discharge_cms') - - # Append catfim data (already set up in format similar to nwm_recurr_intervals_all) - cat_fim = pd.read_csv(catfim_flows_filename, dtype={'feature_id':str}) - nwm_recurr_intervals_all = nwm_recurr_intervals_all.append(cat_fim) - - # Convert discharge to cfs and filter - nwm_recurr_intervals_all['discharge_cfs'] = nwm_recurr_intervals_all.discharge_cms * 35.3147 - nwm_recurr_intervals_all = nwm_recurr_intervals_all.filter(items=['discharge_cfs', 'recurr_interval','feature_id']).drop_duplicates() - - # Identify unique gages - usgs_crosswalk = hydrotable.filter(items=['location_id', 'feature_id']).drop_duplicates() - - nwm_recurr_data_table = pd.DataFrame() - usgs_recurr_data = pd.DataFrame() - - # Interpolate USGS/FIM elevation at each gage - for index, gage in usgs_crosswalk.iterrows(): - - # Interpolate USGS elevation at NWM recurrence intervals - usgs_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="USGS")] - - if len(usgs_rc) <1: - print(f"missing USGS rating curve data for usgs station {gage.location_id} in huc {huc}") - continue - - str_order = np.unique(usgs_rc.str_order).item() - 
feature_id = str(gage.feature_id) - - usgs_pred_elev = get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) - - # Handle sites missing data - if len(usgs_pred_elev) <1: - print(f"missing USGS elevation data for usgs station {gage.location_id} in huc {huc}") - continue - - # Clean up data - usgs_pred_elev['location_id'] = gage.location_id - usgs_pred_elev = usgs_pred_elev.filter(items=['location_id','recurr_interval', 'discharge_cfs','pred_elev']) - usgs_pred_elev = usgs_pred_elev.rename(columns={"pred_elev": "USGS"}) - - # Interpolate FIM elevation at NWM recurrence intervals - fim_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="FIM")] - - if len(fim_rc) <1: - print(f"missing FIM rating curve data for usgs station {gage.location_id} in huc {huc}") - continue - - fim_pred_elev = get_reccur_intervals(fim_rc, usgs_crosswalk,nwm_recurr_intervals_all) - - # Handle sites missing data - if len(fim_pred_elev) <1: - print(f"missing FIM elevation data for usgs station {gage.location_id} in huc {huc}") - continue - - # Clean up data - fim_pred_elev = fim_pred_elev.rename(columns={"pred_elev": "FIM"}) - fim_pred_elev = fim_pred_elev.filter(items=['recurr_interval', 'discharge_cfs','FIM']) - usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, on=['recurr_interval','discharge_cfs']) - - # Add attributes - usgs_pred_elev['HUC'] = huc - usgs_pred_elev['HUC4'] = huc[0:4] - usgs_pred_elev['str_order'] = str_order - usgs_pred_elev['feature_id'] = feature_id - - # Melt dataframe - usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','feature_id','recurr_interval','discharge_cfs','HUC','HUC4','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') - nwm_recurr_data_table = nwm_recurr_data_table.append(usgs_pred_elev) - - # Interpolate FIM elevation at USGS observations - # fim_rc = fim_rc.merge(usgs_crosswalk, on="location_id") - # usgs_rc = usgs_rc.rename(columns={"elevation_ft": "USGS"}) - # - # # Sort stage in ascending order - # usgs_rc = usgs_rc.sort_values('USGS',ascending=True) - # - # # Interpolate FIM elevation at USGS observations - # usgs_rc['FIM'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation_ft'], left = np.nan, right = np.nan) - # usgs_rc = usgs_rc[usgs_rc['FIM'].notna()] - # usgs_rc = usgs_rc.drop(columns=["source"]) - # - # # Melt dataframe - # usgs_rc = pd.melt(usgs_rc, id_vars=['location_id','discharge_cfs','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') - # - # if not usgs_rc.empty: - # usgs_recurr_data = usgs_recurr_data.append(usgs_rc) - - # Generate stats for all sites in huc - # if not usgs_recurr_data.empty: - # usgs_recurr_stats_table = calculate_rc_stats_elev(usgs_recurr_data) - # usgs_recurr_stats_table.to_csv(usgs_recurr_stats_filename,index=False) - - # # Generate plots (not currently being used) - # fim_elev_at_USGS_rc_plot_filename = join(dirname(rc_comparison_plot_filename),'FIM_elevations_at_USGS_rc_' + str(huc) +'.png') - # generate_facet_plot(usgs_recurr_data, fim_elev_at_USGS_rc_plot_filename) - - if not nwm_recurr_data_table.empty: - nwm_recurr_data_table.discharge_cfs = np.round(nwm_recurr_data_table.discharge_cfs,2) - nwm_recurr_data_table.elevation_ft = np.round(nwm_recurr_data_table.elevation_ft,2) - nwm_recurr_data_table.to_csv(nwm_recurr_data_filename,index=False) - + # Join rating curves with elevation data + #elev_table.rename(columns={'feature_id':'fim_feature_id'}, inplace=True) + 
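A minimal sketch of the per-branch hydroTable aggregation introduced above, assuming the FIM4 layout shown in this hunk (one hydroTable_<levpa_id>.csv per branch under <huc>/branches/<levpa_id>/). The helper name is hypothetical, and pd.concat is used here purely for illustration in place of the DataFrame.append pattern used in the patch.

# Illustrative sketch only (not part of this patch).
import os
import pandas as pd

def aggregate_branch_hydrotables(elev_table: pd.DataFrame, branches_folder: str) -> pd.DataFrame:
    """Build one rating-curve table from per-branch hydroTables (hypothetical helper)."""
    pieces = []
    for branch in elev_table['levpa_id'].unique():
        branch_elev = elev_table.loc[elev_table['levpa_id'] == branch].copy()
        # Avoid a feature_id column collision on merge, as done above
        branch_elev = branch_elev.rename(columns={'feature_id': 'fim_feature_id'})
        src = pd.read_csv(
            os.path.join(branches_folder, str(branch), f'hydroTable_{branch}.csv'),
            dtype={'HydroID': object, 'feature_id': object})
        # Keep only the synthetic rating curves for HydroIDs with a gage in this branch
        src = src.loc[src['HydroID'].isin(branch_elev['HydroID'])]
        # Join each branch SRC with its elevation/crosswalk attributes
        pieces.append(src.merge(branch_elev, on='HydroID'))
    return pd.concat(pieces, ignore_index=True) if pieces else pd.DataFrame()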
#hydrotable = hydrotable.merge(elev_table, on="HydroID") + if 'location_id' in hydrotable.columns: + relevant_gages = list(hydrotable.location_id.unique()) + else: + relevant_gages = [] + usgs_gages = usgs_gages[usgs_gages['location_id'].isin(relevant_gages)] + usgs_gages = usgs_gages.reset_index(drop=True) + + if len(usgs_gages) > 0: + + # Adjust rating curve to elevation + hydrotable['elevation_ft'] = (hydrotable.stage + hydrotable.dem_adj_elevation) * 3.28084 # convert from m to ft + # hydrotable['raw_elevation_ft'] = (hydrotable.stage + hydrotable.dem_elevation) * 3.28084 # convert from m to ft + hydrotable['discharge_cfs'] = hydrotable.discharge_cms * 35.3147 + usgs_gages = usgs_gages.rename(columns={"flow": "discharge_cfs", "elevation_navd88": "elevation_ft"}) + + hydrotable['source'] = "FIM" + usgs_gages['source'] = "USGS" + limited_hydrotable = hydrotable.filter(items=['location_id','elevation_ft','discharge_cfs','source', 'HydroID', 'levpa_id', 'dem_adj_elevation']) + select_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation_ft', 'discharge_cfs','source']) + if 'default_discharge_cms' in hydrotable.columns: # check if both "FIM" and "FIM_default" SRCs are available + hydrotable['default_discharge_cfs'] = hydrotable.default_discharge_cms * 35.3147 + limited_hydrotable_default = hydrotable.filter(items=['location_id','elevation_ft', 'default_discharge_cfs','HydroID', 'levpa_id', 'dem_adj_elevation']) + limited_hydrotable_default['discharge_cfs'] = limited_hydrotable_default.default_discharge_cfs + limited_hydrotable_default['source'] = "FIM_default" + rating_curves = limited_hydrotable.append(select_usgs_gages) + rating_curves = rating_curves.append(limited_hydrotable_default) + else: + rating_curves = limited_hydrotable.append(select_usgs_gages) + + # Add stream order + stream_orders = hydrotable.filter(items=['location_id','order_']).drop_duplicates() + rating_curves = rating_curves.merge(stream_orders, on='location_id') + rating_curves['order_'].fillna(0,inplace=True) + rating_curves['order_'] = rating_curves['order_'].astype('int') + + + # NWM recurr intervals + recurr_intervals = ("2","5","10","25","50","100") + recurr_dfs = [] + for interval in recurr_intervals: + recurr_file = join(nwm_flow_dir, 'nwm21_17C_recurr_{}_0_cms.csv'.format(interval)) + df = pd.read_csv(recurr_file, dtype={'feature_id': str}) + # Update column names + df = df.rename(columns={"discharge": interval}) + recurr_dfs.append(df) + + # Merge NWM recurr intervals into a single layer + nwm_recurr_intervals_all = reduce(lambda x,y: pd.merge(x,y, on='feature_id', how='outer'), recurr_dfs) + nwm_recurr_intervals_all = pd.melt(nwm_recurr_intervals_all, id_vars=['feature_id'], value_vars=recurr_intervals, var_name='recurr_interval', value_name='discharge_cms') + + # Append catfim data (already set up in format similar to nwm_recurr_intervals_all) + cat_fim = pd.read_csv(catfim_flows_filename, dtype={'feature_id':str}) + nwm_recurr_intervals_all = nwm_recurr_intervals_all.append(cat_fim) + + # Convert discharge to cfs and filter + nwm_recurr_intervals_all['discharge_cfs'] = nwm_recurr_intervals_all.discharge_cms * 35.3147 + nwm_recurr_intervals_all = nwm_recurr_intervals_all.filter(items=['discharge_cfs', 'recurr_interval','feature_id']).drop_duplicates() + + # Identify unique gages + usgs_crosswalk = hydrotable.filter(items=['location_id', 'feature_id']).drop_duplicates() + usgs_crosswalk.dropna(subset=['location_id'], inplace=True) + + nwm_recurr_data_table = pd.DataFrame() + usgs_recurr_data = 
pd.DataFrame() + + # Interpolate USGS/FIM elevation at each gage + for index, gage in usgs_crosswalk.iterrows(): + + # Interpolate USGS elevation at NWM recurrence intervals + usgs_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="USGS")] + + if len(usgs_rc) <1: + logging.info(f"missing USGS rating curve data for usgs station {gage.location_id} in huc {huc}") + continue + + str_order = np.unique(usgs_rc.order_).item() + feature_id = str(gage.feature_id) + + usgs_pred_elev = get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) + + # Handle sites missing data + if len(usgs_pred_elev) <1: + logging.info(f"WARNING: missing USGS elevation data for usgs station {gage.location_id} in huc {huc}") + continue + + # Clean up data + usgs_pred_elev['location_id'] = gage.location_id + usgs_pred_elev = usgs_pred_elev.filter(items=['location_id','recurr_interval', 'discharge_cfs','pred_elev']) + usgs_pred_elev = usgs_pred_elev.rename(columns={"pred_elev": "USGS"}) + + # Interpolate FIM elevation at NWM recurrence intervals + fim_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="FIM")] + + if len(fim_rc) <1: + logging.info(f"missing FIM rating curve data for usgs station {gage.location_id} in huc {huc}") + continue + + fim_pred_elev = get_reccur_intervals(fim_rc, usgs_crosswalk,nwm_recurr_intervals_all) + + # Handle sites missing data + if len(fim_pred_elev) <1: + logging.info(f"WARNING: missing FIM elevation data for usgs station {gage.location_id} in huc {huc}") + continue + + # Clean up data + fim_pred_elev = fim_pred_elev.rename(columns={"pred_elev": "FIM"}) + fim_pred_elev = fim_pred_elev.filter(items=['recurr_interval', 'discharge_cfs','FIM']) + usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, on=['recurr_interval','discharge_cfs']) + + # Add attributes + usgs_pred_elev['HUC'] = huc + usgs_pred_elev['HUC4'] = huc[0:4] + usgs_pred_elev['str_order'] = str_order + usgs_pred_elev['feature_id'] = feature_id + + # Melt dataframe + usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','feature_id','recurr_interval','discharge_cfs','HUC','HUC4','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') + nwm_recurr_data_table = nwm_recurr_data_table.append(usgs_pred_elev) + + # Interpolate FIM elevation at USGS observations + # fim_rc = fim_rc.merge(usgs_crosswalk, on="location_id") + # usgs_rc = usgs_rc.rename(columns={"elevation_ft": "USGS"}) + # + # # Sort stage in ascending order + # usgs_rc = usgs_rc.sort_values('USGS',ascending=True) + # + # # Interpolate FIM elevation at USGS observations + # usgs_rc['FIM'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation_ft'], left = np.nan, right = np.nan) + # usgs_rc = usgs_rc[usgs_rc['FIM'].notna()] + # usgs_rc = usgs_rc.drop(columns=["source"]) + # + # # Melt dataframe + # usgs_rc = pd.melt(usgs_rc, id_vars=['location_id','discharge_cfs','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') + # + # if not usgs_rc.empty: + # usgs_recurr_data = usgs_recurr_data.append(usgs_rc) + + # Generate stats for all sites in huc + # if not usgs_recurr_data.empty: + # usgs_recurr_stats_table = calculate_rc_stats_elev(usgs_recurr_data) + # usgs_recurr_stats_table.to_csv(usgs_recurr_stats_filename,index=False) + + # # Generate plots (not currently being used) + # fim_elev_at_USGS_rc_plot_filename = 
join(dirname(rc_comparison_plot_filename),'FIM_elevations_at_USGS_rc_' + str(huc) +'.png') + # generate_facet_plot(usgs_recurr_data, fim_elev_at_USGS_rc_plot_filename) + + if not nwm_recurr_data_table.empty: + nwm_recurr_data_table.discharge_cfs = np.round(nwm_recurr_data_table.discharge_cfs,2) + nwm_recurr_data_table.elevation_ft = np.round(nwm_recurr_data_table.elevation_ft,2) + nwm_recurr_data_table.to_csv(nwm_recurr_data_filename,index=False) + if 'location_id' not in nwm_recurr_data_table.columns: + logging.info(f"WARNING: nwm_recurr_data_table is missing location_id column for gage {relevant_gages} in huc {huc}") + + # plot rating curves + if alt_plot: + generate_rc_and_rem_plots(rating_curves, rc_comparison_plot_filename, nwm_recurr_data_table, branches_folder) + elif single_plot: + generate_single_plot(rating_curves, rc_comparison_plot_filename, nwm_recurr_data_table) + else: + generate_facet_plot(rating_curves, rc_comparison_plot_filename, nwm_recurr_data_table) + else: + logging.info(f"no USGS data for gage(s): {relevant_gages} in huc {huc}") else: - print(f"no USGS data for gage(s): {relevant_gages} in huc {huc}") + logging.info(f"no valid USGS gages found in huc {huc} (note: may be ahps sites without UGSG gages)") def aggregate_metrics(output_dir,procs_list,stat_groups): + # Default stat group to location_id + if stat_groups is None: + stat_groups = ['location_id'] + # agg_usgs_interp_elev_stats = join(output_dir,'agg_usgs_interp_elev_stats.csv') agg_nwm_recurr_flow_elev = join(output_dir,'agg_nwm_recurr_flow_elevations.csv') agg_nwm_recurr_flow_elev_stats = join(output_dir,f"agg_nwm_recurr_flow_elev_stats_{'_'.join(stat_groups)}.csv") @@ -264,20 +321,124 @@ def aggregate_metrics(output_dir,procs_list,stat_groups): agg_recurr_stats_table.to_csv(agg_nwm_recurr_flow_elev_stats,index=False) + return agg_recurr_stats_table -def generate_facet_plot(rc, plot_filename): +def generate_single_plot(rc, plot_filename, recurr_data_table): + + tmp_rc = rc.copy() + # Filter FIM elevation based on USGS data for gage in rc.location_id.unique(): + rc = rc[rc.location_id==gage] + + plot_filename_splitext = os.path.splitext(plot_filename) + gage_plot_filename = plot_filename_splitext[0] + '_' + gage + plot_filename_splitext[1] + + #print(recurr_data_table.head) + try: + min_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.min() + max_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.max() + min_q = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].discharge_cfs.min() + max_q = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].discharge_cfs.max() + ri100 = recurr_data_table[(recurr_data_table.location_id == gage) & (recurr_data_table.source == 'FIM')].discharge_cfs.max() + + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (((rc.elevation_ft > (max_elev + 2)) | (rc.discharge_cfs > ri100)) & (rc.discharge_cfs > max_q))].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation_ft < min_elev - 2) & (rc.discharge_cfs < min_q)].index) + + if 'default_discharge_cfs' in rc.columns: # Plot both "FIM" and "FIM_default" rating curves + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM_default') & (((rc.elevation_ft > (max_elev + 2)) | (rc.discharge_cfs > ri100)) & (rc.discharge_cfs > max_q))].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM_default') & (rc.elevation_ft < min_elev - 2)].index) + except Exception as ex: + summary = traceback.StackSummary.extract( + 
traceback.walk_stack(None)) + logging.info("WARNING: rating curve dataframe not processed correctly...") + + rc = rc.rename(columns={"location_id": "USGS Gage"}) + + # split out branch 0 FIM data + rc['source_branch'] = np.where((rc.source == 'FIM') & (rc.levpa_id == '0'), 'FIM_b0', np.where((rc.source == 'FIM_default') & (rc.levpa_id == '0'), 'FIM_default_b0', rc.source)) + #rc['source_branch'] = np.where((rc.source == 'FIM_default') & (rc.levpa_id == '0'), 'FIM_default_b0', rc.source) + + ## Generate rating curve plots + num_plots = len(rc["USGS Gage"].unique()) + if num_plots > 3: + columns = num_plots // 3 + else: + columns = 1 - min_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.min() - max_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.max() + sns.set(style="ticks") - rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation_ft > (max_elev + 2))].index) - rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation_ft < min_elev - 2)].index) + # Plot both "FIM" and "FIM_default" rating curves + if '0' in rc.levpa_id.values: # checks to see if branch zero data exists in the rating curve df + hue_order = ['USGS','FIM','FIM_default','FIM_b0','FIM_default_b0'] if 'default_discharge_cfs' in rc.columns else ['USGS','FIM','FIM_b0'] + kw = {'color': ['blue','green','orange','green','orange'], 'linestyle' : ["-","-","-","--","--"]} if 'default_discharge_cfs' in rc.columns else {'color': ['blue','green','green'], 'linestyle' : ["-","-","--"]} + else: + hue_order = ['USGS','FIM','FIM_default'] if 'default_discharge_cfs' in rc.columns else ['USGS','FIM'] + kw = {'color': ['blue','green','orange'], 'linestyle' : ["-","-","-"]} if 'default_discharge_cfs' in rc.columns else {'color': ['blue','green'], 'linestyle' : ["-","_"]} + # Facet Grid + g = sns.FacetGrid(rc, col="USGS Gage", hue="source_branch", hue_order=hue_order, + sharex=False, sharey=False,col_wrap=columns, + height=3.5, aspect=1.65, hue_kws=kw) + g.map(plt.plot, "discharge_cfs", "elevation_ft", linewidth=2, alpha=0.8) + g.set_axis_labels(x_var="Discharge (cfs)", y_var="Elevation (ft)") + + ## Plot recurrence intervals + axes = g.axes_dict + for gage in axes: + ax = axes[gage] + plt.sca(ax) + try: + recurr_data = recurr_data_table[(recurr_data_table.location_id == gage) & (recurr_data_table.source == 'FIM')]\ + .filter(items=['recurr_interval', 'discharge_cfs']) + for i, r in recurr_data.iterrows(): + if not r.recurr_interval.isnumeric(): continue # skip catfim flows + l = 'NWM 17C\nRecurrence' if r.recurr_interval == '2' else None # only label 2 yr + plt.axvline(x=r.discharge_cfs, c='purple', linewidth=0.5, label=l) # plot recurrence intervals + plt.text(r.discharge_cfs, ax.get_ylim()[1] - (ax.get_ylim()[1]-ax.get_ylim()[0])*0.03, r.recurr_interval, size='small', c='purple') + except Exception as ex: + summary = traceback.StackSummary.extract( + traceback.walk_stack(None)) + logging.info("WARNING: Could not plot recurrence intervals...") + + # Adjust the arrangement of the plots + g.fig.tight_layout(w_pad=1) + g.add_legend() + + plt.savefig(gage_plot_filename) + plt.close() + + rc = tmp_rc + +def generate_facet_plot(rc, plot_filename, recurr_data_table): + + # Filter FIM elevation based on USGS data + for gage in rc.location_id.unique(): + #print(recurr_data_table.head) + try: + min_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.min() + max_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.max() + min_q = 
rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].discharge_cfs.min() + max_q = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].discharge_cfs.max() + ri100 = recurr_data_table[(recurr_data_table.location_id == gage) & (recurr_data_table.source == 'FIM')].discharge_cfs.max() + + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (((rc.elevation_ft > (max_elev + 2)) | (rc.discharge_cfs > ri100)) & (rc.discharge_cfs > max_q))].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation_ft < min_elev - 2) & (rc.discharge_cfs < min_q)].index) + + if 'default_discharge_cfs' in rc.columns: # Plot both "FIM" and "FIM_default" rating curves + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM_default') & (((rc.elevation_ft > (max_elev + 2)) | (rc.discharge_cfs > ri100)) & (rc.discharge_cfs > max_q))].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM_default') & (rc.elevation_ft < min_elev - 2)].index) + except Exception as ex: + summary = traceback.StackSummary.extract( + traceback.walk_stack(None)) + logging.info("WARNING: rating curve dataframe not processed correctly...") rc = rc.rename(columns={"location_id": "USGS Gage"}) + # split out branch 0 FIM data + rc['source_branch'] = np.where((rc.source == 'FIM') & (rc.levpa_id == '0'), 'FIM_b0', np.where((rc.source == 'FIM_default') & (rc.levpa_id == '0'), 'FIM_default_b0', rc.source)) + #rc['source_branch'] = np.where((rc.source == 'FIM_default') & (rc.levpa_id == '0'), 'FIM_default_b0', rc.source) + ## Generate rating curve plots num_plots = len(rc["USGS Gage"].unique()) if num_plots > 3: @@ -286,10 +447,39 @@ def generate_facet_plot(rc, plot_filename): columns = 1 sns.set(style="ticks") - g = sns.FacetGrid(rc, col="USGS Gage", hue="source", hue_order=['USGS','FIM'], sharex=False, sharey=False,col_wrap=columns) - g.map(sns.scatterplot, "discharge_cfs", "elevation_ft", palette="tab20c", marker="o") + + # Plot both "FIM" and "FIM_default" rating curves + if '0' in rc.levpa_id.values: # checks to see if branch zero data exists in the rating curve df + hue_order = ['USGS','FIM','FIM_default','FIM_b0','FIM_default_b0'] if 'default_discharge_cfs' in rc.columns else ['USGS','FIM','FIM_b0'] + kw = {'color': ['blue','green','orange','green','orange'], 'linestyle' : ["-","-","-","--","--"]} if 'default_discharge_cfs' in rc.columns else {'color': ['blue','green','green'], 'linestyle' : ["-","-","--"]} + else: + hue_order = ['USGS','FIM','FIM_default'] if 'default_discharge_cfs' in rc.columns else ['USGS','FIM'] + kw = {'color': ['blue','green','orange'], 'linestyle' : ["-","-","-"]} if 'default_discharge_cfs' in rc.columns else {'color': ['blue','green'], 'linestyle' : ["-","_"]} + # Facet Grid + g = sns.FacetGrid(rc, col="USGS Gage", hue="source_branch", hue_order=hue_order, + sharex=False, sharey=False,col_wrap=columns, + height=3.5, aspect=1.65, hue_kws=kw) + g.map(plt.plot, "discharge_cfs", "elevation_ft", linewidth=2, alpha=0.8) g.set_axis_labels(x_var="Discharge (cfs)", y_var="Elevation (ft)") + ## Plot recurrence intervals + axes = g.axes_dict + for gage in axes: + ax = axes[gage] + plt.sca(ax) + try: + recurr_data = recurr_data_table[(recurr_data_table.location_id == gage) & (recurr_data_table.source == 'FIM')]\ + .filter(items=['recurr_interval', 'discharge_cfs']) + for i, r in recurr_data.iterrows(): + if not r.recurr_interval.isnumeric(): continue # skip catfim flows + l = 'NWM 17C\nRecurrence' if r.recurr_interval == '2' else None # only label 2 yr + 
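A condensed sketch of the point-trimming rule used in generate_single_plot and generate_facet_plot, expressed as a single boolean mask. The helper name is hypothetical; the +/- 2 ft buffer and the cap at the largest FIM recurrence discharge come from the drop statements shown in this hunk.

# Illustrative sketch only (not part of this patch).
import pandas as pd

def trim_fim_points(rc: pd.DataFrame, source: str, min_elev: float, max_elev: float,
                    min_q: float, max_q: float, ri_max_q: float) -> pd.DataFrame:
    """Drop FIM rating-curve points well outside the USGS-observed range."""
    too_high = (((rc['elevation_ft'] > max_elev + 2) | (rc['discharge_cfs'] > ri_max_q))
                & (rc['discharge_cfs'] > max_q))
    too_low = (rc['elevation_ft'] < min_elev - 2) & (rc['discharge_cfs'] < min_q)
    keep = ~((rc['source'] == source) & (too_high | too_low))
    return rc.loc[keep]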
plt.axvline(x=r.discharge_cfs, c='purple', linewidth=0.5, label=l) # plot recurrence intervals + plt.text(r.discharge_cfs, ax.get_ylim()[1] - (ax.get_ylim()[1]-ax.get_ylim()[0])*0.03, r.recurr_interval, size='small', c='purple') + except Exception as ex: + summary = traceback.StackSummary.extract( + traceback.walk_stack(None)) + logging.info("WARNING: Could not plot recurrence intervals...") + # Adjust the arrangement of the plots g.fig.tight_layout(w_pad=1) g.add_legend() @@ -297,6 +487,112 @@ def generate_facet_plot(rc, plot_filename): plt.savefig(plot_filename) plt.close() +def generate_rc_and_rem_plots(rc, plot_filename, recurr_data_table, branches_folder): + + ## Set up figure + num_plots = len(rc["location_id"].unique()) + fig = plt.figure(figsize=(6, 2.4*num_plots)) + gs = fig.add_gridspec(num_plots, 2, width_ratios=[2, 3]) + ax = gs.subplots() + if ax.ndim == 1: # hucs with only one plot will only have one-dimensional axes; + ax = np.expand_dims(ax, axis=0) # the axes manipulations below require 2 dimensions + plt.tight_layout(w_pad=1) + + # Create a dictionary with location_id as keys and branch id as values + gage_branch_dict = rc.groupby('location_id')['levpa_id'].first().to_dict() + + for i, gage in enumerate(gage_branch_dict): + ################################################################################################################### + # Filter FIM elevation based on USGS data + + min_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.min() + max_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.max() + min_q = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].discharge_cfs.min() + max_q = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].discharge_cfs.max() + ri100 = recurr_data_table[(recurr_data_table.location_id == gage) & (recurr_data_table.source == 'FIM')].discharge_cfs.max() + + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (((rc.elevation_ft > (max_elev + 2)) | (rc.discharge_cfs > ri100)) & (rc.discharge_cfs > max_q))].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation_ft < min_elev - 2) & (rc.discharge_cfs < min_q)].index) + + if 'default_discharge_cfs' in rc.columns: # Plot both "FIM" and "FIM_default" rating curves + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM_default') & (((rc.elevation_ft > (max_elev + 2)) | (rc.discharge_cfs > ri100)) & (rc.discharge_cfs > max_q))].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM_default') & (rc.elevation_ft < min_elev - 2)].index) + + ################################################################################################################### + ## Read in reaches, catchment raster, and rem raster + branch = gage_branch_dict[gage] + if os.path.isfile(os.path.join(branches_folder, branch, f'demDerived_reaches_split_filtered_addedAttributes_crosswalked_{branch}.gpkg')): + reaches = gpd.read_file(os.path.join(branches_folder, branch, f'demDerived_reaches_split_filtered_addedAttributes_crosswalked_{branch}.gpkg')) + reach = reaches[reaches.HydroID == hydroid] + with rasterio.open(os.path.join(branches_folder, branch, f'gw_catchments_reaches_filtered_addedAttributes_{branch}.tif')) as catch_rast: + catchments = catch_rast.read() + with rasterio.open(os.path.join(branches_folder, branch, f'rem_zeroed_masked_{branch}.tif')) as rem: + rem_transform = rem.transform + rem_extent = rioplot.plotting_extent(rem) + rem_sub25 = rem.read() + # Set all pixels above the SRC calculation height to nan + 
rem_sub25[np.where(rem_sub25 > 25.3)] = -9999. + rem_sub25[np.where(rem_sub25 == -9999.)] = np.nan + + # Plot the rating curve + ax[i,1].plot("discharge_cfs", "elevation_ft", data=rc[(rc.source == 'USGS') & (rc.location_id == gage)], linewidth=2, alpha=0.8, label='USGS') + ax[i,1].plot("discharge_cfs", "elevation_ft", data=rc[(rc.source == 'FIM') & (rc.location_id == gage)], linewidth=2, alpha=0.8, label='FIM') + if 'default_discharge_cfs' in rc.columns: + ax[i,1].plot("default_discharge_cfs", "elevation_ft", data=rc[(rc.source == 'FIM_default') & (rc.location_id == gage)], linewidth=2, alpha=0.8, label='FIM_default') + + # Plot the recurrence intervals + recurr_data = recurr_data_table[(recurr_data_table.location_id == gage) & (recurr_data_table.source == 'FIM')]\ + .filter(items=['recurr_interval', 'discharge_cfs']) + for _, r in recurr_data.iterrows(): + if not r.recurr_interval.isnumeric(): continue # skip catfim flows + l = 'NWM 17C\nRecurrence' if r.recurr_interval == '2' else None # only label 2 yr + ax[i,1].axvline(x=r.discharge_cfs, c='purple', linewidth=0.5, label=l) # plot recurrence intervals + ax[i,1].text(r.discharge_cfs, ax[i,1].get_ylim()[1] - (ax[i,1].get_ylim()[1]-ax[i,1].get_ylim()[0])*0.06, r.recurr_interval, size='small', c='purple') + + # Get the hydroid + hydroid = rc[rc.location_id == gage].HydroID.unique()[0] + if not hydroid: + logging.info(f'Gage {gage} in HUC {branch} has no HydroID') + continue + + # Filter the reaches and REM by the hydroid + catchment_rem = rem_sub25.copy() + catchment_rem[np.where(catchments != int(hydroid))] = np.nan + + # Convert raster to WSE feet and limit to upper bound of rating curve + dem_adj_elevation = rc[rc.location_id == gage].dem_adj_elevation.unique()[0] + catchment_rem = (catchment_rem + dem_adj_elevation) * 3.28084 + max_elev = rc[(rc.source == 'FIM') & (rc.location_id == gage)].elevation_ft.max() + catchment_rem[np.where(catchment_rem > max_elev)] = np.nan # <-- Comment out this line to get the full raster that is + # used in rating curve creation + # Create polygon for perimeter/area stats + catchment_rem_1s = catchment_rem.copy() + catchment_rem_1s[np.where(~np.isnan(catchment_rem_1s))] = 1 + features = riofeatures.shapes(catchment_rem_1s, mask=~np.isnan(catchment_rem), transform=rem_transform, connectivity=8) + del catchment_rem_1s + features = [f for f in features] + geom = [Polygon(f[0]['coordinates'][0]) for f in features] + poly = gpd.GeoDataFrame({'geometry':geom}) + #poly['perimeter'] = poly.length # These lines are calculating perimeter/area stats + #poly['area'] = poly.area # and can be removed if there is a separate process + #poly['perimeter_area_ratio'] = poly.length/poly.area # set up later that calculates these for all hydroids + #poly['perimeter_area_ratio_sqrt'] = poly.length/(poly.area**.5) # within a catchment. 
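A small sketch of the footprint-polygon step used just above for the REM panel bounds: burn the non-NaN cells to a constant, polygonize with rasterio.features.shapes, and wrap the exterior rings as shapely Polygons. The function name is hypothetical; catchment_rem and rem_transform are assumed to be the masked float32 array and affine transform produced earlier in this function.

# Illustrative sketch only (not part of this patch).
import numpy as np
import geopandas as gpd
from rasterio import features as riofeatures
from shapely.geometry import Polygon

def rem_footprint(catchment_rem: np.ndarray, rem_transform) -> gpd.GeoDataFrame:
    ones = catchment_rem.copy()
    ones[~np.isnan(ones)] = 1  # constant value so shapes() returns one footprint per region
    shapes_gen = riofeatures.shapes(ones, mask=~np.isnan(catchment_rem),
                                    transform=rem_transform, connectivity=8)
    geoms = [Polygon(geom['coordinates'][0]) for geom, _value in shapes_gen]
    return gpd.GeoDataFrame({'geometry': geoms})

# poly = rem_footprint(catchment_rem, rem_transform)
# xmin, ymin, xmax, ymax = poly.total_bounds  # pad by ~20 map units for the REM panel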
+ bounds = poly.total_bounds + bounds = ((bounds[0]-20, bounds[2]+20), (bounds[1]-20, bounds[3]+20)) + + # REM plot + if 'reach' in locals(): + reach.plot(ax=ax[i,0], color='#999999', linewidth=0.9) + im = ax[i,0].imshow(rasterio.plot.reshape_as_image(catchment_rem), cmap='gnuplot', extent=rem_extent, interpolation='none') + plt.colorbar(im, ax=ax[i,0], location='left') + ax[i,0].set_xbound(bounds[0]); ax[i,0].set_ybound(bounds[1]) + ax[i,0].set_xticks([]); ax[i,0].set_yticks([]) + ax[i,0].set_title(gage) + + del catchments, rem_sub25, catchment_rem + ax[0,1].legend() + plt.savefig(plot_filename, dpi=200) + plt.close() def get_reccur_intervals(site_rc, usgs_crosswalk,nwm_recurr_intervals): @@ -304,11 +600,18 @@ def get_reccur_intervals(site_rc, usgs_crosswalk,nwm_recurr_intervals): nwm_ids = len(usgs_site.feature_id.drop_duplicates()) if nwm_ids > 0: - - nwm_recurr_intervals = nwm_recurr_intervals.copy().loc[nwm_recurr_intervals.feature_id==usgs_site.feature_id.drop_duplicates().item()] - nwm_recurr_intervals['pred_elev'] = np.interp(nwm_recurr_intervals.discharge_cfs.values, usgs_site['discharge_cfs'], usgs_site['elevation_ft'], left = np.nan, right = np.nan) - - return nwm_recurr_intervals + try: + nwm_recurr_intervals = nwm_recurr_intervals.copy().loc[nwm_recurr_intervals.feature_id==usgs_site.feature_id.drop_duplicates().item()] + nwm_recurr_intervals['pred_elev'] = np.interp(nwm_recurr_intervals.discharge_cfs.values, usgs_site['discharge_cfs'], usgs_site['elevation_ft'], left = np.nan, right = np.nan) + + return nwm_recurr_intervals + except Exception as ex: + summary = traceback.StackSummary.extract( + traceback.walk_stack(None)) + #logging.info("WARNING: get_recurr_intervals failed for some reason....") + #logging.info(f"*** {ex}") + #logging.info(''.join(summary.format())) + return [] else: return [] @@ -331,9 +634,6 @@ def calculate_rc_stats_elev(rc,stat_groups=None): .rename_axis(None, axis=1) ) - if stat_groups is None: - stat_groups = ['location_id'] - # Calculate variables for NRMSE rc_unmelt["yhat_minus_y"] = rc_unmelt[src_elev] - rc_unmelt[usgs_elev] rc_unmelt["yhat_minus_y_squared"] = rc_unmelt["yhat_minus_y"] ** 2 @@ -360,21 +660,157 @@ def calculate_rc_stats_elev(rc,stat_groups=None): nrmse_table_group = nrmse_table.groupby(stat_groups) # Calculate nrmse - nrmse = nrmse_table_group.apply(lambda x: ((x['sum_y_diff'] / x['n']) ** 0.5) / (x['y_max'] - x['y_min']))\ + def NRMSE(x): + if x['n'][0] == 1: # when n==1, NRME equation will return an `inf` + return x['sum_y_diff'] ** 0.5 + else: + return ((x['sum_y_diff'] / x['n']) ** 0.5) / (x['y_max'] - x['y_min']) + + nrmse = nrmse_table_group.apply(NRMSE)\ .reset_index(stat_groups, drop = False).rename({0: "nrmse"}, axis=1) # Calculate Mean Absolute Depth Difference mean_abs_y_diff = station_rc.apply(lambda x: (abs(x["yhat_minus_y"]).mean()))\ .reset_index(stat_groups, drop = False).rename({0: "mean_abs_y_diff_ft"}, axis=1) + # Calculate Mean Depth Difference (non-absolute value) + mean_y_diff = station_rc.apply(lambda x: (x["yhat_minus_y"].mean()))\ + .reset_index(stat_groups, drop = False).rename({0: "mean_y_diff_ft"}, axis=1) + # Calculate Percent Bias percent_bias = station_rc.apply(lambda x: 100 * (x["yhat_minus_y"].sum() / x[usgs_elev].sum()))\ .reset_index(stat_groups, drop = False).rename({0: "percent_bias"}, axis=1) - rc_stat_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [nrmse, mean_abs_y_diff, percent_bias]) + rc_stat_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), 
[nrmse, mean_abs_y_diff, mean_y_diff, percent_bias]) return rc_stat_table +def create_static_gpkg(output_dir, output_gpkg, agg_recurr_stats_table, gages_gpkg_filepath): + ''' + Merges the output dataframe from aggregate_metrics() with the usgs gages GIS data + ''' + # Load in the usgs_gages geopackage + usgs_gages = gpd.read_file(gages_gpkg_filepath) + # Merge the stats for all of the recurrance intervals/thresholds + usgs_gages = usgs_gages.merge(agg_recurr_stats_table, on='location_id') + # Load in the rating curves file + agg_nwm_recurr_flow_elev = join(output_dir,'agg_nwm_recurr_flow_elevations.csv') + agg_stats = pd.read_csv(agg_nwm_recurr_flow_elev,dtype={'location_id': str, + 'feature_id': str}) + diff_table = calculate_rc_diff(agg_stats) + # Merge recurrence interval difference table with points layer + usgs_gages = usgs_gages.merge(diff_table, on='location_id') + usgs_gages = usgs_gages.round(decimals=2) + + # Write to file + usgs_gages.to_file(join(output_dir, output_gpkg), driver='GPKG', index=False) + + # Create figure + usgs_gages.replace(np.inf, np.nan, inplace=True) # replace inf with nan for plotting + fig, ax = plt.subplots(2, 2, figsize=(18, 10)) + + # Bin data + max_bin = usgs_gages['mean_abs_y_diff_ft'].max() + bins = (0, 1, 3, 6, 9, max_bin if max_bin > 12 else 12) + usgs_gages['mean_abs_y_diff_ft'] = pd.cut(usgs_gages['mean_abs_y_diff_ft'], bins=bins) + + max_bin = usgs_gages['mean_y_diff_ft'].max() + min_bin = usgs_gages['mean_y_diff_ft'].min() + bins = (min_bin if min_bin < -12 else -12, -9, -6, -3, -1, 0, 1, 3, 6, 9, max_bin if max_bin > 12 else 12) + usgs_gages['mean_y_diff_ft'] = pd.cut(usgs_gages['mean_y_diff_ft'], bins=bins) + + # Create subplots + sns.histplot(ax=ax[0,0], y='nrmse', data=usgs_gages, binwidth=0.2, binrange=(0, 10)) + sns.countplot(ax=ax[1,0], y='mean_abs_y_diff_ft', data=usgs_gages) + sns.countplot(ax=ax[1,1], y='mean_y_diff_ft', data=usgs_gages) + sns.boxplot(ax=ax[0,1], data=usgs_gages[['2', '5', '10', '25', '50', '100', 'action', 'minor', 'moderate','major']]) + ax[0,1].set(ylim=(-12, 12)) + + fig.tight_layout() + fig.savefig(join(output_dir, f'{output_gpkg}_summary_plots.png'.replace('.gpkg', ''))) + + return + +def calculate_rc_diff(rc): + + usgs_elev = "USGS" + src_elev = "FIM" + + # Collect any extra columns not associated with melt + col_index = list(rc.columns) + pivot_vars = ['source','elevation_ft'] + col_index = [col for col in col_index if col not in pivot_vars] + + # Unmelt elevation/source + rc_unmelt = (rc.set_index(col_index) + .pivot(columns="source")['elevation_ft'] + .reset_index() + .rename_axis(None, axis=1) + ) + + # Calculate water surface elevation difference at recurrence intervals + rc_unmelt["yhat_minus_y"] = rc_unmelt[src_elev] - rc_unmelt[usgs_elev] + # Remove duplicate location_id-recurr_interval pairs and pivot + rc_unmelt.set_index(['location_id', 'recurr_interval'], inplace=True, verify_integrity=False) + rc_unmelt = (rc_unmelt[~rc_unmelt.index.duplicated(keep='first')] + .reset_index() + .pivot(index='location_id', columns='recurr_interval', values='yhat_minus_y')) + # Reorder columns + rc_unmelt = rc_unmelt[['2', '5', '10', '25', '50', '100', 'action', 'minor', 'moderate','major']] + + return rc_unmelt + +def evaluate_results(sierra_results=[], labels=[], save_location=''): + ''' + Compares multiple Sierra Test results using a boxplot. + + Parameters + ------------ + sierra_results : list + List of GeoDataFrames with sierra test results. 
+ labels : list + List of strings that will be used as labels for sierra_results. Length must be equal to sierra_results. + save_location : str + Path to save output boxplot figure. + + Example + ------------ + from rating_curve_comparison import evaluate_results + import geopandas as gpd + + sierra_1 = gpd.read_file("/data/path/to/fim_3_X_ms.gpkg") + sierra_new = gpd.read_file("/data/path/to/fim_experiment.gpkg") + + evaluate_results([sierra_1, sierra_new], ["fim_3_X_ms", "fim_calibrate_SRC"], "/path/to/output.png") + ''' + + assert len(sierra_results) == len(labels), "Each Sierra Test results must also have a label" + + # Define recurrence intervals to plot + recurr_intervals = ("2","5","10","25","50","100","action","minor","moderate","major") + + # Assign labels to the input sierra test result dataframes + for df, label in zip(sierra_results, labels): + df['_version'] = label + + # Combine all dataframes into one + all_results = sierra_results[0] + all_results = all_results.append(sierra_results[1:]) + + # Melt results for boxplotting + all_results_melted = all_results.melt(id_vars=["location_id", '_version'], + value_vars=recurr_intervals, + var_name='recurr_interval', + value_name='error_ft') + + # Plot all results in a comparison boxplot + fig, ax = plt.subplots(figsize=(10,5)) + sns.boxplot(x='recurr_interval', y='error_ft', hue='_version', data=all_results_melted, ax=ax, fliersize=3) + ax.set(ylim=(-30, 30)) + ax.grid() + ax.legend(bbox_to_anchor=(1, 1), loc='upper right', title='FIM Version') + ax.set_title('Sierra Test Results Comparison') + plt.savefig(save_location) if __name__ == '__main__': @@ -385,8 +821,12 @@ def calculate_rc_stats_elev(rc,stat_groups=None): parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True,type=str) parser.add_argument('-catfim', '--catfim-flows-filename', help='Categorical FIM flows file',required = True,type=str) parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) - parser.add_argument('-group','--stat-groups',help='column(s) to group stats',required=False,type=str) - + parser.add_argument('-group','--stat-groups',help='column(s) to group stats',required=False,type=str,nargs='+') + parser.add_argument('-pnts','--stat-gages',help='takes 2 arguments: 1) file path of input usgs_gages.gpkg and 2) output GPKG name to write USGS gages with joined stats',required=False,type=str,nargs=2) + parser.add_argument('-alt','--alt-plot',help='Generate rating curve plots with REM maps',required=False,default=False,action='store_true') + parser.add_argument('-eval','--evaluate-results',help='Create a boxplot comparison of multiple input Sierra Test results. \ + Expects 2 arguments: 1) path to the Sierra Test results for comparison and 2) the corresponding label for the boxplot.',required=False,nargs=2,action='append') + parser.add_argument('-s', '--single-plot', help='Create single plots', action='store_true') args = vars(parser.parse_args()) fim_dir = args['fim_dir'] @@ -396,8 +836,21 @@ def calculate_rc_stats_elev(rc,stat_groups=None): catfim_flows_filename = args['catfim_flows_filename'] number_of_jobs = args['number_of_jobs'] stat_groups = args['stat_groups'] + alt_plot = args['alt_plot'] + eval = args['evaluate_results'] + single_plot = args['single_plot'] + if args['stat_gages']: + gages_gpkg_filepath = args['stat_gages'][0] + stat_gages = args['stat_gages'][1] + assert (os.path.exists(gages_gpkg_filepath)), f"{gages_gpkg_filepath} does not exist. 
Please specify a full path to a USGS geopackage (.gpkg)" + else: + stat_gages = None - stat_groups = stat_groups.split() + # Make sure that location_id is the only -group when using -pnts + assert (not stat_gages or (stat_gages and (not stat_groups or stat_groups == ['location_id']))), \ + "location_id is the only acceptable stat_groups argument when producting an output GPKG" + # Make sure that the -pnts flag is used with the -eval flag + assert not eval or (eval and stat_gages), "You must use the -pnts flag with the -eval flag" procs_list = [] plots_dir = join(output_dir,'plots') @@ -409,46 +862,66 @@ def calculate_rc_stats_elev(rc,stat_groups=None): print(check_file_age(usgs_gages_filename)) # Open log file - sys.__stdout__ = sys.stdout - log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") - sys.stdout = log_file + # Create log output + level = logging.INFO # using WARNING level to avoid benign? info messages ("Failed to auto identify EPSG: 7") + format = ' %(message)s' + handlers = [logging.FileHandler(os.path.join(output_dir,'rating_curve_comparison.log')), logging.StreamHandler()] + logging.basicConfig(level = level, format = format, handlers = handlers) merged_elev_table = [] - huc_list = os.listdir(fim_dir) + huc_list = [huc for huc in os.listdir(fim_dir) if re.search("^\d{6,8}$", huc)] for huc in huc_list: - if huc != 'logs': - elev_table_filename = join(fim_dir,huc,'usgs_elev_table.csv') - hydrotable_filename = join(fim_dir,huc,'hydroTable.csv') - usgs_recurr_stats_filename = join(tables_dir,f"usgs_interpolated_elevation_stats_{huc}.csv") - nwm_recurr_data_filename = join(tables_dir,f"nwm_recurrence_flow_elevations_{huc}.csv") - rc_comparison_plot_filename = join(plots_dir,f"FIM-USGS_rating_curve_comparison_{huc}.png") - - if isfile(elev_table_filename): - procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir, catfim_flows_filename, huc]) - # Aggregate all of the individual huc elev_tables into one aggregate for accessing all data in one csv - read_elev_table = pd.read_csv(elev_table_filename) - read_elev_table['huc'] = huc - merged_elev_table.append(read_elev_table) + elev_table_filename = join(fim_dir,huc,'usgs_elev_table.csv') + branches_folder = join(fim_dir,huc,'branches') + usgs_recurr_stats_filename = join(tables_dir,f"usgs_interpolated_elevation_stats_{huc}.csv") + nwm_recurr_data_filename = join(tables_dir,f"nwm_recurrence_flow_elevations_{huc}.csv") + rc_comparison_plot_filename = join(plots_dir,f"FIM-USGS_rating_curve_comparison_{huc}.png") + + if isfile(elev_table_filename): + procs_list.append([elev_table_filename, branches_folder, usgs_gages_filename, + usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename, + nwm_flow_dir, catfim_flows_filename, huc, alt_plot, single_plot]) + # Aggregate all of the individual huc elev_tables into one aggregate for accessing all data in one csv + read_elev_table = pd.read_csv(elev_table_filename, dtype={'location_id':str, 'HydroID':str, 'huc':str, 'feature_id':int}) + read_elev_table['huc'] = huc + merged_elev_table.append(read_elev_table) # Output a concatenated elev_table to_csv if merged_elev_table: - print(f"Creating aggregate elev table csv") + logging.info(f"Creating aggregate elev table csv") concat_elev_table = pd.concat(merged_elev_table) concat_elev_table['thal_burn_depth_meters'] = concat_elev_table['dem_elevation'] - concat_elev_table['dem_adj_elevation'] 
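A short sketch of the dual-handler logging pattern configured above, which replaces the earlier sys.stdout redirection so messages reach both the log file and the console. The wrapper name and example call are hypothetical; the log filename matches the one used in this hunk.

# Illustrative sketch only (not part of this patch).
import logging
import os

def setup_logging(output_dir: str, level=logging.INFO) -> None:
    handlers = [
        logging.FileHandler(os.path.join(output_dir, 'rating_curve_comparison.log')),  # persist to disk
        logging.StreamHandler(),  # echo to the console, like the previous print() calls
    ]
    logging.basicConfig(level=level, format=' %(message)s', handlers=handlers)

# setup_logging('/path/to/output_dir')
# logging.info("messages now go to both the log file and stdout")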
concat_elev_table.to_csv(join(output_dir,'agg_usgs_elev_table.csv'),index=False) # Initiate multiprocessing - print(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs") + logging.info(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs") with Pool(processes=number_of_jobs) as pool: pool.map(generate_rating_curve_metrics, procs_list) - print(f"Aggregating rating curve metrics for {len(procs_list)} hucs") - aggregate_metrics(output_dir,procs_list,stat_groups) - - print('Delete intermediate tables') + # Create point layer of usgs gages with joined stats attributes + if stat_gages: + logging.info("Creating usgs gages GPKG with joined rating curve summary stats") + agg_recurr_stats_table = aggregate_metrics(output_dir,procs_list,['location_id']) + create_static_gpkg(output_dir, stat_gages, agg_recurr_stats_table, gages_gpkg_filepath) + del agg_recurr_stats_table # memory cleanup + else: # if not producing GIS layer, just aggregate metrics + logging.info(f"Aggregating rating curve metrics for {len(procs_list)} hucs") + aggregate_metrics(output_dir,procs_list,stat_groups) + + logging.info('Delete intermediate tables') shutil.rmtree(tables_dir, ignore_errors=True) - # Close log file - sys.stdout = sys.__stdout__ - log_file.close() + # Compare current sierra test results to previous tests + if eval: + # Transpose comparison sierra results + sierra_test_paths, sierra_test_labels = np.array(eval).T.tolist() + # Add current sierra test to lists + sierra_test_paths = sierra_test_paths + [join(output_dir, stat_gages)] + sierra_test_labels = sierra_test_labels + [stat_gages.replace('.gpkg', '')] + # Read in all sierra test results + sierra_test_dfs = [gpd.read_file(i) for i in sierra_test_paths] + # Feed results into evaluation function + evaluate_results(sierra_test_dfs, + sierra_test_labels, + join(output_dir, 'Sierra_Test_Eval_boxplot.png')) diff --git a/tools/rating_curve_get_usgs_curves.py b/tools/rating_curve_get_usgs_curves.py index cb8a33f56..2c152850b 100644 --- a/tools/rating_curve_get_usgs_curves.py +++ b/tools/rating_curve_get_usgs_curves.py @@ -6,10 +6,16 @@ from tools_shared_functions import get_metadata, get_datum, ngvd_to_navd_ft, get_rating_curve, aggregate_wbd_hucs, get_thresholds, flow_data from dotenv import load_dotenv import os +import numpy as np import argparse import sys sys.path.append('/foss_fim/src') from utils.shared_variables import PREP_PROJECTION +from tools_shared_variables import (acceptable_coord_acc_code_list, + acceptable_coord_method_code_list, + acceptable_alt_acc_thresh, + acceptable_alt_meth_code_list, + acceptable_site_type_list) ''' This script calls the NOAA Tidal API for datum conversions. Experience shows that @@ -26,9 +32,10 @@ EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") NWM_FLOWS_MS = os.getenv("NWM_FLOWS_MS") + def get_all_active_usgs_sites(): ''' - Compile a list of all active usgs gage sites that meet certain criteria. + Compile a list of all active usgs gage sites. Return a GeoDataFrame of all sites. Returns @@ -43,61 +50,15 @@ def get_all_active_usgs_sites(): selector = ['all'] must_include = 'usgs_data.active' metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = None ) - - #Filter out sites based quality of site. These acceptable codes were initially - #decided upon and may need fine tuning. 
A link where more information - #regarding the USGS attributes is provided. - - #https://help.waterdata.usgs.gov/code/coord_acy_cd_query?fmt=html - acceptable_coord_acc_code = ['H','1','5','S','R','B','C','D','E'] - #https://help.waterdata.usgs.gov/code/coord_meth_cd_query?fmt=html - acceptable_coord_method_code = ['C','D','W','X','Y','Z','N','M','L','G','R','F','S'] - #https://help.waterdata.usgs.gov/codes-and-parameters/codes#SI - acceptable_alt_acc_thresh = 1 - #https://help.waterdata.usgs.gov/code/alt_meth_cd_query?fmt=html - acceptable_alt_meth_code = ['A','D','F','I','J','L','N','R','W','X','Y','Z'] - #https://help.waterdata.usgs.gov/code/site_tp_query?fmt=html - acceptable_site_type = ['ST'] - - #Cycle through each site and filter out if site doesn't meet criteria. - acceptable_sites_metadata = [] - for metadata in metadata_list: - #Get the usgs info from each site - usgs_data = metadata['usgs_data'] - - #Get site quality attributes - coord_accuracy_code = usgs_data.get('coord_accuracy_code') - coord_method_code = usgs_data.get('coord_method_code') - alt_accuracy_code = usgs_data.get('alt_accuracy_code') - alt_method_code = usgs_data.get('alt_method_code') - site_type = usgs_data.get('site_type') - - #Check to make sure that none of the codes were null, if null values are found, skip to next. - if not all([coord_accuracy_code, coord_method_code, alt_accuracy_code, alt_method_code, site_type]): - continue - - #Test if site meets criteria. - if (coord_accuracy_code in acceptable_coord_acc_code and - coord_method_code in acceptable_coord_method_code and - alt_accuracy_code <= acceptable_alt_acc_thresh and - alt_method_code in acceptable_alt_meth_code and - site_type in acceptable_site_type): - - #If nws_lid is not populated then add a dummy ID so that 'aggregate_wbd_hucs' works correctly. - if not metadata.get('identifiers').get('nws_lid'): - metadata['identifiers']['nws_lid'] = 'Bogus_ID' - - #Append metadata of acceptable site to acceptable_sites list. - acceptable_sites_metadata.append(metadata) - #Get a geospatial layer (gdf) for all acceptable sites - dictionary, gdf = aggregate_wbd_hucs(acceptable_sites_metadata, Path(WBD_LAYER), retain_attributes = False) + print("Aggregating WBD HUCs...") + dictionary, gdf = aggregate_wbd_hucs(metadata_list, Path(WBD_LAYER), retain_attributes=True) #Get a list of all sites in gdf list_of_sites = gdf['identifiers_usgs_site_code'].to_list() #Rename gdf fields gdf.columns = gdf.columns.str.replace('identifiers_','') - return gdf, list_of_sites, acceptable_sites_metadata + return gdf, list_of_sites, metadata_list ############################################################################## #Generate categorical flows for each category across all sites. @@ -217,14 +178,19 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): all input sites. Additional metadata also contained in DataFrame ''' + #Define URLs for metadata and rating curve metadata_url = f'{API_BASE_URL}/metadata' rating_curve_url = f'{API_BASE_URL}/rating_curve' - #If 'all' option passed to list of gages sites, it retrieves all acceptable sites within CONUS. + # Create workspace directory if it doesn't exist + if not os.path.exists(workspace): + os.mkdir(workspace) + + #If 'all' option passed to list of gages sites, it retrieves all sites within CONUS. 
print('getting metadata for all sites') if list_of_gage_sites == ['all']: - acceptable_sites_gdf, acceptable_sites_list, metadata_list = get_all_active_usgs_sites() + sites_gdf, sites_list, metadata_list = get_all_active_usgs_sites() #Otherwise, if a list of sites is passed, retrieve sites from WRDS. else: #Define arguments to retrieve metadata and then get metadata from WRDS @@ -254,22 +220,24 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): #For each site in metadata_list for metadata in metadata_list: + print("get_datum") #Get datum information for site (only need usgs_data) nws, usgs = get_datum(metadata) #Filter out sites that are not in contiguous US. If this section is removed be sure to test with datum adjustment section (region will need changed) if usgs['state'] in ['Alaska', 'Puerto Rico', 'Virgin Islands', 'Hawaii']: continue - #Get rating curve for site location_ids = usgs['usgs_site_code'] + if location_ids == None: # Some sites don't have a value for usgs_site_code, skip them + continue curve = get_rating_curve(rating_curve_url, location_ids = [location_ids]) #If no rating curve was returned, skip site. if curve.empty: message = f'{location_ids}: has no rating curve' regular_messages.append(message) continue - + #Adjust datum to NAVD88 if needed. If datum unknown, skip site. if usgs['vcs'] == 'NGVD29': #To prevent time-out errors @@ -283,7 +251,6 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): api_failure_messages.append(api_message) print(api_message) continue - #If datum adjustment succeeded, calculate datum in NAVD88 navd88_datum = round(usgs['datum'] + datum_adj_ft, 2) message = f'{location_ids}:succesfully converted NGVD29 to NAVD88' @@ -298,7 +265,8 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): message = f"{location_ids}: datum unknown" regular_messages.append(message) continue - + + print("Populating..") #Populate rating curve with metadata and use navd88 datum to convert stage to elevation. 
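A minimal sketch of the datum arithmetic applied in this loop. The NGVD29-to-NAVD88 offset comes from the project's ngvd_to_navd_ft helper, whose signature is not shown in this diff, so it is treated here as an input; the function name and example values are hypothetical, and stage/datum are assumed to be in feet as in the surrounding code.

# Illustrative sketch only (not part of this patch).
import pandas as pd

def stage_to_navd88(curve: pd.DataFrame, site_datum_ft: float, datum_adj_ft: float = 0.0) -> pd.DataFrame:
    """Convert gage stage to NAVD88 elevation using the site datum (plus any NGVD29 offset)."""
    navd88_datum = round(site_datum_ft + datum_adj_ft, 2)  # gage datum expressed in NAVD88 feet
    curve = curve.copy()
    curve['navd88_datum'] = navd88_datum
    curve['elevation_navd88'] = curve['stage'] + navd88_datum  # stage above datum -> elevation
    return curve

# e.g. a site already on NAVD88 needs no adjustment (datum_adj_ft=0):
# curve = stage_to_navd88(pd.DataFrame({'stage': [0.0, 1.5, 3.0]}), site_datum_ft=812.4)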
curve['active'] = usgs['active'] curve['datum'] = usgs['datum'] @@ -306,23 +274,53 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): curve['navd88_datum'] = navd88_datum curve['elevation_navd88'] = curve['stage'] + navd88_datum #Append all rating curves to a dataframe - all_rating_curves = all_rating_curves.append(curve) + all_rating_curves = all_rating_curves.append(curve) #Rename columns and add attribute indicating if rating curve exists - acceptable_sites_gdf.rename(columns = {'nwm_feature_id':'feature_id','usgs_site_code':'location_id'}, inplace = True) + sites_gdf.rename(columns = {'nwm_feature_id':'feature_id','usgs_site_code':'location_id'}, inplace = True) sites_with_data = pd.DataFrame({'location_id':all_rating_curves['location_id'].unique(),'curve':'yes'}) - acceptable_sites_gdf = acceptable_sites_gdf.merge(sites_with_data, on = 'location_id', how = 'left') - acceptable_sites_gdf.fillna({'curve':'no'},inplace = True) + sites_gdf = sites_gdf.merge(sites_with_data, on = 'location_id', how = 'left') + sites_gdf.fillna({'curve':'no'},inplace = True) #Add mainstems attribute to acceptable sites print('Attributing mainstems sites') #Import mainstems segments used in run_by_unit.sh ms_df = gpd.read_file(NWM_FLOWS_MS) ms_segs = ms_df.ID.astype(str).to_list() #Populate mainstems attribute field - acceptable_sites_gdf['mainstem'] = 'no' - acceptable_sites_gdf.loc[acceptable_sites_gdf.eval('feature_id in @ms_segs'),'mainstem'] = 'yes' + sites_gdf['mainstem'] = 'no' + sites_gdf.loc[sites_gdf.eval('feature_id in @ms_segs'),'mainstem'] = 'yes' + sites_gdf.to_csv(os.path.join(workspace, 'acceptable_sites_pre.csv')) + + sites_gdf = sites_gdf.drop(['upstream_nwm_features'], axis=1, errors='ignore') + sites_gdf = sites_gdf.drop(['downstream_nwm_features'], axis=1, errors='ignore') + print("Recasting...") + sites_gdf = sites_gdf.astype({'metadata_sources': str}) + # -- Filter all_rating_curves according to acceptance criteria -- # + # -- We only want acceptable gages in the rating curve CSV -- # + sites_gdf['acceptable_codes'] = (sites_gdf['usgs_data_coord_accuracy_code'].isin(acceptable_coord_acc_code_list) + & sites_gdf['usgs_data_coord_method_code'].isin(acceptable_coord_method_code_list) + & sites_gdf['usgs_data_alt_method_code'].isin(acceptable_alt_meth_code_list) + & sites_gdf['usgs_data_site_type'].isin(acceptable_site_type_list)) + + sites_gdf = sites_gdf.astype({'usgs_data_alt_accuracy_code': float}) + sites_gdf['acceptable_alt_error'] = np.where(sites_gdf['usgs_data_alt_accuracy_code'] <= acceptable_alt_acc_thresh, True, False) + + sites_gdf.to_file(os.path.join(workspace, 'sites_bool_flags.gpkg'), driver='GPKG') + + # Filter and save filtered file for viewing + acceptable_sites_gdf = sites_gdf[(sites_gdf['acceptable_codes'] == True) & (sites_gdf['acceptable_alt_error'] == True)] + acceptable_sites_gdf = acceptable_sites_gdf[acceptable_sites_gdf['curve'] == 'yes'] + acceptable_sites_gdf.to_csv(os.path.join(workspace, 'acceptable_sites_for_rating_curves.csv')) + acceptable_sites_gdf.to_file(os.path.join(workspace, 'acceptable_sites_for_rating_curves.gpkg'),driver='GPKG') + + # Make list of acceptable sites + acceptable_sites_list = acceptable_sites_gdf['location_id'].tolist() + + # Filter out all_rating_curves by list + all_rating_curves = all_rating_curves[all_rating_curves['location_id'].isin(acceptable_sites_list)] + #If workspace is specified, write data to file. 
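    # Files already written under `workspace` above: acceptable_sites_pre.csv, sites_bool_flags.gpkg,
    # and acceptable_sites_for_rating_curves.csv/.gpkg; log.csv (and usgs_gages.gpkg when 'all' was
    # requested) are written below.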
if workspace: #Write rating curve dataframe to file @@ -334,10 +332,10 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): regular_messages = api_failure_messages + regular_messages all_messages = pd.DataFrame({'Messages':regular_messages}) all_messages.to_csv(Path(workspace) / 'log.csv', index = False) - #If 'all' option specified, reproject then write out shapefile of acceptable sites. + # If 'all' option specified, reproject then write out shapefile of acceptable sites. if list_of_gage_sites == ['all']: - acceptable_sites_gdf = acceptable_sites_gdf.to_crs(PREP_PROJECTION) - acceptable_sites_gdf.to_file(Path(workspace) / 'usgs_gages.gpkg', layer = 'usgs_gages', driver = 'GPKG') + sites_gdf = sites_gdf.to_crs(PREP_PROJECTION) + sites_gdf.to_file(Path(workspace) / 'usgs_gages.gpkg', layer = 'usgs_gages', driver = 'GPKG') #Write out flow files for each threshold across all sites all_data = write_categorical_flow_files(metadata_list, workspace) @@ -366,5 +364,6 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): t = float(args['sleep_timer']) #Generate USGS rating curves + print("Executing...") usgs_rating_to_elev(list_of_gage_sites = l, workspace=w, sleep_time = t) \ No newline at end of file diff --git a/tools/run_test_case.py b/tools/run_test_case.py index 9d4870565..9f7b6c984 100755 --- a/tools/run_test_case.py +++ b/tools/run_test_case.py @@ -1,253 +1,441 @@ #!/usr/bin/env python3 -import os -import sys -import shutil -import argparse +import os, re, shutil, json, sys +import pandas as pd -from tools_shared_functions import compute_contingency_stats_from_rasters -from tools_shared_variables import (TEST_CASES_DIR, INPUTS_DIR, ENDC, TRED_BOLD, WHITE_BOLD, CYAN_BOLD, AHPS_BENCHMARK_CATEGORIES) +from tools_shared_variables import TEST_CASES_DIR, INPUTS_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES, MAGNITUDE_DICT, elev_raster_ndv from inundation import inundate +from mosaic_inundation import Mosaic_inundation +from inundate_gms import Inundate_gms +from tools_shared_functions import compute_contingency_stats_from_rasters +from utils.shared_functions import FIM_Helpers as fh + +class benchmark(object): + + AHPS_BENCHMARK_CATEGORIES = AHPS_BENCHMARK_CATEGORIES + MAGNITUDE_DICT = MAGNITUDE_DICT + + def __init__(self, category): + """Class that handles benchmark data. + + Parameters + ---------- + category : str + Category of the benchmark site. Should be one of ['ble', 'ifc', 'nws', 'usgs', 'ras2fim']. + """ + + self.category = category.lower() + assert category in list(self.MAGNITUDE_DICT.keys()), f"Category must be one of {list(self.MAGNITUDE_DICT.keys())}" + self.validation_data = os.path.join(TEST_CASES_DIR, f'{self.category}_test_cases', f'validation_data_{self.category}') + self.is_ahps = True if self.category in self.AHPS_BENCHMARK_CATEGORIES else False + + def magnitudes(self): + '''Returns the magnitudes associated with the benchmark category.''' + return self.MAGNITUDE_DICT[self.category] + + def huc_data(self): + '''Returns a dict of HUC8, magnitudes, and sites.''' + huc_mags = {} + for huc in os.listdir(self.validation_data): + if not re.match('\d{8}', huc): continue + huc_mags[huc] = self.data(huc) + return huc_mags + + def data(self, huc): + '''Returns a dict of magnitudes and sites for a given huc. Sites will be AHPS lids for + AHPS sites and empty strings for non-AHPS sites. 
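+        e.g. a BLE HUC yields {'100yr': [''], '500yr': ['']}, while an AHPS HUC yields something
+        like {'action': ['bmbp1'], 'minor': ['bmbp1']} (the lid shown here is hypothetical).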
+ ''' + huc_dir = os.path.join(self.validation_data, huc) + if not os.path.isdir(huc_dir): + return {} + if self.is_ahps: + lids = os.listdir(huc_dir) + + mag_dict = {} + for lid in lids: + lid_dir = os.path.join(huc_dir, lid) + for mag in [file for file in os.listdir(lid_dir) if file in self.magnitudes()]: + if mag in mag_dict: + mag_dict[mag].append(lid) + else: + mag_dict[mag] = [lid] + return mag_dict + else: + mags = list(os.listdir(huc_dir)) + return {mag:[''] for mag in mags} + +class test_case(benchmark): + + def __init__(self, test_id, version, archive=True): + """Class that handles test cases, specifically running the alpha test. + + Parameters + ---------- + test_id : str + ID of the test case in huc8_category format, e.g. `12090201_ble`. + version : str + Version of FIM to which this test_case belongs. This should correspond to the fim directory + name in either `/data/previous_fim/` or `/outputs/`. + archive : bool + If true, this test case outputs will be placed into the `official_versions` folder + and the FIM model will be read from the `/data/previous_fim` folder. + If false, it will be saved to the `testing_versions/` folder and the FIM model + will be read from the `/outputs/` folder. + + """ + self.test_id = test_id + self.huc, self.benchmark_cat = test_id.split('_') + super().__init__(self.benchmark_cat) + self.version = version + self.archive = archive + # FIM run directory path - uses HUC 6 for FIM 1 & 2 + self.fim_dir = os.path.join(PREVIOUS_FIM_DIR if archive else OUTPUTS_DIR, + self.version, + self.huc if not re.search('^fim_[1,2]', version, re.IGNORECASE) else self.huc[:6]) + # Test case directory path + self.dir = os.path.join(TEST_CASES_DIR, f'{self.benchmark_cat}_test_cases', + test_id, + 'official_versions' if archive else 'testing_versions', + version) + if not os.path.exists(self.dir): + os.makedirs(self.dir) + # Benchmark data path + self.benchmark_dir = os.path.join(self.validation_data, self.huc) + + # Create list of shapefile paths to use as exclusion areas. + self.mask_dict = {'levees': + {'path': '/data/inputs/nld_vectors/Levee_protected_areas.gpkg', + 'buffer': None, + 'operation': 'exclude' + }, + 'waterbodies': + {'path': '/data/inputs/nwm_hydrofabric/nwm_lakes.gpkg', + 'buffer': None, + 'operation': 'exclude', + }, + } + + @classmethod + def list_all_test_cases(cls, version, archive, benchmark_categories=[]): + """Returns a complete list of all benchmark category test cases as classes. + + Parameters + ---------- + version : str + Version of FIM to which this test_case belongs. This should correspond to the fim directory + name in either `/data/previous_fim/` or `/outputs/`. + archive : bool + If true, this test case outputs will be placed into the `official_versions` folder + and the FIM model will be read from the `/data/previous_fim` folder. + If false, it will be saved to the `testing_versions/` folder and the FIM model + will be read from the `/outputs/` folder. 
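+        Example (version name is hypothetical)
+        --------------------------------------
+        >>> cases = test_case.list_all_test_cases(version='fim_3_0_24_14_ms', archive=True,
+        ...                                       benchmark_categories=['ble'])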
+ """ + if not benchmark_categories: + benchmark_categories = list(cls.MAGNITUDE_DICT.keys()) + + test_case_list = [] + for bench_cat in benchmark_categories: + + benchmark_class = benchmark(bench_cat) + benchmark_data = benchmark_class.huc_data() + + for huc in benchmark_data.keys(): + test_case_list.append(cls(f'{huc}_{bench_cat}', version, archive)) + + return test_case_list + + def alpha_test(self, calibrated=False, model='', mask_type='huc', inclusion_area='', + inclusion_area_buffer=0, overwrite=True, verbose=False, gms_workers=1): + '''Compares a FIM directory with benchmark data from a variety of sources. + + Parameters + ---------- + calibrated : bool + Whether or not this FIM version is calibrated. + model : str + MS or FR extent of the model. This value will be written to the eval_metadata.json. + mask_type : str + Mask type to feed into inundation.py. + inclusion_area : int + Area to include in agreement analysis. + inclusion_area_buffer : int + Buffer distance in meters to include outside of the model's domain. + overwrite : bool + If True, overwites pre-existing test cases within the test_cases directory. + verbose : bool + If True, prints out all pertinent data. + gms_workers : int + Number of worker processes assigned to GMS processing. + ''' + + try: + if not overwrite and os.path.isdir(self.dir): + print(f"Metrics for {self.dir} already exist. Use overwrite flag (-o) to overwrite metrics.") + return + + fh.vprint(f"Starting alpha test for {self.dir}", verbose) + + self.stats_modes_list = ['total_area'] + + # Create paths to fim_run outputs for use in inundate() + if model != 'GMS': + self.rem = os.path.join(self.fim_dir, 'rem_zeroed_masked.tif') + if not os.path.exists(self.rem): + self.rem = os.path.join(self.fim_dir, 'rem_clipped_zeroed_masked.tif') + self.catchments = os.path.join(self.fim_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') + if not os.path.exists(self.catchments): + self.catchments = os.path.join(self.fim_dir, 'gw_catchments_reaches_clipped_addedAttributes.tif') + self.mask_type = mask_type + if mask_type == 'huc': + self.catchment_poly = '' + else: + self.catchment_poly = os.path.join(self.fim_dir, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') + self.hydro_table = os.path.join(self.fim_dir, 'hydroTable.csv') + + # Map necessary inputs for inundate(). + self.hucs, self.hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' + + if inclusion_area != '': + inclusion_area_name = os.path.split(inclusion_area)[1].split('.')[0] # Get layer name + self.mask_dict.update({inclusion_area_name: {'path': inclusion_area, + 'buffer': int(inclusion_area_buffer), + 'operation': 'include'}}) + # Append the concatenated inclusion_area_name and buffer. 
+ if inclusion_area_buffer == None: + inclusion_area_buffer = 0 + self.stats_modes_list.append(inclusion_area_name + '_b' + str(inclusion_area_buffer) + 'm') + + # Delete the directory if it exists + if os.path.exists(self.dir): + shutil.rmtree(self.dir) + os.mkdir(self.dir) + + # Get the magnitudes and lids for the current huc and loop through them + validation_data = self.data(self.huc) + for magnitude in validation_data: + for instance in validation_data[magnitude]: # instance will be the lid for AHPS sites and '' for other sites + # For each site, inundate the REM and compute aggreement raster with stats + self._inundate_and_compute(magnitude, instance, model=model, verbose=verbose, gms_workers=gms_workers) + + # Clean up 'total_area' outputs from AHPS sites + if self.is_ahps: + self.clean_ahps_outputs(os.path.join(self.dir, magnitude)) + + # Write out evaluation meta-data + self.write_metadata(calibrated, model) + + except KeyboardInterrupt: + print("Program aborted via keyboard interrupt") + sys.exit(1) + except Exception as ex: + print(ex) + sys.exit(1) + + def _inundate_and_compute(self, + magnitude, + lid, + compute_only = False, + model = '', + verbose = False, + gms_workers = 1): + '''Method for inundating and computing contingency rasters as part of the alpha_test. + Used by both the alpha_test() and composite() methods. + + Parameters + ---------- + magnitude : str + Magnitude of the current benchmark site. + lid : str + lid of the current benchmark site. For non-AHPS sites, this should be an empty string (''). + compute_only : bool + If true, skips inundation and only computes contingency stats. + ''' + # Output files + fh.vprint("Creating output files", verbose) + + test_case_out_dir = os.path.join(self.dir, magnitude) + inundation_prefix = lid + '_' if lid else '' + inundation_path = os.path.join(test_case_out_dir, f'{inundation_prefix}inundation_extent.tif') + predicted_raster_path = inundation_path.replace('.tif', f'_{self.huc}.tif') + agreement_raster = os.path.join(test_case_out_dir, (f'ahps_{lid}' if lid else '') +'total_area_agreement.tif') + stats_json = os.path.join(test_case_out_dir, 'stats.json') + stats_csv = os.path.join(test_case_out_dir, 'stats.csv') + + # Create directory + if not os.path.isdir(test_case_out_dir): + os.mkdir(test_case_out_dir) + + # Benchmark raster and flow files + benchmark_rast = (f'ahps_{lid}' if lid else self.benchmark_cat) + f'_huc_{self.huc}_extent_{magnitude}.tif' + benchmark_rast = os.path.join(self.benchmark_dir, lid, magnitude, benchmark_rast) + benchmark_flows = benchmark_rast.replace(f'_extent_{magnitude}.tif', f'_flows_{magnitude}.csv') + mask_dict_indiv = self.mask_dict.copy() + if self.is_ahps: # add domain shapefile to mask for AHPS sites + domain = os.path.join(self.benchmark_dir, lid, f'{lid}_domain.shp') + mask_dict_indiv.update({lid: + {'path': domain, + 'buffer': None, + 'operation': 'include'} + }) + # Check to make sure all relevant files exist + if not os.path.isfile(benchmark_rast) or not os.path.isfile(benchmark_flows) or (self.is_ahps and not os.path.isfile(domain)): + return -1 + + # Inundate REM + if not compute_only: # composite alpha tests don't need to be inundated + if model == 'GMS': + fh.vprint("Begin FIM4 Inundation", verbose) + map_file = Inundate_gms( hydrofabric_dir = os.path.dirname(self.fim_dir), + forecast = benchmark_flows, + num_workers = gms_workers, + hucs = self.huc, + inundation_raster = predicted_raster_path, + inundation_polygon = None, + depths_raster = None, + verbose = verbose, + log_file 
= None, + output_fileNames = None ) + #if (len(map_file) > 0): + fh.vprint("Begin FIM4 Mosaic", verbose) + Mosaic_inundation( map_file, + mosaic_attribute = 'inundation_rasters', + mosaic_output = predicted_raster_path, + mask = os.path.join(self.fim_dir,'wbd.gpkg'), + unit_attribute_name = 'huc8', + nodata = elev_raster_ndv, + workers = 1, + remove_inputs = True, + subset = None, + verbose = verbose ) + # FIM v3 and before + else: + fh.vprint("Begin FIM v3 (or earlier) Inundation", verbose) + inundate_result = inundate(self.rem, self.catchments, self.catchment_poly, self.hydro_table, benchmark_flows, + self.mask_type,hucs=self.hucs,hucs_layerName=self.hucs_layerName, + subset_hucs=self.huc,num_workers=1,aggregate=False, + inundation_raster=inundation_path,inundation_polygon=None, + depths=None,out_raster_profile=None,out_vector_profile=None, + quiet=True) + if inundate_result != 0: + return inundate_result + + # Create contingency rasters and stats + fh.vprint("Begin creating contingency rasters and stats", verbose) + if os.path.isfile(predicted_raster_path): + compute_contingency_stats_from_rasters( predicted_raster_path, + benchmark_rast, + agreement_raster, + stats_csv=stats_csv, + stats_json=stats_json, + mask_values=[], + stats_modes_list=self.stats_modes_list, + test_id=self.test_id, + mask_dict=mask_dict_indiv ) + return + + + @classmethod + def run_alpha_test(cls, version, test_id, magnitude, calibrated, model, archive_results=False, + mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False, overwrite=True, verbose=False, gms_workers=1): + '''Class method for instantiating the test_case class and running alpha_test directly''' + + alpha_class = cls(test_id, version, archive_results) + alpha_class.alpha_test(calibrated, model, mask_type, inclusion_area, + inclusion_area_buffer, overwrite, verbose, gms_workers) + + def composite(self, version_2, calibrated = False, overwrite = True, verbose = False): + '''Class method for compositing MS and FR inundation and creating an agreement raster with stats + + Parameters + ---------- + version_2 : str + Version with which to composite. + calibrated : bool + Whether or not this FIM version is calibrated. + overwrite : bool + If True, overwites pre-existing test cases within the test_cases directory. + ''' + + if re.match(r'(.*)(_ms|_fr)', self.version): + composite_version_name = re.sub(r'(.*)(_ms|_fr)', r'\1_comp', self.version, count=1) + else: + composite_version_name = re.sub(r'(.*)(_ms|_fr)', r'\1_comp', version_2, count=1) -def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=False, archive_results=False, mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False, overwrite=True): - - benchmark_category = test_id.split('_')[1] # Parse benchmark_category from test_id. - current_huc = test_id.split('_')[0] # Break off HUC ID and assign to variable. + fh.vprint(f"Begin composite for version : {composite_version_name}", verbose) - # Construct paths to development test results if not existent. 
- if archive_results: - version_test_case_dir_parent = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', test_id, 'official_versions', version) - else: - version_test_case_dir_parent = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', test_id, 'testing_versions', version) + composite_test_case = test_case(self.test_id, composite_version_name, self.archive) + input_test_case_2 = test_case(self.test_id, version_2, self.archive) + composite_test_case.stats_modes_list = ['total_area'] - # Delete the entire directory if it already exists. - if os.path.exists(version_test_case_dir_parent): - if overwrite == True: - shutil.rmtree(version_test_case_dir_parent) - else: - print("Metrics for ({version}: {test_id}) already exist. Use overwrite flag (-o) to overwrite metrics.".format(version=version, test_id=test_id)) + if not overwrite and os.path.isdir(composite_test_case.dir): return - os.mkdir(version_test_case_dir_parent) - - print("Running the alpha test for test_id: " + test_id + ", " + version + "...") - stats_modes_list = ['total_area'] - - fim_run_parent = os.path.join(os.environ['outputDataDir'], fim_run_dir) - assert os.path.exists(fim_run_parent), "Cannot locate " + fim_run_parent - - # Create paths to fim_run outputs for use in inundate(). - rem = os.path.join(fim_run_parent, 'rem_zeroed_masked.tif') - if not os.path.exists(rem): - rem = os.path.join(fim_run_parent, 'rem_clipped_zeroed_masked.tif') - catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes.tif') - if not os.path.exists(catchments): - catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_clipped_addedAttributes.tif') - if mask_type == 'huc': - catchment_poly = '' - else: - catchment_poly = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') - hydro_table = os.path.join(fim_run_parent, 'hydroTable.csv') - - # Map necessary inputs for inundation(). - hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' - - # Create list of shapefile paths to use as exclusion areas. - zones_dir = os.path.join(TEST_CASES_DIR, 'other', 'zones') - mask_dict = {'levees': - {'path': os.path.join(zones_dir, 'leveed_areas_conus.shp'), - 'buffer': None, - 'operation': 'exclude' - }, - 'waterbodies': - {'path': os.path.join(zones_dir, 'nwm_v2_reservoirs.shp'), - 'buffer': None, - 'operation': 'exclude', - }, - } - - if inclusion_area != '': - inclusion_area_name = os.path.split(inclusion_area)[1].split('.')[0] # Get layer name - mask_dict.update({inclusion_area_name: {'path': inclusion_area, - 'buffer': int(inclusion_area_buffer), - 'operation': 'include'}}) - # Append the concatenated inclusion_area_name and buffer. - if inclusion_area_buffer == None: - inclusion_area_buffer = 0 - stats_modes_list.append(inclusion_area_name + '_b' + str(inclusion_area_buffer) + 'm') - - # Check if magnitude is list of magnitudes or single value. - magnitude_list = magnitude - if type(magnitude_list) != list: - magnitude_list = [magnitude_list] - - - # Get path to validation_data_{benchmark} directory and huc_dir. - validation_data_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category) - for magnitude in magnitude_list: - version_test_case_dir = os.path.join(version_test_case_dir_parent, magnitude) - if not os.path.exists(version_test_case_dir): - os.mkdir(version_test_case_dir) - # Construct path to validation raster and forecast file. 
- if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - benchmark_raster_path_list, forecast_list = [], [] - lid_dir_list = os.listdir(os.path.join(validation_data_path, current_huc)) - lid_list, inundation_raster_list, domain_file_list = [], [], [] - - for lid in lid_dir_list: - lid_dir = os.path.join(validation_data_path, current_huc, lid) - benchmark_lid_raster_path = os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_extent_' + magnitude + '.tif') - - # Only compare if the benchmark data exist. - if os.path.exists(benchmark_lid_raster_path): - benchmark_raster_path_list.append(benchmark_lid_raster_path) # TEMP - forecast_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_flows_' + magnitude + '.csv')) # TEMP - lid_list.append(lid) - inundation_raster_list.append(os.path.join(version_test_case_dir, lid + '_inundation_extent.tif')) - domain_file_list.append(os.path.join(lid_dir, lid + '_domain.shp')) - - else: - benchmark_raster_file = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_extent_' + magnitude + '.tif') - benchmark_raster_path_list = [benchmark_raster_file] - forecast_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_flows_' + magnitude + '.csv') - forecast_list = [forecast_path] - inundation_raster_list = [os.path.join(version_test_case_dir, 'inundation_extent.tif')] - - for index in range(0, len(benchmark_raster_path_list)): - benchmark_raster_path = benchmark_raster_path_list[index] - forecast = forecast_list[index] - inundation_raster = inundation_raster_list[index] - # Only need to define ahps_lid and ahps_extent_file for AHPS_BENCHMARK_CATEGORIES. - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - ahps_lid = lid_list[index] - ahps_domain_file = domain_file_list[index] - mask_dict.update({ahps_lid: - {'path': ahps_domain_file, - 'buffer': None, - 'operation': 'include'} - }) - - - if not os.path.exists(benchmark_raster_path) or not os.path.exists(ahps_domain_file) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. - continue - else: # If not in AHPS_BENCHMARK_CATEGORIES. - if not os.path.exists(benchmark_raster_path) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. - continue - # Run inundate. -# print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") - try: - inundate_test = inundate( - rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, - subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, - depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True - ) - if inundate_test == 0: -# print("-----> Inundation mapping complete.") - predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. - - # Define outputs for agreement_raster, stats_json, and stats_csv. 
- if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - else: - agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - - compute_contingency_stats_from_rasters(predicted_raster_path, - benchmark_raster_path, - agreement_raster, - stats_csv=stats_csv, - stats_json=stats_json, - mask_values=[], - stats_modes_list=stats_modes_list, - test_id=test_id, - mask_dict=mask_dict, - ) - - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - del mask_dict[ahps_lid] - - print(" ") - print("Evaluation metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) - print(" ") - elif inundate_test == 1: - pass - print (f"No matching feature IDs between forecast and hydrotable for magnitude: {magnitude}") - #return - except Exception as e: - print(e) - - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - # -- Delete temp files -- # - # List all files in the output directory. - output_file_list = os.listdir(version_test_case_dir) - for output_file in output_file_list: - if "total_area" in output_file: - full_output_file_path = os.path.join(version_test_case_dir, output_file) - os.remove(full_output_file_path) - - -if __name__ == '__main__': - - # Parse arguments. - parser = argparse.ArgumentParser(description='Inundation mapping and regression analysis for FOSS FIM. Regression analysis results are stored in the test directory.') - parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) - parser.add_argument('-b', '--version',help='The name of the working version in which features are being tested',required=True,default="") - parser.add_argument('-t', '--test-id',help='The test_id to use. Format as: HUC_BENCHMARKTYPE, e.g. 12345678_ble.',required=True,default="") - parser.add_argument('-m', '--mask-type', help='Specify \'huc\' (FIM < 3) or \'filter\' (FIM >= 3) masking method', required=False,default="huc") - parser.add_argument('-y', '--magnitude',help='The magnitude to run.',required=False, default="") - parser.add_argument('-c', '--compare-to-previous', help='Compare to previous versions of HAND.', required=False,action='store_true') - parser.add_argument('-a', '--archive-results', help='Automatically copy results to the "previous_version" archive for test_id. For admin use only.', required=False,action='store_true') - parser.add_argument('-i', '--inclusion-area', help='Path to shapefile. Contingency metrics will be produced from pixels inside of shapefile extent.', required=False, default="") - parser.add_argument('-ib','--inclusion-area-buffer', help='Buffer to use when masking contingency metrics with inclusion area.', required=False, default="0") - parser.add_argument('-l', '--light-run', help='Using the light_run option will result in only stat files being written, and NOT grid files.', required=False, action='store_true') - parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, default=False, action='store_true') - - # Extract to dictionary and assign to variables. 
- args = vars(parser.parse_args()) - - valid_test_id_list = os.listdir(TEST_CASES_DIR) - - exit_flag = False # Default to False. - print() - - # Ensure test_id is valid. -# if args['test_id'] not in valid_test_id_list: -# print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided test_id (-t) " + CYAN_BOLD + args['test_id'] + WHITE_BOLD + " is not available." + ENDC) -# print(WHITE_BOLD + "Available test_ids include: " + ENDC) -# for test_id in valid_test_id_list: -# if 'validation' not in test_id.split('_') and 'ble' in test_id.split('_'): -# print(CYAN_BOLD + test_id + ENDC) -# print() -# exit_flag = True - - # Ensure fim_run_dir exists. - if not os.path.exists(os.path.join(os.environ['outputDataDir'], args['fim_run_dir'])): - print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided fim_run_dir (-r) " + CYAN_BOLD + args['fim_run_dir'] + WHITE_BOLD + " could not be located in the 'outputs' directory." + ENDC) - print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. outputs/123456/123456." + ENDC) - print() - exit_flag = True - - # Ensure inclusion_area path exists. - if args['inclusion_area'] != "" and not os.path.exists(args['inclusion_area']): - print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided inclusion_area (-i) " + CYAN_BOLD + args['inclusion_area'] + WHITE_BOLD + " could not be located." + ENDC) - exit_flag = True - - try: - inclusion_buffer = int(args['inclusion_area_buffer']) - except ValueError: - print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided inclusion_area_buffer (-ib) " + CYAN_BOLD + args['inclusion_area_buffer'] + WHITE_BOLD + " is not a round number." + ENDC) - - if args['magnitude'] == '': - if 'ble' in args['test_id'].split('_'): - args['magnitude'] = ['100yr', '500yr'] - elif 'nws' or 'usgs' in args['test_id'].split('_'): - args['magnitude'] = ['action', 'minor', 'moderate', 'major'] - else: - print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided magnitude (-y) " + CYAN_BOLD + args['magnitude'] + WHITE_BOLD + " is invalid. ble options include: 100yr, 500yr. ahps options include action, minor, moderate, major." 
+ ENDC) - exit_flag = True - - if exit_flag: - print() - sys.exit() + # Delete the directory if it exists + if os.path.exists(composite_test_case.dir): + shutil.rmtree(composite_test_case.dir) + + validation_data = composite_test_case.data(composite_test_case.huc) + for magnitude in validation_data: + for instance in validation_data[magnitude]: # instance will be the lid for AHPS sites and '' for other sites (ble/ifc/ras2fim) + inundation_prefix = instance + '_' if instance else '' + + input_inundation = os.path.join(self.dir, magnitude, f'{inundation_prefix}inundation_extent_{self.huc}.tif') + input_inundation_2 = os.path.join(input_test_case_2.dir, magnitude, f'{inundation_prefix}inundation_extent_{input_test_case_2.huc}.tif') + output_inundation = os.path.join(composite_test_case.dir, magnitude, f'{inundation_prefix}inundation_extent.tif') + + if os.path.isfile(input_inundation) and os.path.isfile(input_inundation_2): + inundation_map_file = pd.DataFrame({ + 'huc8' : [composite_test_case.huc] * 2, + 'branchID' : [None] * 2, + 'inundation_rasters' : [input_inundation,input_inundation_2], + 'depths_rasters' : [None] * 2, + 'inundation_polygons' : [None] * 2 + }) + os.makedirs(os.path.dirname(output_inundation), exist_ok=True) + + fh.vprint(f"Begin mosaic inundation for version : {composite_version_name}", verbose) + Mosaic_inundation(inundation_map_file,mosaic_attribute='inundation_rasters', + mosaic_output=output_inundation, mask=None, unit_attribute_name='huc8', + nodata=elev_raster_ndv, workers=1, remove_inputs=False, subset=None, verbose=False + ) + composite_test_case._inundate_and_compute(magnitude, instance, compute_only=True) + + elif os.path.isfile(input_inundation) or os.path.isfile(input_inundation_2): + # If only one model (MS or FR) has inundation, simply copy over all files as the composite + single_test_case = self if os.path.isfile(input_inundation) else input_test_case_2 + shutil.copytree(single_test_case.dir, re.sub(r'(.*)(_ms|_fr)', r'\1_comp', single_test_case.dir, count=1)) + composite_test_case.write_metadata(calibrated, 'COMP') + return + + # Clean up 'total_area' outputs from AHPS sites + if composite_test_case.is_ahps: + composite_test_case.clean_ahps_outputs(os.path.join(composite_test_case.dir, magnitude)) + + composite_test_case.write_metadata(calibrated, 'COMP') + + def write_metadata(self, calibrated, model): + '''Writes metadata files for a test_case directory.''' + with open(os.path.join(self.dir,'eval_metadata.json'),'w') as meta: + eval_meta = { 'calibrated' : calibrated , 'model' : model } + meta.write( + json.dumps(eval_meta,indent=2) + ) + def clean_ahps_outputs(self, magnitude_directory): + '''Cleans up `total_area` files from an input AHPS magnitude directory.''' + output_file_list = [os.path.join(magnitude_directory, of) for of in os.listdir(magnitude_directory)] + for output_file in output_file_list: + if "total_area" in output_file: + os.remove(output_file) + + def get_current_agreements(self): + '''Returns a list of all agreement rasters currently existing for the test_case.''' + agreement_list = [] + for mag in os.listdir(self.dir): + mag_dir = os.path.join(self.dir, mag) + if not os.path.isdir(mag_dir): continue + + for f in os.listdir(mag_dir): + if 'agreement.tif' in f: + agreement_list.append(os.path.join(mag_dir, f)) + return agreement_list - else: - run_alpha_test(**args) diff --git a/tools/synthesize_test_cases.py b/tools/synthesize_test_cases.py index e49b1519d..21c5d47d4 100755 --- a/tools/synthesize_test_cases.py +++ 
b/tools/synthesize_test_cases.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 -import os -import argparse +import os, argparse, json, csv, ast, re, sys, traceback, signal +from datetime import datetime from multiprocessing import Pool -import json -import csv +from concurrent.futures import ProcessPoolExecutor, as_completed, wait +from tqdm import tqdm -from run_test_case import run_alpha_test +from utils.shared_functions import FIM_Helpers as fh +from run_test_case import test_case from tools_shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES, MAGNITUDE_DICT @@ -45,6 +46,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include 'FAR', 'TPR', 'TNR', + 'PND', 'PPV', 'NPV', 'ACC', @@ -68,6 +70,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include 'masked_area_km2' ] + additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source'] + ['extent_config'] + ["calibrated"]] @@ -78,16 +81,13 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include else: iteration_list = ['official'] - for benchmark_source in ['ble', 'nws', 'usgs', 'ifc']: + for benchmark_source in ['ble', 'nws', 'usgs', 'ifc','ras2fim']: benchmark_test_case_dir = os.path.join(TEST_CASES_DIR, benchmark_source + '_test_cases') - if benchmark_source in ['ble', 'ifc']: + test_cases_list = [d for d in os.listdir(benchmark_test_case_dir) if re.match('\d{8}_\w{3,7}', d)] + if benchmark_source in ['ble', 'ifc','ras2fim']: + + magnitude_list = MAGNITUDE_DICT[benchmark_source] - if benchmark_source == 'ble': - magnitude_list = MAGNITUDE_DICT['ble'] - if benchmark_source == 'ifc': - magnitude_list = MAGNITUDE_DICT['ifc'] - test_cases_list = os.listdir(benchmark_test_case_dir) - for test_case in test_cases_list: try: int(test_case.split('_')[0]) @@ -99,18 +99,21 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include if iteration == "official": versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) + # add in composite of versions + composite_versions = [v.replace('_ms', '_comp') for v in versions_to_aggregate if '_ms' in v] + versions_to_aggregate += composite_versions if iteration == "comparison": versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'testing_versions') versions_to_aggregate = dev_versions_to_include_list for magnitude in magnitude_list: for version in versions_to_aggregate: - if '_fr' in version: - extent_config = 'FR' - elif '_ms' in version: + if '_ms' in version: extent_config = 'MS' - else: + elif ('_fr' in version) or (version == 'fim_2_3_3'): extent_config = 'FR' + else: + extent_config = 'COMP' if "_c" in version and version.split('_c')[1] == "": calibrated = "yes" else: @@ -139,7 +142,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include list_to_write.append(sub_list_to_append) except ValueError: pass - + if benchmark_source in AHPS_BENCHMARK_CATEGORIES: test_cases_list = os.listdir(benchmark_test_case_dir) @@ -154,18 +157,21 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include if iteration == "official": versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) + # add in composite of 
versions + composite_versions = [v.replace('_ms', '_comp') for v in versions_to_aggregate if '_ms' in v] + versions_to_aggregate += composite_versions if iteration == "comparison": versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'testing_versions') versions_to_aggregate = dev_versions_to_include_list for magnitude in ['action', 'minor', 'moderate', 'major']: for version in versions_to_aggregate: - if '_fr' in version: - extent_config = 'FR' - elif '_ms' in version: + if '_ms' in version: extent_config = 'MS' - else: + elif ('_fr' in version) or (version == 'fim_2_3_3'): extent_config = 'FR' + else: + extent_config = 'COMP' if "_c" in version and version.split('_c')[1] == "": calibrated = "yes" else: @@ -204,69 +210,115 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include list_to_write.append(sub_list_to_append) except ValueError: pass - + with open(master_metrics_csv_output, 'w', newline='') as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerows(list_to_write) +def progress_bar_handler(executor_dict, verbose, desc): -def process_alpha_test(args): - """ - This function is designed to be used in multiprocessing. It handles the calling of the run_alpha_test function. - - Args: - args (list): Formatted [fim_run_dir (str), version (str), test_id (str), magnitude (str), archive_results (bool), overwrite (bool)] - - """ - - - fim_run_dir = args[0] - version = args[1] - test_id = args[2] - magnitude = args[3] - archive_results = args[4] - overwrite = args[5] - - mask_type = 'huc' - - if archive_results == False: - compare_to_previous = True - else: - compare_to_previous = False - - try: - run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=compare_to_previous, archive_results=archive_results, mask_type=mask_type, overwrite=overwrite) - except Exception as e: - print(e) + for future in tqdm(as_completed(executor_dict), + total=len(executor_dict), + disable=(not verbose), + desc=desc, + ): + try: + future.result() + except Exception as exc: + print('{}, {}, {}'.format(executor_dict[future],exc.__class__.__name__,exc)) if __name__ == '__main__': + # Sample usage: + ''' + === FOR (FIM 4) + python /foss_fim/tools/synthesize_test_cases.py -c DEV -e GMS -v gms_test_synth_combined -jh 2 -jb 40 -m /outputs/gms_test_synth_combined/gms_synth_metrics.csv -vg -o + + Notes: + - fim_input.csv MUST be in the folder suggested. + - the -v param is the name in the folder in the "outputs/" directory where the test hucs are at. + It also becomes the folder names inside the test_case folders when done. + - the -vg param may not be working (will be assessed better on later releases). + - Find a balance between -jh (number of jobs for hucs) versus -jb (number of jobs for branches) + on quick tests on a 96 core machine, we tried [1 @ 80], [2 @ 40], and [3 @ 25] (and others). + -jb 3 -jh 25 was noticably better. You can likely go more jb cores with better success, just + experiment. Start times, End Times and duration are now included. + - The -m can be any path and any name. + + To see your outputs in the test_case folder (hard coded path), you can check for outputs using + (cd .... to your test_case folder), then command becomes find . -name gms_test_* -type d (Notice the + the -name can be a wildcard for your -v param (or the whole -v value)) + If you want to delete the test outputs, test the outputs as suggest immediately above, but this time your + command becomes: find . 
-name gms_test_* -type d -exec rm -rdf {} + + ''' + ''' + === FOR FIM 3 + python /foss_fim/tools/synthesize_test_cases.py -c DEV -e MS -v dev_fim_3_0_29_1_ms -jh 4 -m /outputs/dev_fim_3_0_29_1_ms/alpha/alpha_master_metrics_fim_3_0_29_1_ms_src_adjust.csv -vg -o + + Notes: + - the -v param is the name in the folder in the "outputs/" directory where the test hucs are at. + It also becomes the folder names inside the test_case folders when done. + - the -vg param may not be working (will be assessed better on later releases). + - The -m can be any path and any name. + + To see your outputs in the test_case folder (hard coded path), you can check for outputs using + (cd .... to your test_case folder), then command becomes find . -name dev_fim_3_0_29_1_* -type d (Notice the + the -name can be a wildcard for your -v param (or the whole -v value)) + If you want to delete the test outputs, test the outputs as suggest immediately above, but this time your + command becomes: find . -name dev_fim_3_0_29_1_* -type d -exec rm -rdf {} + + ''' + # Parse arguments. parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') - parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=True) + parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=False,default='DEV') + parser.add_argument('-l','--calibrated',help='Denotes use of calibrated n values. This should be taken from meta-data from hydrofabric dir',required=False, default=False,action='store_true') + parser.add_argument('-e','--model',help='Denotes model used. FR, MS, or GMS allowed. This should be taken from meta-data in hydrofabric dir.', default='GMS', required=False) parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=False, default="all") - parser.add_argument('-j','--job-number',help='Number of processes to use. Default is 1.',required=False, default="1") + parser.add_argument('-jh','--job-number-huc',help='Number of processes to use for HUC scale operations. HUC and Batch job numbers should multiply to no more than one less than the CPU count of the machine.',required=False, default=1,type=int) + parser.add_argument('-jb','--job-number-branch',help='Number of processes to use for Branch scale operations. HUC and Batch job numbers should multiply to no more than one less than the CPU count of the machine.',required=False, default=1,type=int) parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") parser.add_argument('-b','--benchmark-category',help='A benchmark category to specify. Defaults to process all categories.',required=False, default="all") parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, action="store_true") parser.add_argument('-dc', '--dev-version-to-compare', nargs='+', help='Specify the name(s) of a dev (testing) version to include in master metrics CSV. 
Pass a space-delimited list.',required=False) - parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=True) + parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=False,default=None) + parser.add_argument('-d','--fr-run-dir',help='Name of test case directory containing FIM for FR model',required=False,default=None) + parser.add_argument('-vr','--verbose',help='Verbose',required=False,default=None,action='store_true') + parser.add_argument('-vg','--gms-verbose',help='GMS Verbose Progress Bar',required=False,default=None,action='store_true') # Assign variables from arguments. args = vars(parser.parse_args()) config = args['config'] fim_version = args['fim_version'] - job_number = int(args['job_number']) + job_number_huc = args['job_number_huc'] + job_number_branch = args['job_number_branch'] special_string = args['special_string'] benchmark_category = args['benchmark_category'] overwrite = args['overwrite'] dev_versions_to_compare = args['dev_version_to_compare'] master_metrics_csv = args['master_metrics_csv'] - - if overwrite: - if input("Are you sure you want to overwrite metrics? y/n: ") == "n": - quit + fr_run_dir = args['fr_run_dir'] + calibrated = args['calibrated'] + model = args['model'] + verbose = bool(args['verbose']) + gms_verbose = bool(args['gms_verbose']) + + print("================================") + print("Start synthesize test cases") + start_time = datetime.now() + dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S") + print (f"started: {dt_string}") + print() + + # check job numbers + total_cpus_requested = job_number_huc * job_number_branch + total_cpus_available = os.cpu_count() - 1 + if total_cpus_requested > total_cpus_available: + raise ValueError('The HUC job number, {}, multiplied by the branch job number, {}, '\ + 'exceeds your machine\'s available CPU count minus one. '\ + 'Please lower the job_number_huc or job_number_branch'\ + 'values accordingly.'.format(job_number_huc,job_number_branch) + ) # Default to processing all possible versions in PREVIOUS_FIM_DIR. Otherwise, process only the user-supplied version. if fim_version != "all": @@ -285,73 +337,117 @@ def process_alpha_test(args): else: print('Config (-c) option incorrectly set. Use "DEV" or "PREV"') - # List all available benchmark categories and test_cases. 
- test_cases_dir_list = os.listdir(TEST_CASES_DIR) - benchmark_category_list = [] - if benchmark_category == "all": - for d in test_cases_dir_list: - if 'test_cases' in d: - benchmark_category_list.append(d.replace('_test_cases', '')) + # Create a list of all test_cases for which we have validation data + all_test_cases = test_case.list_all_test_cases(version = fim_version, archive = archive_results, + benchmark_categories=[] if benchmark_category == "all" else [benchmark_category]) + + # Set up multiprocessor + with ProcessPoolExecutor(max_workers=job_number_huc) as executor: + + ## Loop through all test cases, build the alpha test arguments, and submit them to the process pool + executor_dict = {} + for test_case_class in all_test_cases: + + if not os.path.exists(test_case_class.fim_dir): + continue + + fh.vprint(f"test_case_class.test_id is {test_case_class.test_id}", verbose) + + alpha_test_args = { + 'calibrated': calibrated, + 'model': model, + 'mask_type': 'huc', + 'overwrite': overwrite, + 'verbose':gms_verbose if model == 'GMS' else verbose, + 'gms_workers': job_number_branch + } + + try: + future = executor.submit(test_case_class.alpha_test, **alpha_test_args) + executor_dict[future] = test_case_class.test_id + except Exception as ex: + print(f"*** {ex}") + traceback.print_exc() + sys.exit(1) + + # Send the executor to the progress bar and wait for all MS tasks to finish + progress_bar_handler(executor_dict, True, f"Running {model} alpha test cases with {job_number_huc} workers") + #wait(executor_dict.keys()) + + ## Composite alpha test run is initiated by a MS `model` and providing a `fr_run_dir` + if model == 'MS' and fr_run_dir: + + ## Rebuild all test cases list with the FR version, loop through them and apply the alpha test + all_test_cases = test_case.list_all_test_cases(version = fr_run_dir, archive = archive_results, + benchmark_categories=[] if benchmark_category == "all" else [benchmark_category]) + + with ProcessPoolExecutor(max_workers=job_number_huc) as executor: + executor_dict = {} + for test_case_class in all_test_cases: + if not os.path.exists(test_case_class.fim_dir): + continue + alpha_test_args = { + 'calibrated': calibrated, + 'model': model, + 'mask_type': 'huc', + 'verbose':verbose, + 'overwrite': overwrite + } + try: + future = executor.submit(test_case_class.alpha_test, **alpha_test_args) + executor_dict[future] = test_case_class.test_id + except Exception as ex: + print(f"*** {ex}") + traceback.print_exc() + sys.exit(1) + + # Send the executor to the progress bar and wait for all FR tasks to finish + progress_bar_handler(executor_dict, True, f"Running FR test cases with {job_number_huc} workers") + #wait(executor_dict.keys()) + + # Loop through FR test cases, build composite arguments, and submit the composite method to the process pool + with ProcessPoolExecutor(max_workers=job_number_huc) as executor: + executor_dict = {} + for test_case_class in all_test_cases: + composite_args = { + 'version_2': fim_version, # this is the MS version name since `all_test_cases` are FR + 'calibrated': calibrated, + 'overwrite': overwrite, + 'verbose': verbose + } + + try: + future = executor.submit(test_case_class.alpha_test, **alpha_test_args) + executor_dict[future] = test_case_class.test_id + except Exception as ex: + print(f"*** {ex}") + traceback.print_exc() + sys.exit(1) + + # Send the executor to the progress bar + progress_bar_handler(executor_dict, verbose, f"Compositing test cases with {job_number_huc} workers") + + if dev_versions_to_compare != None: + 
dev_versions_to_include_list = dev_versions_to_compare + previous_fim_list else: - benchmark_category_list = [benchmark_category] + dev_versions_to_include_list = previous_fim_list - # Loop through benchmark categories. - procs_list = [] - for bench_cat in benchmark_category_list: - - # Map path to appropriate test_cases folder and list test_ids into bench_cat_id_list. - bench_cat_test_case_dir = os.path.join(TEST_CASES_DIR, bench_cat + '_test_cases') - bench_cat_id_list = os.listdir(bench_cat_test_case_dir) - - # Loop through test_ids in bench_cat_id_list. - for test_id in bench_cat_id_list: - if 'validation' and 'other' not in test_id: - current_huc = test_id.split('_')[0] - if test_id.split('_')[1] in bench_cat: - # Loop through versions. - for version in previous_fim_list: - if config == 'DEV': - fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc) - elif config == 'PREV': - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc) - - # For previous versions of HAND computed at HUC6 scale - if not os.path.exists(fim_run_dir): - print(fim_run_dir) - if config == 'DEV': - fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc[:6]) - elif config == 'PREV': - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) - - if os.path.exists(fim_run_dir): - # If a user supplies a special_string (-s), then add it to the end of the created dirs. - if special_string != "": - version = version + '_' + special_string + if master_metrics_csv is not None: + # Do aggregate_metrics. + print("Creating master metrics CSV...") - - # Define the magnitude lists to use, depending on test_id. - benchmark_type = test_id.split('_')[1] - magnitude = MAGNITUDE_DICT[benchmark_type] - - # Either add to list to multiprocess or process serially, depending on user specification. - if job_number > 1: - procs_list.append([fim_run_dir, version, test_id, magnitude, archive_results, overwrite]) - else: - process_alpha_test([fim_run_dir, version, test_id, magnitude, archive_results, overwrite]) - - # Multiprocess alpha test runs. - if job_number > 1: - with Pool(processes=job_number) as pool: - pool.map(process_alpha_test, procs_list) - - if config == 'DEV': - if dev_versions_to_compare != None: - dev_versions_to_include_list = dev_versions_to_compare + [version] - else: - dev_versions_to_include_list = [version] - if config == 'PREV': - dev_versions_to_include_list = [] + # this function is not compatible with GMS + create_master_metrics_csv(master_metrics_csv_output = master_metrics_csv, + dev_versions_to_include_list = dev_versions_to_include_list) + + print("================================") + print("End synthesize test cases") + + end_time = datetime.now() + dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S") + print (f"ended: {dt_string}") - # Do aggregate_metrics. 
- print("Creating master metrics CSV...") - create_master_metrics_csv(master_metrics_csv_output=master_metrics_csv, dev_versions_to_include_list=dev_versions_to_include_list) + # calculate duration + time_duration = end_time - start_time + print(f"Duration: {str(time_duration).split('.')[0]}") + print() diff --git a/tools/test_case_by_hydro_id.py b/tools/test_case_by_hydro_id.py new file mode 100644 index 000000000..7ff336cd7 --- /dev/null +++ b/tools/test_case_by_hydro_id.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 + +# Import file management library +import os +# Import data analysis library +import pandas as pd +import geopandas as gpd +import argparse +from datetime import datetime + +from pixel_counter import zonal_stats +from tools_shared_functions import compute_stats_from_contingency_table +from run_test_case import test_case +from shapely.validation import make_valid + +##################################################### +# Perform zonal stats is a funtion stored in pixel_counter.py. +# The input huc_gpkg is a single huc8 geopackage, the second input argument must be input as a dict. +# For the purposes of assembling the alpha metrics by hydroid, always use agreement_raster total area agreement tiff. +# This function is called automatically. Returns stats dict of pixel counts. +##################################################### +def perform_zonal_stats(huc_gpkg,agree_rast): + stats = zonal_stats(huc_gpkg,{"agreement_raster":agree_rast}, nodata_value=10) + return stats + + +##################################################### +# Creates a pandas df containing Alpha stats by hydroid. +# Stats input is the output of zonal_stats function. +# Huc8 is the huc8 string and is passed via the directory loop during execution. +# Mag is the magnitude (100y, action, minor etc.) is passed via the directory loop. +# Bench is the benchmark source. 
+##################################################### +def assemble_hydro_alpha_for_single_huc(stats,huc8,mag,bench): + in_mem_df = pd.DataFrame(columns=['HydroID', 'huc8','contingency_tot_area_km2', + 'CSI', 'FAR', 'TPR', 'TNR', 'PPV', 'NPV', 'Bal_ACC', + 'MCC', 'EQUITABLE_THREAT_SCORE', 'PREVALENCE', 'BIAS', 'F1_SCORE', + 'masked_perc', 'MAG','BENCH']) + + for dicts in stats: + tot_pop = dicts['tn'] +dicts['fn'] + dicts['fp'] + dicts['tp'] + if tot_pop == 0: + continue + + stats_dictionary = compute_stats_from_contingency_table(dicts['tn'], dicts['fn'], dicts['fp'], dicts['tp'], cell_area=100, masked_count= dicts['mp']) + # Calls compute_stats_from_contingency_table from run_test_case.py + + hydroid = dicts['HydroID'] + stats_dictionary['HydroID'] = hydroid + + contingency_tot_area_km2 = float(stats_dictionary['contingency_tot_area_km2']) + if contingency_tot_area_km2 != 'NA': + contingency_tot_area_km2 = round(contingency_tot_area_km2,2) + + CSI = stats_dictionary['CSI'] + if CSI != 'NA': + CSI = round(CSI,2) + + FAR = stats_dictionary['FAR'] + if FAR != 'NA': + FAR = round(FAR,2) + + TPR = stats_dictionary['TPR'] + if TPR != 'NA': + TPR = round(TPR,2) + + TNR = stats_dictionary['TNR'] + if TNR != 'NA': + TNR = round(TNR,2) + + PPV = stats_dictionary['PPV'] + if PPV != 'NA': + PPV = round(PPV,2) + + NPV = stats_dictionary['NPV'] + if NPV != 'NA': + NPV = round(NPV,2) + + Bal_ACC = stats_dictionary['Bal_ACC'] + if Bal_ACC != 'NA': + Bal_ACC = round(Bal_ACC,2) + + MCC = float(stats_dictionary['MCC']) + if MCC != 'NA': + MCC = round(MCC,2) + + EQUITABLE_THREAT_SCORE = stats_dictionary['EQUITABLE_THREAT_SCORE'] + if EQUITABLE_THREAT_SCORE != 'NA': + EQUITABLE_THREAT_SCORE = round(EQUITABLE_THREAT_SCORE,2) + + PREVALENCE = stats_dictionary['PREVALENCE'] + if PREVALENCE != 'NA': + PREVALENCE = round(PREVALENCE,2) + + BIAS = stats_dictionary['BIAS'] + if BIAS != 'NA': + BIAS = round(BIAS,2) + + F1_SCORE = stats_dictionary['F1_SCORE'] + if F1_SCORE != 'NA': + F1_SCORE = round(F1_SCORE,2) + + masked_perc = stats_dictionary['masked_perc'] + if masked_perc != 'NA': + masked_perc = round(masked_perc,2) + + HydroID = stats_dictionary['HydroID'] + + dict_with_list_values = {'HydroID': [HydroID],'huc8':[huc8], 'contingency_tot_area_km2': [contingency_tot_area_km2], + 'CSI': [CSI], 'FAR': [FAR], 'TPR': [TPR], 'TNR': [TNR], 'PPV': [PPV], 'NPV': [NPV], + 'Bal_ACC': [Bal_ACC], 'MCC': [MCC], 'EQUITABLE_THREAT_SCORE': [EQUITABLE_THREAT_SCORE], 'PREVALENCE': [PREVALENCE], + 'BIAS': [BIAS], 'F1_SCORE': [F1_SCORE], 'masked_perc': [masked_perc], 'MAG':[mag],'BENCH':[bench]} + + dict_to_df = pd.DataFrame(dict_with_list_values,columns=['HydroID','huc8', 'contingency_tot_area_km2', + 'CSI', 'FAR', 'TPR', 'TNR', 'PPV', 'NPV', 'Bal_ACC', + 'MCC', 'EQUITABLE_THREAT_SCORE', 'PREVALENCE', 'BIAS', 'F1_SCORE', 'masked_perc', 'MAG','BENCH']) + + concat_list = [in_mem_df, dict_to_df] + in_mem_df = pd.concat(concat_list, sort=False) + + return in_mem_df + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description='Produces alpha metrics by hyrdoid.') + + parser.add_argument('-b', '--benchmark_category', + help='Choice of truth data. Options are: all, ble, ifc, nws, usgs, ras2fim', + required=True) + parser.add_argument('-v', '--version', + help='The fim version to use. Should be similar to fim_3_0_24_14_ms', + required=True) + parser.add_argument('-g', '--gpkg', + help='filepath and filename to hold exported gpkg (and csv) file. 
Similar to /data/path/fim_performance_catchments.gpkg Need to use gpkg as output. ', + required=True) + + + # Assign variables from arguments. + args = vars(parser.parse_args()) + benchmark_category = args['benchmark_category'] + version = args['version'] + csv = args['gpkg'] + + print("================================") + print("Start test_case_by_hydroid.py") + start_time = datetime.now() + dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S") + print (f"started: {dt_string}") + print() + + # Execution code + csv_output = gpd.GeoDataFrame(columns=['HydroID', 'huc8','contingency_tot_area_km2', + 'CSI', 'FAR', 'TPR', 'TNR', 'PPV', 'NPV', 'Bal_ACC', + 'MCC', 'EQUITABLE_THREAT_SCORE', 'PREVALENCE', 'BIAS', 'F1_SCORE', + 'masked_perc', 'MAG','BENCH','geometry'], geometry = 'geometry') + + # This funtion, relies on the test_case class defined in run_test_case.py to list all available test cases + print('listing_test_cases_with_updates') + all_test_cases = test_case.list_all_test_cases(version=version, archive=True, benchmark_categories=[] if benchmark_category == "all" else [benchmark_category]) + + for test_case_class in all_test_cases: + + if not os.path.exists(test_case_class.fim_dir): + print(f'{test_case_class.fim_dir} does not exist') + continue + + print(test_case_class.fim_dir, end='\r') + + agreement_dict = test_case_class.get_current_agreements() + + for agree_rast in agreement_dict: + + print(f'performing_zonal_stats for {agree_rast}') + + branches_dir = os.path.join(test_case_class.fim_dir,'branches') + for branches in os.listdir(branches_dir): + if branches != "0": + continue + huc_gpkg = os.path.join(branches_dir,branches,) + + string_manip = "gw_catchments_reaches_filtered_addedAttributes_crosswalked_" + branches + ".gpkg" + + huc_gpkg = os.path.join(huc_gpkg, string_manip) + + define_mag = agree_rast.split(version) + + define_mag_1 = define_mag[1].split('/') + + mag = define_mag_1[1] + + stats = perform_zonal_stats(huc_gpkg,agree_rast) + if stats == []: + continue + + print('assembling_hydroalpha_for_single_huc') + get_geom = gpd.read_file(huc_gpkg) + + get_geom['geometry'] = get_geom.apply(lambda row: make_valid(row.geometry), axis=1) + + in_mem_df = assemble_hydro_alpha_for_single_huc(stats, test_case_class.huc, mag, test_case_class.benchmark_cat) + + hydro_geom_df = get_geom[["HydroID", "geometry"]] + + geom_output = hydro_geom_df.merge(in_mem_df, on='HydroID', how ='inner') + + concat_df_list = [geom_output, csv_output] + + csv_output = pd.concat(concat_df_list, sort=False) + + print('projecting to 3857') + csv_output = csv_output.to_crs('EPSG:3857') + + print('manipulating the input string to exclude gpkg and include csv') + csv_path_list = csv.split(".") + csv_path = csv_path_list[0] + csv_path_dot = csv_path + ".csv" + + print('writing_to_gpkg') + csv_output.to_file(csv, driver="GPKG") + + # Add version information to csv_output dataframe + csv_output['version'] = version + + print('writing_to_csv') + csv_output.to_csv(csv_path_dot) # Save to CSV + + print("================================") + print("End test_case_by_hydroid.py") + + end_time = datetime.now() + dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S") + print (f"ended: {dt_string}") + + # calculate duration + time_duration = end_time - start_time + print(f"Duration: {str(time_duration).split('.')[0]}") + print() + + + \ No newline at end of file diff --git a/tools/tools_shared_functions.py b/tools/tools_shared_functions.py index 6cf0e7685..f1ad637ce 100755 --- a/tools/tools_shared_functions.py +++ 
b/tools/tools_shared_functions.py @@ -8,6 +8,7 @@ import requests import numpy as np import pathlib +import time from pathlib import Path import rasterio.shutil from rasterio.warp import calculate_default_transform, reproject, Resampling @@ -16,6 +17,51 @@ from shapely.geometry import shape from shapely.geometry import Polygon from shapely.geometry import MultiPolygon +from dotenv import load_dotenv + + +def get_env_paths(): + load_dotenv() + #import variables from .env file + API_BASE_URL = os.getenv("API_BASE_URL") + WBD_LAYER = os.getenv("WBD_LAYER") + return API_BASE_URL, WBD_LAYER + + +def filter_nwm_segments_by_stream_order(unfiltered_segments, desired_order, nwm_flows_df): + """ + This function uses the WRDS API to filter out NWM segments from a list if their stream order is different than + the target stream order. + + Args: + unfiltered_segments (list): A list of NWM feature_id strings. + desired_order (str): The desired stream order. + Returns: + filtered_segments (list): A list of NWM feature_id strings, paired down to only those that share the target order. + + """ + +# API_BASE_URL, WBD_LAYER = get_env_paths() + #Define workspace and wbd_path as a pathlib Path. Convert search distances to integer. +# metadata_url = f'{API_BASE_URL}/metadata' + + + + # feature ID of 0 is getting passed to WRDS and returns empty results, + # which can cause failures on next() +# if '0' in unfiltered_segments: +# unfiltered_segments = unfiltered_segments.remove('0') +# if unfiltered_segments is None: +# return filtered_segments + + filtered_segments = [] + + for feature_id in unfiltered_segments: + stream_order = nwm_flows_df.loc[nwm_flows_df['ID'] == int(feature_id), 'order_'].values[0] + if stream_order == desired_order: + filtered_segments.append(feature_id) + + return filtered_segments def check_for_regression(stats_json_to_test, previous_version, previous_version_stats_json_path, regression_test_csv=None): @@ -291,6 +337,11 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ except ZeroDivisionError: F1_score = "NA" + if TPR == 'NA': + PND = 'NA' + else: + PND = 1.0 - TPR # Probability Not Detected (PND) + stats_dictionary = {'true_negatives_count': int(true_negatives), 'false_negatives_count': int(false_negatives), 'true_positives_count': int(true_positives), @@ -314,6 +365,7 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ 'FAR': FAR, 'TPR': TPR, 'TNR': TNR, + 'PND': PND, 'PPV': PPV, 'NPV': NPV, @@ -499,6 +551,7 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r operation = mask_dict[poly_layer]['operation'] if operation == 'include': + poly_path = mask_dict[poly_layer]['path'] buffer_val = mask_dict[poly_layer]['buffer'] @@ -606,6 +659,8 @@ def get_metadata(metadata_url, select_by, selector, must_include = None, upstrea params['downstream_trace_distance'] = downstream_trace_distance #Request data from url response = requests.get(url, params = params) +# print(response) +# print(url) if response.ok: #Convert data response to a json metadata_json = response.json() @@ -678,13 +733,16 @@ def aggregate_wbd_hucs(metadata_list, wbd_huc8_path, retain_attributes = False): ''' #Import huc8 layer as geodataframe and retain necessary columns + print("Reading WBD...") huc8 = gpd.read_file(wbd_huc8_path, layer = 'WBDHU8') + print("WBD read.") huc8 = huc8[['HUC8','name','states', 'geometry']] #Define EPSG codes for possible latlon datum names (default of NAD83 if unassigned) crs_lookup 
={'NAD27':'EPSG:4267', 'NAD83':'EPSG:4269', 'WGS84': 'EPSG:4326'} #Create empty geodataframe and define CRS for potential horizontal datums metadata_gdf = gpd.GeoDataFrame() #Iterate through each site + print("Iterating through metadata list...") for metadata in metadata_list: #Convert metadata to json df = pd.json_normalize(metadata) @@ -694,6 +752,7 @@ def aggregate_wbd_hucs(metadata_list, wbd_huc8_path, retain_attributes = False): df.dropna(subset = ['identifiers_nws_lid','usgs_preferred_latitude', 'usgs_preferred_longitude'], inplace = True) #If dataframe still has data if not df.empty: +# print(df[:5]) #Get horizontal datum h_datum = df['usgs_preferred_latlon_datum_name'].item() #Look up EPSG code, if not returned Assume NAD83 as default. @@ -709,14 +768,14 @@ def aggregate_wbd_hucs(metadata_list, wbd_huc8_path, retain_attributes = False): site_gdf = site_gdf.to_crs(huc8.crs) #Append site geodataframe to metadata geodataframe metadata_gdf = metadata_gdf.append(site_gdf, ignore_index = True) - + #Trim metadata to only have certain fields. if not retain_attributes: metadata_gdf = metadata_gdf[['identifiers_nwm_feature_id', 'identifiers_nws_lid', 'identifiers_usgs_site_code', 'geometry']] #If a list of attributes is supplied then use that list. - elif isinstance(retain_attributes,list): - metadata_gdf = metadata_gdf[retain_attributes] - +# elif isinstance(retain_attributes,list): +# metadata_gdf = metadata_gdf[retain_attributes] + print("Performing spatial and tabular operations on geodataframe...") #Perform a spatial join to get the WBD HUC 8 assigned to each AHPS joined_gdf = gpd.sjoin(metadata_gdf, huc8, how = 'inner', op = 'intersects', lsuffix = 'ahps', rsuffix = 'wbd') joined_gdf = joined_gdf.drop(columns = 'index_wbd') @@ -906,7 +965,10 @@ def get_thresholds(threshold_url, select_by, selector, threshold = 'all'): flows['usgs_site_code'] = threshold_data.get('metadata').get('usgs_site_code') stages['units'] = threshold_data.get('metadata').get('stage_units') flows['units'] = threshold_data.get('metadata').get('calc_flow_units') - return stages, flows + return stages, flows + else: + print("WRDS response error: ") +# print(response) ######################################################################## # Function to write flow file @@ -1064,7 +1126,7 @@ def ngvd_to_navd_ft(datum_info, region = 'contiguous'): lon = datum_info['lon'] #Define url for datum API - datum_url = 'https://vdatum.noaa.gov/vdatumweb/api/tidal' + datum_url = 'https://vdatum.noaa.gov/vdatumweb/api/convert' #Define parameters. Hard code most parameters to convert NGVD to NAVD. params = {} @@ -1080,16 +1142,18 @@ def ngvd_to_navd_ft(datum_info, region = 'contiguous'): #Call the API response = requests.get(datum_url, params = params) - #If succesful get the navd adjustment + + #If successful get the navd adjustment if response: results = response.json() #Get adjustment in meters (NGVD29 to NAVD88) - adjustment = results['tar_height'] + adjustment = results['t_z'] #convert meters to feet adjustment_ft = round(float(adjustment) * 3.28084,2) else: adjustment_ft = None - return adjustment_ft + return adjustment_ft + ####################################################################### #Function to download rating curve from API ####################################################################### @@ -1114,6 +1178,7 @@ def get_rating_curve(rating_curve_url, location_ids): #Define DataFrame to contain all returned curves. 
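A small, hedged sketch of how the filter_nwm_segments_by_stream_order function added above behaves; the toy nwm_flows_df only reuses the 'ID' and 'order_' column names that the lookup relies on, and everything else is illustrative.

```python
import pandas as pd

from tools_shared_functions import filter_nwm_segments_by_stream_order

# Toy NWM flows table; a real run would build this from the NWM hydrofabric.
nwm_flows_df = pd.DataFrame({"ID": [101, 102, 103], "order_": [2, 3, 2]})

# Keep only feature_ids whose stream order matches the desired order (2 here).
filtered = filter_nwm_segments_by_stream_order(["101", "102", "103"], 2, nwm_flows_df)
print(filtered)  # -> ['101', '103']
```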
all_curves = pd.DataFrame() + print(location_ids) #Define call to retrieve all rating curve information from WRDS. joined_location_ids = '%2C'.join(location_ids) url = f'{rating_curve_url}/{joined_location_ids}' @@ -1447,4 +1512,67 @@ def process_grid(benchmark, benchmark_profile, domain, domain_profile, reference profile.update(nodata = new_nodata_value) profile.update (width = new_benchmark_width) profile.update(height = new_benchmark_height) - return classified_benchmark_projected, profile \ No newline at end of file + + return classified_benchmark_projected, profile + + +def calculate_metrics_from_agreement_raster(agreement_raster): + + ''' Calculates metrics from an agreement raster ''' + + agreement_encoding_digits_to_names = { 0: "TN", + 1: "FN", + 2: "FP", + 3: "TP" + } + + + if isinstance(agreement_raster,rasterio.DatasetReader): + pass + elif isinstance(agreement_raster,str): + agreement_raster = rasterio.open(agreement_raster) + else: + raise TypeError(f"{agreement_raster} is not a Rasterio Dataset Reader or a filepath to a raster") + + # cycle through blocks + totals = dict.fromkeys(list(range(4)),0) + for idx,wind in agreement_raster.block_windows(1): + window_data = agreement_raster.read(1,window=wind) + values, counts = np.unique(window_data,return_counts=True) + for val,cts in zip(values,counts): + totals[val] += cts + + results = dict() + for digit,count in totals.items(): + results[agreement_encoding_digits_to_names[digit]] = count + + return results + + +# evaluation metric functions + +def csi(TP,FP,FN,TN=None): + + ''' Critical Success Index ''' + + return TP / (FP + FN + TP) + + +def tpr(TP,FP,FN,TN=None): + + ''' True Positive Rate ''' + + return TP / (TP + FN) + + +def far(TP,FP,FN,TN=None): + + ''' False Alarm Rate ''' + + return FP / (TP + FP) + + +def mcc(TP,FP,FN,TN=None): + + ''' Matthew's Correlation Coefficient ''' + return (TP*TN - FP*FN) / np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)) diff --git a/tools/tools_shared_variables.py b/tools/tools_shared_variables.py index 0c31a6fb8..da86b828c 100755 --- a/tools/tools_shared_variables.py +++ b/tools/tools_shared_variables.py @@ -10,11 +10,12 @@ BLE_MAGNITUDE_LIST = ['100yr', '500yr'] IFC_MAGNITUDE_LIST = ['2yr', '5yr', '10yr', '25yr', '50yr', '100yr', '200yr', '500yr'] AHPS_MAGNITUDE_LIST = ['action', 'minor', 'moderate', 'major'] +RAS2FIM_MAGNITUDE_LIST = ['2yr', '5yr', '10yr', '25yr', '50yr', '100yr'] -MAGNITUDE_DICT = {'ble': BLE_MAGNITUDE_LIST, 'ifc': IFC_MAGNITUDE_LIST, 'usgs': AHPS_MAGNITUDE_LIST, 'nws': AHPS_MAGNITUDE_LIST} +MAGNITUDE_DICT = {'ble': BLE_MAGNITUDE_LIST, 'ifc': IFC_MAGNITUDE_LIST, 'usgs': AHPS_MAGNITUDE_LIST, 'nws': AHPS_MAGNITUDE_LIST, 'ras2fim': RAS2FIM_MAGNITUDE_LIST} PRINTWORTHY_STATS = ['CSI', 'TPR', 'TNR', 'FAR', 'MCC', 'TP_area_km2', 'FP_area_km2', 'TN_area_km2', 'FN_area_km2', 'contingency_tot_area_km2', 'TP_perc', 'FP_perc', 'TN_perc', 'FN_perc'] GO_UP_STATS = ['CSI', 'TPR', 'MCC', 'TN_area_km2', 'TP_area_km2', 'TN_perc', 'TP_perc', 'TNR'] -GO_DOWN_STATS = ['FAR', 'FN_area_km2', 'FP_area_km2', 'FP_perc', 'FN_perc'] +GO_DOWN_STATS = ['FAR', 'FN_area_km2', 'FP_area_km2', 'FP_perc', 'FN_perc', 'PND'] # Variables for eval_plots.py BAD_SITES = [ @@ -60,3 +61,16 @@ TWHITE = '\033[37m' WHITE_BOLD = '\033[37;1m' CYAN_BOLD = '\033[36;1m' + +# USGS gages acceptance criteria. Likely not constants, so not using all caps. 
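A quick numeric check of the evaluation metric helpers added to tools_shared_functions.py above; the contingency-table counts are invented purely for illustration, and the import assumes the tools directory is on the Python path.

```python
from tools_shared_functions import csi, tpr, far, mcc

# Illustrative contingency-table cell counts.
TP, FP, FN, TN = 800, 150, 100, 950

print(round(csi(TP, FP, FN), 3))      # 800 / (150 + 100 + 800) ~= 0.762
print(round(tpr(TP, FP, FN), 3))      # 800 / (800 + 100)       ~= 0.889
print(round(far(TP, FP, FN), 3))      # 150 / (800 + 150)       ~= 0.158
print(round(mcc(TP, FP, FN, TN), 3))  # requires TN; ~= 0.75 for these counts
```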
+# ANY CHANGE TO THESE VALUES SHOULD WARRANT A CODE VERSION CHANGE +# https://help.waterdata.usgs.gov/code/coord_acy_cd_query?fmt=html +acceptable_coord_acc_code_list = ['H','1','5','S','R','B','C','D','E', 5, 1] +# https://help.waterdata.usgs.gov/code/coord_meth_cd_query?fmt=html +acceptable_coord_method_code_list = ['C','D','W','X','Y','Z','N','M','L','G','R','F','S'] +# https://help.waterdata.usgs.gov/codes-and-parameters/codes#SI +acceptable_alt_acc_thresh = 1.0 +# https://help.waterdata.usgs.gov/code/alt_meth_cd_query?fmt=html +acceptable_alt_meth_code_list = ['A','D','F','I','J','L','N','R','W','X','Y','Z'] +# https://help.waterdata.usgs.gov/code/site_tp_query?fmt=html +acceptable_site_type_list = ['ST'] diff --git a/tools/vary_mannings_n_composite.py b/tools/vary_mannings_n_composite.py new file mode 100755 index 000000000..450734d18 --- /dev/null +++ b/tools/vary_mannings_n_composite.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python3 +import datetime as dt +import re +import os +import sys +import pandas as pd +import numpy as np +import argparse +import matplotlib.pyplot as plt +import seaborn as sns +import shutil +import traceback +import warnings +from functools import reduce +from multiprocessing import Pool +from os.path import isfile, join, dirname, isdir +from pathlib import Path +from tqdm import tqdm + +sns.set_theme(style="whitegrid") +warnings.simplefilter(action='ignore', category=FutureWarning) + +""" + Vary the Manning's n values for in-channel vs. floodplain + + Parameters + ---------- + fim_dir : str + Directory containing FIM output folders. + channel_ratio_src_column : str + SRC attribute containing the channel vs. floodplain attribute + mann_n_table : str + Path to a csv file containing Manning's n values by feature_id + file_suffix : str + Suffix to append to the output log file + number_of_jobs : str + Number of jobs. 
+ src_plot_option : str + Optional (True or False): use this flag to crate src plots for all hydroids +""" + +def variable_mannings_calc(args): + + in_src_bankfull_filename = args[0] + channel_ratio_src_column = args[1] + df_mann = args[2] + huc = args[3] + branch_id = args[4] + htable_filename = args[5] + output_suffix = args[6] + src_plot_option = args[7] + huc_output_dir = args[8] + + ## Read the src_full_crosswalked.csv + print('Calculating variable roughness: ' + str(huc) + ' branch id: ' + str(branch_id)) + log_text = 'Calculating variable roughness: ' + str(huc) + ' branch id: ' + str(branch_id) + '\n' + df_src = pd.read_csv(in_src_bankfull_filename,dtype={'feature_id': 'int64'}) + + ## Check that the channel ratio column the user specified exists in the def + if channel_ratio_src_column not in df_src.columns: + print('WARNING --> ' + str(huc) + ' branch id: ' + str(branch_id) + in_src_bankfull_filename + ' does not contain the specified channel ratio column: ' + channel_ratio_src_column) + print('Skipping --> ' + str(huc) + ' branch id: ' + str(branch_id)) + log_text += 'WARNING --> ' + str(huc) + ' branch id: ' + str(branch_id) + in_src_bankfull_filename + ' does not contain the specified channel ratio column: ' + channel_ratio_src_column + '\n' + else: + try: + if 'comp_ManningN' in df_src.columns: + df_src.drop(['channel_n','overbank_n','comp_ManningN','vmann_on','Discharge (m3s-1)_varMann'], axis=1, inplace=True) # drop these cols (in case vmann was previously performed) + + ## Merge (crosswalk) the df of Manning's n with the SRC df (using the channel/fplain delination in the channel_ratio_src_column) + df_src = df_src.merge(df_mann, how='left', on='feature_id') + check_null = df_src['channel_n'].isnull().sum() + df_src['overbank_n'].isnull().sum() + if check_null > 0: + log_text += str(huc) + ' branch id: ' + str(branch_id) + ' --> ' + 'Null feature_ids found in crosswalk btw roughness dataframe and src dataframe' + ' --> missing entries= ' + str(check_null/84) + '\n' + + ## Calculate composite Manning's n using the channel geometry ratio attribute given by user (e.g. 
chann_hradius_ratio or chann_vol_ratio) + df_src['comp_ManningN'] = (df_src[channel_ratio_src_column]*df_src['channel_n']) + ((1.0 - df_src[channel_ratio_src_column])*df_src['overbank_n']) + #print('Done calculating composite Manning n (' + channel_ratio_src_column + '): ' + str(huc)) + + ## Check if there are any missing data in the composite ManningN column + check_null_comp = df_src['comp_ManningN'].isnull().sum() + if check_null_comp > 0: + log_text += str(huc) + ' branch id: ' + str(branch_id) + ' --> ' + 'Missing values in the comp_ManningN calculation' + ' --> missing entries= ' + str(check_null_comp/84) + '\n' + df_src['vmann_on'] = np.where(df_src['comp_ManningN'].isnull(), False, True) # create field to identify where vmann is applied (True=yes; False=no) + + ## Define the channel geometry variable names to use from the src + hydr_radius = 'HydraulicRadius (m)' + wet_area = 'WetArea (m2)' + + ## Calculate Q using Manning's equation + #df_src.rename(columns={'Discharge (m3s-1)'}, inplace=True) # rename the previous Discharge column + df_src['Discharge (m3s-1)_varMann'] = df_src[wet_area]* \ + pow(df_src[hydr_radius],2.0/3)* \ + pow(df_src['SLOPE'],0.5)/df_src['comp_ManningN'] + + ## Set Q values to 0 and -999 for specified criteria (thalweg notch check happens in BARC) + # df_src['Discharge (m3s-1)_varMann'].mask(df_src['Stage'] == 0,0,inplace=True) + # if 'Thalweg_burn_elev' in df_src: + # df_src['Discharge (m3s-1)_varMann'].mask(df_src['Stage'] == df_src['Thalweg_burn_elev'],0,inplace=True) + # df_src['Discharge (m3s-1)_varMann'].mask(df_src['Stage'] < df_src['Thalweg_burn_elev'],-999,inplace=True) + + ## Use the default discharge column when vmann is not being applied + df_src['Discharge (m3s-1)_varMann'] = np.where(df_src['vmann_on']==False, df_src['Discharge (m3s-1)'], df_src['Discharge (m3s-1)_varMann']) # reset the discharge value back to the original if vmann=false + df_src['comp_ManningN'] = np.where(df_src['vmann_on']==False, df_src['ManningN'], df_src['comp_ManningN']) # reset the ManningN value back to the original if vmann=false + + ## Output new SRC with bankfull column + df_src.to_csv(in_src_bankfull_filename,index=False) + + ## Output new hydroTable with updated discharge and ManningN column + df_src_trim = df_src[['HydroID','Stage','vmann_on','comp_ManningN','Discharge (m3s-1)_varMann']] + df_src_trim = df_src_trim.rename(columns={'Stage':'stage','comp_ManningN':'vmann_ManningN','Discharge (m3s-1)_varMann': 'vmann_discharge_cms'}) + df_src_trim['ManningN'] = df_src_trim['vmann_ManningN'] # create a copy of vmann modified ManningN (used to track future changes) + df_src_trim['discharge_cms'] = df_src_trim['vmann_discharge_cms'] # create a copy of vmann modified discharge (used to track future changes) + df_htable = pd.read_csv(htable_filename,dtype={'HUC': str}) + + ## Check if BARC ran + # if not set(['orig_discharge_cms']).issubset(df_htable.columns): + # df_htable.rename(columns={'discharge_cms':'orig_discharge_cms'},inplace=True) + # df_htable.rename(columns={'ManningN':'orig_ManningN'},inplace=True) + # else: + + ## drop the previously modified discharge column to be replaced with updated version + df_htable.drop(['vmann_on','discharge_cms','ManningN','vmann_discharge_cms','vmann_ManningN'], axis=1, inplace=True) + df_htable = df_htable.merge(df_src_trim, how='left', left_on=['HydroID','stage'], right_on=['HydroID','stage']) + + df_htable['vmann_on'] = np.where(df_htable['LakeID']>0, False, df_htable['vmann_on']) # reset the ManningN value back to the original 
if vmann=false + + ## Output new hydroTable csv + if output_suffix != "": + htable_filename = os.path.splitext(htable_filename)[0] + output_suffix + '.csv' + df_htable.to_csv(htable_filename,index=False) + + log_text += 'Completed: ' + str(huc) + + ## plot rating curves + if src_plot_option: + if isdir(huc_output_dir) == False: + os.mkdir(huc_output_dir) + generate_src_plot(df_src, huc_output_dir) + except Exception as ex: + summary = traceback.StackSummary.extract( + traceback.walk_stack(None)) + print(str(huc) + ' branch id: ' + str(branch_id) + " failed for some reason") + print(f"*** {ex}") + print(''.join(summary.format())) + log_text += 'ERROR --> ' + str(huc) + ' branch id: ' + str(branch_id) + " failed (details: " + (f"*** {ex}") + (''.join(summary.format())) + '\n' + + return(log_text) + +def generate_src_plot(df_src, plt_out_dir): + + ## create list of unique hydroids + hydroids = df_src.HydroID.unique().tolist() + + ## plot each hydroid SRC in the huc + for hydroid in hydroids: + print("Creating SRC plot: " + str(hydroid)) + plot_df = df_src.loc[df_src['HydroID'] == hydroid] + + f, ax = plt.subplots(figsize=(6.5, 6.5)) + ax.set_title(str(hydroid)) + sns.despine(f, left=True, bottom=True) + sns.scatterplot(x='Discharge (m3s-1)', y='Stage', data=plot_df, label="Orig SRC", ax=ax, color='blue') + sns.scatterplot(x='Discharge (m3s-1)_varMann', y='Stage', data=plot_df, label="SRC w/ vMann", ax=ax, color='orange') + sns.lineplot(x='Discharge (m3s-1)', y='Stage_bankfull', data=plot_df, color='green', ax=ax) + plt.fill_between(plot_df['Discharge (m3s-1)'], plot_df['Stage_bankfull'],alpha=0.5) + plt.text(plot_df['Discharge (m3s-1)'].median(), plot_df['Stage_bankfull'].median(), "NWM Bankfull Approx: " + str(plot_df['Stage_bankfull'].median())) + ax.legend() + plt.savefig(plt_out_dir + os.sep + str(hydroid) + '_vmann.png',dpi=175, bbox_inches='tight') + plt.close() + +# for hydroid in hydroids: +# print("Creating SRC plot: " + str(hydroid)) +# plot_df = df_src.loc[df_src['HydroID'] == hydroid] +# +# f, ax = plt.subplots(figsize=(6.5, 6.5)) +# ax.set_title(str(hydroid)) +# sns.despine(f, left=True, bottom=True) +# sns.scatterplot(x='comp_ManningN', y='Stage', data=plot_df, label="Orig SRC", ax=ax, color='blue') +# #sns.scatterplot(x='Discharge (m3s-1)_varMann', y='Stage', data=plot_df, label="SRC w/ vMann", ax=ax, color='orange') +# sns.lineplot(x='comp_ManningN', y='Stage_1_5', data=plot_df, color='green', ax=ax) +# plt.fill_between(plot_df['comp_ManningN'], plot_df['Stage_1_5'],alpha=0.5) +# plt.text(plot_df['comp_ManningN'].median(), plot_df['Stage_1_5'].median(), "NWM 1.5yr: " + str(plot_df['Stage_1_5'].median())) +# ax.legend() +# plt.savefig(plt_out_dir + os.sep + str(hydroid) + '.png',dpi=175, bbox_inches='tight') +# plt.close() + +def multi_process(variable_mannings_calc, procs_list, log_file, verbose): + ## Initiate multiprocessing + print(f"Applying variable Manning's n to SRC calcs for {len(procs_list)} hucs using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + if verbose: + map_output = tqdm(pool.imap(variable_mannings_calc, procs_list), total=len(procs_list)) + tuple(map_output) # fetch the lazy results + else: + map_output = pool.map(variable_mannings_calc, procs_list) + log_file.writelines(["%s\n" % item for item in map_output]) + +def run_prep(fim_dir,channel_ratio_src_column,mann_n_table,output_suffix,number_of_jobs,verbose,src_plot_option): + procs_list = [] + + print('Writing progress to log file here: ' + str(join(fim_dir,'log_composite_n' + 
output_suffix + '.log'))) + print('This may take a few minutes...') + ## Create a time var to log run time + begin_time = dt.datetime.now() + + ## initiate log file + log_file = open(join(fim_dir,'logs','log_composite_n' + output_suffix + '.log'),"w") + log_file.write('START TIME: ' + str(begin_time) + '\n') + log_file.write('#########################################################\n\n') + + ## Check that the input fim_dir exists + assert os.path.isdir(fim_dir), 'ERROR: could not find the input fim_dir location: ' + str(fim_dir) + ## Check that the manning's roughness input filepath exists and then read to dataframe + assert os.path.isfile(mann_n_table), 'Can not find the input roughness/feature_id file: ' + str(mann_n_table) + + ## Read the Manning's n csv (ensure that it contains feature_id, channel mannings, floodplain mannings) + print('Importing the Manning roughness data file: ' + mann_n_table) + df_mann = pd.read_csv(mann_n_table,dtype={'feature_id': 'int64'}) + if 'channel_n' not in df_mann.columns or 'overbank_n' not in df_mann.columns or 'feature_id' not in df_mann.columns: + print('Missing required data column ("feature_id","channel_n", and/or "overbank_n")!!! --> ' + df_mann) + else: + print('Running the variable_mannings_calc function...') + + ## Loop through hucs in the fim_dir and create list of variables to feed to multiprocessing + huc_list = os.listdir(fim_dir) + for huc in huc_list: + #if huc != 'logs' and huc[-3:] != 'log' and huc[-4:] != '.csv': + if re.match('\d{8}', huc): + huc_branches_dir = os.path.join(fim_dir, huc,'branches') + for branch_id in os.listdir(huc_branches_dir): + branch_dir = os.path.join(huc_branches_dir,branch_id) + in_src_bankfull_filename = join(branch_dir,'src_full_crosswalked_' + branch_id + '.csv') + htable_filename = join(branch_dir,'hydroTable_' + branch_id + '.csv') + huc_plot_output_dir = join(branch_dir,'src_plots') + + if isfile(in_src_bankfull_filename) and isfile(htable_filename): + procs_list.append([in_src_bankfull_filename, channel_ratio_src_column, df_mann, huc, branch_id, htable_filename, output_suffix, src_plot_option, huc_plot_output_dir]) + else: + print('HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + '\nWARNING --> can not find required file (src_full_crosswalked_bankfull_*.csv or hydroTable_*.csv) in the fim output dir: ' + str(branch_dir) + ' - skipping this branch!!!\n') + log_file.write('HUC: ' + str(huc) + ' branch id: ' + str(branch_id) + '\nWARNING --> can not find required file (src_full_crosswalked_bankfull_*.csv or hydroTable_*.csv) in the fim output dir: ' + str(branch_dir) + ' - skipping this branch!!!\n') + + ## Pass huc procs_list to multiprocessing function + multi_process(variable_mannings_calc, procs_list, log_file, verbose) + + ## Record run time and close log file + end_time = dt.datetime.now() + log_file.write('END TIME: ' + str(end_time) + '\n') + tot_run_time = end_time - begin_time + log_file.write('TOTAL RUN TIME: ' + str(tot_run_time)) + log_file.close() + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description="Vary the Manning's n values for in-channel vs. floodplain (recalculate Manning's eq for Discharge)") + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) + parser.add_argument('-bc','--channel-ratio-src-column',help='SRC attribute containing the channel vs. 
overbank geometry ratio (for composite calc)',required=False,type=str,default='chann_hradius_ratio') + parser.add_argument('-mann','--mann-n-table',help="Path to a csv file containing Manning's n values by featureid",required=True,type=str) + parser.add_argument('-suff','--output-suffix',help="Suffix to append to the output log file (e.g. '_global_06_011')",default="",required=False,type=str) + parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + parser.add_argument('-vb','--verbose',help='Optional verbose progress bar',required=False,default=None,action='store_true') + parser.add_argument('-plots','--src-plot-option',help='OPTIONAL flag: use this flag to create src plots for all hydroids. WARNING - long runtime',default=False,required=False, action='store_true') + + args = vars(parser.parse_args()) + + fim_dir = args['fim_dir'] + channel_ratio_src_column = args['channel_ratio_src_column'] + mann_n_table = args['mann_n_table'] + output_suffix = args['output_suffix'] + number_of_jobs = args['number_of_jobs'] + verbose = bool(args['verbose']) + src_plot_option = args['src_plot_option'] + + run_prep(fim_dir,channel_ratio_src_column,mann_n_table,output_suffix,number_of_jobs,verbose,src_plot_option) \ No newline at end of file diff --git a/unit_tests/README.md b/unit_tests/README.md new file mode 100644 index 000000000..1bf2435c1 --- /dev/null +++ b/unit_tests/README.md @@ -0,0 +1,126 @@ +## Inundation Mapping: Flood Inundation Mapping for U.S. National Water Model + +Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Water Center (NWC). + +#### For more information, see the [Inundation Mapping Wiki](https://github.com/NOAA-OWP/inundation-mapping/wiki). + +# This folder (`/unit_tests`) holds files for unit testing python files + +## Creating unit tests + +For each python code file that is being tested, unit tests should come in two files: a unit test file (based on the original python code file) and an accompanying json paramerters file. + +The files should be named following FIM convention: + +{source py file name}_test.py -> `derive_level_paths_test.py` +{source py file name}_params.json -> `derive_level_paths_params.json` + + +## Tips to create a new json file for a new python unit test file. + +There are multiple way to figure out a set of default json parameters for the new unit test file. + +One way is to use the incoming arg parser. Most python files include the code block of ` __name__ == '__main__':`, followed by external arg parsing (`args = vars(parser.parse_args()`). +* Add a `print(args)` or similar, and get all the values including keys as output. +* Copy that into an editor being used to create the json file. +* Add a line break after every comma. +* Find/replace all single quotes to double quotes then cleanup the left tab formatting. + + +## Running unit tests + +Start a docker container as you normally would for any development. +```bash +docker run --rm -it --name -v /home//projects//:/foss_fim {your docker image name} +``` +Example: +```bash +docker run --rm -it --name mytest -v /home/abcd/projects/dev/innudation-mapping/:/foss_fim -v /abcd_share/foss_fim/outputs/:/outputs -v /abcs_share/foss_fim/:/data fim_4:dev_20220208_8eba0ee +``` + +For unit tests to work, you need to run the following (if not already in place). 
+Notice a modified branch "deny_branch_unittests.lst" (special for unittests) + +Here are the params and args you need if you need to re-run unit and branch + +```bash +fim_pipeline.sh -n fim_unit_test_data_do_not_remove -u "02020005 05030104" -bd /foss_fim/config/deny_branch_unittests.lst -ud None -j 1 -o +``` + +**NOTICE: the deny file used for fim_pipeline.sh, has a special one for unittests `deny_branch_unittests.lst`. + +If you need to run inundation tests, fun the following: + +```bash +python3 foss_fim/tools/synthesize_test_cases.py -c DEV -v fim_unit_test_data_do_not_remove -jh 1 -jb 1 -m /outputs/fim_unit_test_data_do_not_remove/alpha_test_metrics.csv -o +``` +### If you'd like to test the whole unit test suite: +``` +pytest /foss_fim/unit_tests +``` + +This is not 100% stable, as accurate paths for the parameters `.json` files are not included in this repository, are not uniform accross machines, and are subject to change. + +### If you want to test just one unit test (from the root terminal window): + +```bash +pytest /foss_fim/unit_tests/gms/derive_level_paths_test.py + or +pytest /foss_fim/unit_tests/clip_vectors_to_wbd_test.py +``` + +### If you'd like to run a particular test, you can, for example: +``` +pytest -v -s -k test_append_id_to_file_name_single_identifier_success +``` + +If one test case is choosen, it will scan all of the test files, and scan for the method (test case) specified. + +## Key Notes for creating new unit tests +1) All test functions must start with the phrase `test_`. That is how pytest picks them up. The rest of the function name does not have to match the pattern of `function_name_being_tested` but should. Further, the rest of the function name should say what the test is about, ie) `_failed_input_path`. ie) `test_{some_function_name_from_the_source_code_file}_failed_input_path`. It is fine that the function names get very long (common in the industry). + +2) If you are using this for development purposes, use caution when checking back in files for unit tests files and json file. If you check it in, it still has to work and work for others and not just for a dev test you are doing. + +3) As of now, you can not control the order that unit tests are run within a unit test file. + +4) There must be at least one associated `{original py file name}_params.json` file per unit test. + +5) There must be at least one "happy path (successful)" test inside the unittest file. ie) one function that is expected to fully pass. You can have multiple "happy path" tests if you want to change values that are fundamentally different, but fully expected to pass. + +6) Json files can have multiple nodes, so the default "happy path/success" is suggested to be called `valid_data`, if one does not already exist. Generally, the individual unit tests, will call the `valid_data` node and override a local method value to a invalid data. In semi-rare, but possible cases, you can add more nodes if you like, but try not to create new Json nodes for a few small field changes, generally only use a new node if there are major and lots of value changes (ie: major different test conditions). + +7) Unit test functions can and should test for all "outputs" from a source function. This includes the functions's return output (if any), but any global variables it might set, and even that saved output files (such as .tif files) have been created and successfully. It is ok to have multiple validation checks (or asserts) in one unit test function. 
+ +8) One Python file = one `{original py file name}_test.py` file. + +9) Sometimes you may want to run a full successful "happy path" version through `fim_pipeline.sh` (or similar), to get all of the files you need in place to do your testing. However, you will want to ensure that none of the outputs are being deleted during the test. One way to solve this is to put in an invalid value for the `-d` parameter (denylist). +ie: +```bash +fim_pipeline.sh -n fim_unit_test_data_do_not_remove -u 05030104 -c /foss_fim/config/params_template.env -j 1 -d /foss_fim/config/deny_unit_default.lst -o +``` +but ours would be: +```bash +fim_pipeline.sh -n fim_unit_test_data_do_not_remove -u 05030104 -c /foss_fim/config/params_template.env -j 1 -d no_list -o +``` + +## [Pytest](https://docs.pytest.org/en/7.2.x/) particulars + +The `pyproject.toml` file has been added, which contains the build system requirements of Python projects. This file used to specify which warnings are disabled to pass our unit tests. + +A `__init__.py` file has been added to the subdirectory of `/tools` in order for the `pytest` command run in the `/unit_tests` to pick up the tests in those directories as well. + +Luckily, `pytest` works well with The Python Standard Library `unittest`. This made the migration of previous unit tests using `unittest` over to `pytest` quite simple. The caveat is that our current unit tests employ elements of both libraries. A full transition to `pytest` will ideally take place at a future date. + +## Testing for failing conditions +- Over time, you want to start adding functions that specifically look for fail conditions. This is a key part of unit test systems. It is not uncommon to have many dozens of tests functions in one unit test file. Each "fail" type test, must check for ONLY one variable value change. A "fail" test function should not fundamentally pass in an invalid huc AND an invalid file path. Those two failing test conditions and must have two seperate unit test functions. + +- It is possible to let a unit test have more than one failed value but only if they are tightly related to trigger just one failure (RARE though). YES.. Over time, we will see TONS of these types of fail unit test functions and they will take a while to run. + +- When you create a "fail" test function, you can load up the normal full "params" from the json file, but then you can override (hardcoded) the one (or rarely more than one) variable inside the function. There is a way to "catch" a failure you are expecting, ensure it is the type of failure you expected and make that "failure" to become a true fail, ie) a unit test pass. + +An example is in `unit_tests/Derive_level_paths_test.py` -> `test_Derive_level_paths_invalid_input_stream_network` (function). This example gives you the pattern implemented in Pytest. + +## Future Enhancements +1) Full transition to the `pytest` library, removing classes of `unittest.TestCase` and taking full advantage of available code re-use patterns offered through `pytest`. + +2) Over time, it is expected that python files will be broken down to many functions inside the file. Currently, we tend to have one very large function in each python file which makes unit testing harder and less specific. Generally function will result in at least one "happy path" unit test function. This might require having test unit test outputs, such as sample .tif or small .gpkg files in subfolders in the unit tests folder, but this remains to be seen. 
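As a hedged sketch of the first future enhancement above (dropping unittest.TestCase in favor of plain pytest), the params file could be loaded once per module through a pytest fixture; only unit_tests_utils.FIM_unit_test_helpers is taken from the existing tests, and the rest is illustrative.

```python
import json

import pytest

from unit_tests_utils import FIM_unit_test_helpers as ut_helpers


@pytest.fixture(scope="module")
def params():
    # Load the <test file name>_params.json once per test module, mirroring setUpClass.
    params_file_path = ut_helpers.get_params_filename(__file__)
    with open(params_file_path) as params_file:
        return json.load(params_file)


def test_some_function_success(params):
    # Fixtures are injected by argument name; no class or setUpClass needed.
    valid = params["valid_data"].copy()
    assert isinstance(valid, dict)
```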
Note: The files `/derive_level_paths_test.py` and `clip_vectors_to_wbd_test.py` are not complete as they do not yet test all output from a method. diff --git a/unit_tests/__template.json b/unit_tests/__template.json new file mode 100644 index 000000000..20d34f7c4 --- /dev/null +++ b/unit_tests/__template.json @@ -0,0 +1,11 @@ +{ + "valid_data": + { + "some param name (suggested to match the method param name)": "some value", + "another one and as many lines as you need": "some value", + "for 'None', you can just leave this field out, but sometimes it seems like jsut passing the value of None seems to work (not validated) ": "some value", + "for numerics, just put the number": 20, + "for lists/collections: ": ["value 1", "value 2"], + "inline json comments don't work (let us know if you figure it out)": "sorry" + } +} diff --git a/unit_tests/__template.py b/unit_tests/__template.py new file mode 100644 index 000000000..f3d6d483e --- /dev/null +++ b/unit_tests/__template.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 + +import os +import json +import unittest +import pytest + +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers + +import as src + +class test_(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + params_file_path = ut_helpers.get_params_filename(__file__) + with open(params_file_path) as params_file: + self.params = json.load(params_file) + +# Test Cases: + + # MUST start with the name of "test_" + # This is the (or one of the) valid test expected to pass + def test__success(self): + + ''' + < UPDATE THESE NOTES: to say what you are testing and what you are expecting. + If there is no return value from the method, please say so.> + + Dev Notes: (which can be removed after you make this file) + Remember... You need to validate the method output if there is any. However, if you have time, it + is also recommended that you validate other outputs such as writing or updating file on the file + system, aka: Does the expected file exist. Don't worry about its contents. + ''' + + #global params_file + params = self.params["valid_data"].copy() #update "valid_data" value if you need to (aka.. more than one node) + + # for now we are happy if no exceptions are thrown. + + # See the readme.md, clip_vectors_to_wbd_test.py or gms/derive_level_paths_test.py for examples. + # Replace this stub example with your own. + # Try to use the same order to make it easier. + # Remember, if the method accepts **params, then you can sent that in here as well. + # ie: my_py_class.my_method(** params) + + src.subset_vector_layers(hucCode = params["hucCode"], + nwm_streams_filename = params["nwm_streams"], + etc, etc for each param) + + # This is what we are actually testing- + # An assert that evaluates as True passes, one that evaluates to False fails. + # A message (string) can be added after the assert statement to provide detail on the case being tested, and + # why it failed. + assert os.path.exists(params["nwm_streams"]) == True, "The nwm_streams file does not exist" + + + # EXAMPLE SUCCESSFUL TEST CASE WHICH CAPTURES AN EXCEPTION (FAILURE) + + def test_subset_vector_layers_fail_invalid_stream_path(self): + ''' + Notes about what the test is and the expected results (or expected exception if applicable) + ''' + + params = self.params["valid_data"].copy() #update "valid_data" value if you need to (aka.. 
more than one node) + + params["nwm_streams"] = "/some/bad/path/" + + with pytest.raises(Exception) as e_info: + clip_vectors_to_wbd.subset_vector_layers(hucCode = params["hucCode"], + nwm_streams_filename = params["nwm_streams"], + etc, etc for each param) + diff --git a/unit_tests/aggregate_branch_lists_params.json b/unit_tests/aggregate_branch_lists_params.json new file mode 100644 index 000000000..fa5f36a29 --- /dev/null +++ b/unit_tests/aggregate_branch_lists_params.json @@ -0,0 +1,8 @@ +{ + "valid_data": + { + "output_dir": "/outputs/fim_unit_test_data_do_not_remove/", + "file_name": "branch_ids.csv", + "output_file_name": "/outputs/fim_unit_test_data_do_not_remove/test.csv" + } +} diff --git a/unit_tests/aggregate_branch_lists_test.py b/unit_tests/aggregate_branch_lists_test.py new file mode 100644 index 000000000..e54cd5c85 --- /dev/null +++ b/unit_tests/aggregate_branch_lists_test.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +import json +import unittest +import pytest + +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers + +import aggregate_branch_lists as src + + +class test_aggregate_branch_lists(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + params_file_path = ut_helpers.get_params_filename(__file__) + with open(params_file_path) as params_file: + self.params = json.load(params_file) + +# Test Cases: + + def test_aggregate_branch_lists_success(self): + + #global params_file + + params = self.params["valid_data"].copy() + + src.aggregate_branch_lists(output_dir = params["output_dir"], + file_name = params["file_name"], + output_file_name = params["output_file_name"]) + diff --git a/unit_tests/check_unit_errors_params.json b/unit_tests/check_unit_errors_params.json new file mode 100644 index 000000000..d97ec466f --- /dev/null +++ b/unit_tests/check_unit_errors_params.json @@ -0,0 +1,11 @@ +{ + "valid_data": + { + "fim_dir": "/outputs/fim_unit_test_data_do_not_remove" + }, + + "invalid_path": + { + "fim_dir": "/outputs/check_errors_example_unit_tests_not_not_valid" + } +} \ No newline at end of file diff --git a/unit_tests/check_unit_errors_test.py b/unit_tests/check_unit_errors_test.py new file mode 100644 index 000000000..d10080e96 --- /dev/null +++ b/unit_tests/check_unit_errors_test.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python3 + +import math +import os +import shutil +import json +import unittest +import pytest + +from utils.fim_enums import FIM_exit_codes +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers +from utils.shared_variables import (UNIT_ERRORS_MIN_NUMBER_THRESHOLD, + UNIT_ERRORS_MIN_PERCENT_THRESHOLD) +import check_unit_errors as src + + +class test_check_unit_errors(unittest.TestCase): + ''' + Allows the params to be loaded one and used for all test methods + ''' + + @classmethod + def setUpClass(self): + + # get_params_filename function in ./unit_test_utils handles errors + params_file_path = ut_helpers.get_params_filename(__file__) + + with open(params_file_path) as params_file: + self.params = json.load(params_file) + + ''' + To make most of this unit test, we have to do the following: + - rename the current unit_errors directory (if it exists) + - empty the unit_errors directory + - create a bunch of dummy files in it + - perform the unit test + - delete the unit_errors directory + - rename the original unit_test folder back to unit_tests + ''' + +# Test Cases: + + def test_check_unit_errors_success_below_min_errors(self): + + # 
Expecting no errors. + # Test to ensure the number of dummy files is less than the overall min number of error files. + + params = self.params["valid_data"].copy() + + num_dummy_files_reqd = UNIT_ERRORS_MIN_NUMBER_THRESHOLD - 1 + ue_folder_existed = self.__create_temp_unit_errors_folder_files(params["fim_dir"], + num_dummy_files_reqd) + expected_output = 0 + actual_output = src.check_unit_errors(params["fim_dir"], num_dummy_files_reqd) + + err_msg = "Number of dummy files IS NOT less than the overall min number of error files." + assert expected_output == actual_output, err_msg + + if (ue_folder_existed): + self.__remove_temp_unit_errors_folder(params["fim_dir"]) + + + def test_check_unit_errors_fail_above_min_errors(self): + + # Test to ensure the number of dummy files is more than the overall min number of error files. + # Expecting sys.exit of 62 + # We do expect this to fail and if it fails, it is successful. + # Here we expect an exception, and are capturing it using pytest.raises(Exception) + # To query the exception, or validate that it is the correct one, it is captured in the `e_info` object + + params = self.params["valid_data"].copy() + + num_dummy_files_reqd = UNIT_ERRORS_MIN_NUMBER_THRESHOLD + 1 + + self.__create_temp_unit_errors_folder_files(params["fim_dir"], num_dummy_files_reqd) + + with pytest.raises(Exception) as e_info: + src.check_unit_errors(params["fim_dir"], num_dummy_files_reqd) + + # We have to put the unit_errors folders back to the way it was + self.__remove_temp_unit_errors_folder(params["fim_dir"]) + + + def test_check_unit_errors_success_above_percent_errors(self): + + # Expecting no errors. + # Test to ensure the number of dummy files is more than the overall min number of error files. + # We do expect this not to to fail as it is greater than 10 errors but below the percent threshhold. + + params = self.params["valid_data"].copy() + + num_dummy_files_reqd = UNIT_ERRORS_MIN_NUMBER_THRESHOLD * 2 + + ue_folder_existed = self.__create_temp_unit_errors_folder_files(params["fim_dir"], + num_dummy_files_reqd) + + num_total_units = math.trunc(num_dummy_files_reqd * (100 / UNIT_ERRORS_MIN_PERCENT_THRESHOLD)) + 1 + expected_output = 0 + actual_output = src.check_unit_errors(params["fim_dir"], num_total_units) + + err_msg = "Number of dummy files IS NOT more than the overall min number of error files" + assert expected_output == actual_output, err_msg + + if (ue_folder_existed): + self.__remove_temp_unit_errors_folder(params["fim_dir"]) + + # We have to put the unit_errors folders back to the way is was. + self.__remove_temp_unit_errors_folder(params["fim_dir"]) + + + def test_check_unit_errors_fail_below_percent_errors(self): + + # Expecting sys.exit of 62 + # Test to ensure the number of dummy files is more than the overall min number of error files. 
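To make the threshold arithmetic in these tests easier to follow, here is a hedged sketch of the same calculation with stand-in values; the real constants come from utils/shared_variables.py and may differ.

```python
import math

# Stand-in values; the real ones are UNIT_ERRORS_MIN_NUMBER_THRESHOLD and
# UNIT_ERRORS_MIN_PERCENT_THRESHOLD in utils/shared_variables.py.
MIN_NUMBER_THRESHOLD = 10
MIN_PERCENT_THRESHOLD = 10

num_error_files = MIN_NUMBER_THRESHOLD * 2  # 20 dummy error files, as in the tests above

# Enough total units that 20 errors stay under the percent threshold -> check passes.
passing_total_units = math.trunc(num_error_files * (100 / MIN_PERCENT_THRESHOLD)) + 1   # 201
# Too few total units, so 20 errors exceed the percent threshold -> sys.exit(62) expected.
failing_total_units = math.trunc(num_error_files * (100 / MIN_PERCENT_THRESHOLD)) - 10  # 190

print(passing_total_units, failing_total_units)
```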
+ + # We do expect this to fail as it is greater than 10 errors + # AND below the percent threshhold (more percent errors than the threshold) + # Here we expect an exception, and are capturing it using pytest.raises(Exception) + # To query the exception, or validate that it is the correct one, it is captured in the `e_info` object + + params = self.params["valid_data"].copy() + + num_dummy_files_reqd = UNIT_ERRORS_MIN_NUMBER_THRESHOLD * 2 + + self.__create_temp_unit_errors_folder_files(params["fim_dir"], num_dummy_files_reqd) + + num_total_units = math.trunc(num_dummy_files_reqd * (100 / UNIT_ERRORS_MIN_PERCENT_THRESHOLD)) - 10 + + with pytest.raises(Exception) as e_info: + src.check_unit_errors(params["fim_dir"], num_total_units) + + # We have to put the unit_errors folders back to the way it was. + self.__remove_temp_unit_errors_folder(params["fim_dir"]) + + +# Helper functions: + + def __create_temp_unit_errors_folder_files(self, output_folder, number_of_files): + ''' + Process: + We want to preserve the original unit_errors folder if it exists, + so we wil rename it. + Then we will make a new unit_errors folder and fill it with a bunch of + dummy files. + A dummy file for non_zero_exit_codes.log will also be created. + Input: + output_folder: the root output folder (ie. /outputs/gms_example_unit_tests/) + number_of_files: how many dummy files to create + Returns: + True if the 'unit_errors' folder did original exist and needs to be renamed back. + False if the 'unit_errors' folder never existed in the first place. + ''' + + ue_folder_preexists = False + + if (not os.path.isdir(output_folder)): + raise Exception(f"unit test root folder of {output_folder} does not exist") + + ue_folder = os.path.join(output_folder, "unit_errors") + temp_ue_folder = ue_folder + "_temp" + if (os.path.isdir(ue_folder)): + ue_folder_preexists = True + os.rename(ue_folder, temp_ue_folder) + + os.mkdir(ue_folder) + + for i in range(0, number_of_files): + file_name = "sample_" + str(i) + ".txt" + file_path = os.path.join(ue_folder, file_name) + with open(file_path, 'w') as fp: + pass + + return ue_folder_preexists + + + def __remove_temp_unit_errors_folder(self, output_folder): + ''' + Process: + We want to preserve the original unit_errors folder if it exists, + so we will delete our temp unit test version of 'unit_errors', and rename + the original back to 'unit_errors' + Note.. it is possible the temp folder does not exist, + but we don't need to error out on it. Sometimes we got here by a try/catch cleanup + Input: + output_folder: the root output folder (ie. /outputs/fim_unit_test_data_do_not_remove/) + Returns: + nothing + ''' + + ue_folder = os.path.join(output_folder, "unit_errors") + if (os.path.isdir(ue_folder)): + shutil.rmtree(ue_folder) + + temp_ue_folder = ue_folder + "_temp" + if (os.path.isdir(temp_ue_folder)): + os.rename(temp_ue_folder, ue_folder) + + +# Test Cases for Helper funcitons: + + def test_create_temp_unit_errors_folder_files(self): + + # Here we are testing our helper function to see if it raise exceptions appropriately with a bad path. + # In this case, we want the exception to be raised if there is an invalid path. + + params = self.params["invalid_path"] + invalid_folder = params["fim_dir"] + + with pytest.raises(Exception) as e_info: + self.__create_temp_unit_errors_folder_files(invalid_folder, 4) + + + def test_remove_temp_unit_errors_folder(self): + + # Test of out helper function to see if the temp folder was removed. 
+ + params = self.params["valid_data"].copy() + + self.__create_temp_unit_errors_folder_files(params["fim_dir"], 2) + + self.__remove_temp_unit_errors_folder(params["fim_dir"]) + + temp_folder_created = os.path.join(params["fim_dir"], "unit_errors") + "_temp" + + assert os.path.exists(temp_folder_created) == False + diff --git a/unit_tests/clip_vectors_to_wbd_params.json b/unit_tests/clip_vectors_to_wbd_params.json new file mode 100644 index 000000000..7875dda5c --- /dev/null +++ b/unit_tests/clip_vectors_to_wbd_params.json @@ -0,0 +1,26 @@ +{ + "valid_data": { + "outputRunDataDir": "/outputs/fim_unit_test_data_do_not_remove", + "subset_nwm_lakes": "/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_lakes_proj_subset.gpkg", + "subset_nwm_streams": "/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_subset_streams.gpkg", + "hucCode": "05030104", + "subset_nwm_headwaters": "/outputs/fim_unit_test_data_do_not_remove/05030104/nhd_headwater_points_subset.gpkg", + "wbd_buffer_filename": "/outputs/fim_unit_test_data_do_not_remove/05030104/wbd_buffered.gpkg", + "wbd_filename": "/outputs/fim_unit_test_data_do_not_remove/05030104/wbd.gpkg", + "dem_filename": "/data/inputs/3dep_dems/10m_5070/fim_seamless_3dep_dem_10m_5070.vrt", + "dem_domain" : "/data/inputs/3dep_dems/10m_5070/HUC6_dem_domain.gpkg", + "nwm_lakes": "/data/inputs/nwm_hydrofabric/nwm_lakes.gpkg", + "nwm_catchments": "/data/inputs/nwm_hydrofabric/nwm_catchments.gpkg", + "subset_nwm_catchments": "/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_catchments_proj_subset.gpkg", + "nld_lines": "/data/inputs/nld_vectors/huc2_levee_lines/nld_preprocessed_05.gpkg", + "landsea": "/data/inputs/landsea/water_polygons_us.gpkg", + "nwm_streams": "/data/inputs/nwm_hydrofabric/nwm_flows.gpkg", + "subset_landsea": "/outputs/fim_unit_test_data_do_not_remove/05030104/LandSea_subset.gpkg", + "nwm_headwaters": "/data/inputs/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj.gpkg", + "subset_nld_lines": "/outputs/fim_unit_test_data_do_not_remove/05030104/nld_subset_levees.gpkg", + "wbd_buffer_distance": 5000, + "levee_protected_areas": "/data/inputs/nld_vectors/Levee_protected_areas.gpkg", + "subset_levee_protected_areas": "/outputs/fim_unit_test_data_do_not_remove/05030104/LeveeProtectedAreas_subset.gpkg" + } +} + diff --git a/unit_tests/clip_vectors_to_wbd_test.py b/unit_tests/clip_vectors_to_wbd_test.py new file mode 100644 index 000000000..66857c1a8 --- /dev/null +++ b/unit_tests/clip_vectors_to_wbd_test.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 + +import os +import json +import unittest +import pytest + +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers +import clip_vectors_to_wbd as src + + +class test_clip_vectors_to_wbd(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + params_file_path = ut_helpers.get_params_filename(__file__) + with open(params_file_path) as params_file: + self.params = json.load(params_file) + +# Test Cases: + + def test_subset_vector_layers_success(self): + + ''' + This NEEDS be upgraded to check the output, as well as the fact that all of the output files exist as expected. + Most of the output test and internal tests with this function will test a wide variety of conditions. + For subcalls to other py classes will not exist in this file, but the unittest file for the other python file. + Only the basic return output value should be tested to ensure it is as expected. 
+        For now, we are adding the very basic "happy path" test.
+        '''
+
+        params = self.params["valid_data"].copy()
+
+        # There is no default return value.
+        # Later, we need to check that this function created the expected files on the file system.
+
+        # For now we are happy if no exceptions are thrown.
+        try:
+            src.subset_vector_layers(subset_nwm_lakes = params["subset_nwm_lakes"],
+                                     subset_nwm_streams = params["subset_nwm_streams"],
+                                     hucCode = params["hucCode"],
+                                     subset_nwm_headwaters = params["subset_nwm_headwaters"],
+                                     wbd_buffer_filename = params["wbd_buffer_filename"],
+                                     wbd_filename = params["wbd_filename"],
+                                     dem_filename = params["dem_filename"],
+                                     dem_domain = params["dem_domain"],
+                                     nwm_lakes = params["nwm_lakes"],
+                                     nwm_catchments = params["nwm_catchments"],
+                                     subset_nwm_catchments = params["subset_nwm_catchments"],
+                                     nld_lines = params["nld_lines"],
+                                     landsea = params["landsea"],
+                                     nwm_streams = params["nwm_streams"],
+                                     subset_landsea = params["subset_landsea"],
+                                     nwm_headwaters = params["nwm_headwaters"],
+                                     subset_nld_lines = params["subset_nld_lines"],
+                                     wbd_buffer_distance = params["wbd_buffer_distance"],
+                                     levee_protected_areas = params["levee_protected_areas"],
+                                     subset_levee_protected_areas = params["subset_levee_protected_areas"])
+
+        except (RuntimeError, TypeError, NameError) as e_info:
+            pytest.fail(f"Error in subset_vector_layers function: {e_info}")
+
diff --git a/unit_tests/derive_level_paths_params.json b/unit_tests/derive_level_paths_params.json
new file mode 100644
index 000000000..48ce1f23d
--- /dev/null
+++ b/unit_tests/derive_level_paths_params.json
@@ -0,0 +1,18 @@
+{
+    "valid_data":
+    {
+        "outputRunDataDir" : "/outputs/fim_unit_test_data_do_not_remove/",
+        "in_stream_network": "/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_subset_streams.gpkg",
+        "out_stream_network": "/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_subset_streams_levelPaths.gpkg",
+        "branch_id_attribute": "levpa_id",
+        "out_stream_network_dissolved": "/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_subset_streams_levelPaths.gpkg",
+        "headwaters_outfile": "/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_headwaters.gpkg",
+        "catchments": "/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_catchments_proj_subset.gpkg",
+        "catchments_outfile": "/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_catchments_proj_subset_levelPaths.gpkg",
+        "branch_inlets_outfile": "/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg",
+        "reach_id_attribute": "ID",
+        "verbose": true
+    }
+}
+
+
diff --git a/unit_tests/derive_level_paths_test.py b/unit_tests/derive_level_paths_test.py
new file mode 100644
index 000000000..8f395e561
--- /dev/null
+++ b/unit_tests/derive_level_paths_test.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import json
+import unittest
+import pytest
+
+from unit_tests_utils import FIM_unit_test_helpers as ut_helpers
+
+# importing python modules from other directories
+sys.path.append('/foss_fim/src/')
+import derive_level_paths as src
+import stream_branches
+from utils.fim_enums import FIM_exit_codes as fec
+
+
+class test_Derive_level_paths(unittest.TestCase):
+
+    '''
+    Allows the params to be loaded once and used for all test methods
+    '''
+    @classmethod
+    def setUpClass(self):
+
+        params_file_path = ut_helpers.get_params_filename(__file__)
+        with open(params_file_path) as params_file:
+            self.params = json.load(params_file)
+
+
+# Test Cases:
+
+    def test_Derive_level_paths_success_all_params(self):
+        '''
+        This test includes all params, with many optional params left at the function's default values.
+        '''
+
+        params = self.params["valid_data"].copy()
+
+        # Notes:
+        # Other params such as toNode_attribute and fromNode_attribute are defaulted and not passed into __main__,
+        # so we skip them here.
+        # Returns a GeoDataframe (the nwm_subset_streams_levelPaths_dissolved.gpkg)
+        actual_df = src.Derive_level_paths(in_stream_network = params["in_stream_network"],
+                                           out_stream_network = params["out_stream_network"],
+                                           branch_id_attribute = params["branch_id_attribute"],
+                                           out_stream_network_dissolved = params["out_stream_network_dissolved"],
+                                           headwaters_outfile = params["headwaters_outfile"],
+                                           catchments = params["catchments"],
+                                           catchments_outfile = params["catchments_outfile"],
+                                           branch_inlets_outfile = params["branch_inlets_outfile"],
+                                           reach_id_attribute = params["reach_id_attribute"],
+                                           verbose = params["verbose"])
+
+        # Test that the data type being returned is as expected. Downstream code might need to know that type.
+        assert isinstance(actual_df, stream_branches.StreamNetwork)
+
+
+        #**** NOTE: Based on 05030104
+        # Test row count for the dissolved level path GeoDataframe which is returned.
+        actual_row_count = len(actual_df)
+        expected_row_count = 4
+        assert actual_row_count == expected_row_count
+
+        # Test that output files exist as expected
+        assert os.path.exists(params["out_stream_network"]) == True, f"Expected file {params['out_stream_network']} but it does not exist."
+        assert os.path.exists(params["out_stream_network_dissolved"]) == True, f"Expected file {params['out_stream_network_dissolved']} but it does not exist."
+        assert os.path.exists(params["headwaters_outfile"]) == True, f"Expected file {params['headwaters_outfile']} but it does not exist."
+        assert os.path.exists(params["catchments_outfile"]) == True, f"Expected file {params['catchments_outfile']} but it does not exist."
+        assert os.path.exists(params["branch_inlets_outfile"]) == True, f"Expected file {params['branch_inlets_outfile']} but it does not exist."
+
+
+    # Invalid input stream for demo purposes. Normally, you would not have this basic of a test (input validation).
+    def test_Derive_level_paths_invalid_input_stream_network(self):
+
+        # NOTE: As we are expecting an exception, we use pytest.raises(Exception).
+
+        params = self.params["valid_data"].copy()
+        params["in_stream_network"] = "some bad path"
+
+        with pytest.raises(Exception) as e_info:
+            actual = src.Derive_level_paths(in_stream_network = params["in_stream_network"],
+                                            out_stream_network = params["out_stream_network"],
+                                            branch_id_attribute = params["branch_id_attribute"],
+                                            out_stream_network_dissolved = params["out_stream_network_dissolved"],
+                                            headwaters_outfile = params["headwaters_outfile"],
+                                            catchments = params["catchments"],
+                                            catchments_outfile = params["catchments_outfile"],
+                                            branch_inlets_outfile = params["branch_inlets_outfile"],
+                                            reach_id_attribute = params["reach_id_attribute"],
+                                            verbose = params["verbose"])
+
diff --git a/unit_tests/filter_catchments_and_add_attributes_params.json b/unit_tests/filter_catchments_and_add_attributes_params.json
new file mode 100644
index 000000000..5b6370373
--- /dev/null
+++ b/unit_tests/filter_catchments_and_add_attributes_params.json
@@ -0,0 +1,12 @@
+{
+    "valid_data":
+    {
+        "outputRunDataDir": "/outputs/fim_unit_test_data_do_not_remove",
+        "input_catchments_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000006/gw_catchments_reaches_3246000006.gpkg",
+        "input_flows_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000006/demDerived_reaches_split_3246000006.gpkg",
+        "output_catchments_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000006/gw_catchments_reaches_filtered_addedAttributes_3246000006.gpkg",
+        "output_flows_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000006/demDerived_reaches_split_filtered_3246000006.gpkg",
+        "wbd_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/wbd8_clp.gpkg",
+        "huc_code": "02020005"
+    }
+}
diff --git a/unit_tests/filter_catchments_and_add_attributes_test.py b/unit_tests/filter_catchments_and_add_attributes_test.py
new file mode 100644
index 000000000..258ff2349
--- /dev/null
+++ b/unit_tests/filter_catchments_and_add_attributes_test.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+import inspect
+import os
+import sys
+
+import json
+import unittest
+import pytest
+
+from unit_tests_utils import FIM_unit_test_helpers as ut_helpers
+import filter_catchments_and_add_attributes as src
+
+# *************
+# Important: For this to work, when you run gms_run_branch.sh, you have to
+# use deny_gms_branches_dev.lst or the word "none" for the deny list arguments
+# (unit and branch deny list parameters). Key files need to exist for this unit test to work.
+class test_filter_catchments_and_add_attributes(unittest.TestCase):
+
+    '''
+    Allows the params to be loaded once and used for all test methods
+    '''
+    @classmethod
+    def setUpClass(self):
+
+        params_file_path = ut_helpers.get_params_filename(__file__)
+        with open(params_file_path) as params_file:
+            self.params = json.load(params_file)
+
+# Test Cases:
+
+    def test_filter_catchments_and_add_attributes_success(self):
+
+        '''
+        The gw_catchments_reaches_filtered_addedAttributes_<branch_id>.gpkg and
+        demDerived_reaches_split_filtered_<branch_id>.gpkg files should not exist prior to this test.
+        If the test is successful, these files will be created.
+        '''
+
+        params = self.params["valid_data"].copy()
+
+        # To set up the test, let's start by deleting the two expected output files to ensure
+        # that they are regenerated.
+ if os.path.exists(params["output_flows_filename"]): + os.remove(params["output_flows_filename"]) + if os.path.exists(params["output_catchments_filename"]): + os.remove(params["output_catchments_filename"]) + + # Test that the files were deleted + assert os.path.exists(params["output_flows_filename"]) == False + + assert os.path.exists(params["output_catchments_filename"]) == False + + src.filter_catchments_and_add_attributes(input_catchments_filename = params["input_catchments_filename"], + input_flows_filename = params["input_flows_filename"], + output_catchments_filename = params["output_catchments_filename"], + output_flows_filename = params["output_flows_filename"], + wbd_filename = params["wbd_filename"], + huc_code = params["huc_code"]) + + + # Test that the files were created by filer_catchments_and_add_attributes + assert os.path.exists(params["output_flows_filename"]) == True + + assert os.path.exists(params["output_catchments_filename"]) == True + diff --git a/unit_tests/generate_branch_list_csv_params.json b/unit_tests/generate_branch_list_csv_params.json new file mode 100644 index 000000000..004c5719b --- /dev/null +++ b/unit_tests/generate_branch_list_csv_params.json @@ -0,0 +1,20 @@ +{ + "valid_data_add_branch_zero": + { + "huc_id": "05030104", + "branch_id": "0", + "output_branch_csv": "/outputs/fim_unit_test_data_do_not_remove/05030104/branch_ids.csv" + }, + "valid_data_add_branch": + { + "huc_id": "05030104", + "branch_id": "1946000003", + "output_branch_csv": "/outputs/fim_unit_test_data_do_not_remove/05030104/branch_ids.csv" + }, + "invalid_bad_file_extension": + { + "huc_id": "05030104", + "branch_id": "1946000003", + "output_branch_csv": "/outputs/fim_unit_test_data_do_not_remove/05030104/branch_ids2" + } +} diff --git a/unit_tests/generate_branch_list_csv_test.py b/unit_tests/generate_branch_list_csv_test.py new file mode 100644 index 000000000..5d2d75281 --- /dev/null +++ b/unit_tests/generate_branch_list_csv_test.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 + +import os +import json +import unittest +import pytest + +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers + +import generate_branch_list_csv as src + +class test_generate_branch_list_csv(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + params_file_path = ut_helpers.get_params_filename(__file__) + with open(params_file_path) as params_file: + self.params = json.load(params_file) + + # for these tests to work, we have to check if the .csv exists and remove it + # prior to exections of the tests. + + params = self.params["valid_data_add_branch_zero"].copy() + if (os.path.exists(params["output_branch_csv"])): + os.remove(params["output_branch_csv"]) + +# Test Cases: + + def test_generate_branch_list_csv_valid_data_add_branch_zero_success(self): + + # yes.. we know that we can not control the order + + #global params_file + params = self.params["valid_data_add_branch_zero"].copy() + + src.generate_branch_list_csv(huc_id = params["huc_id"], + branch_id = params["branch_id"], + output_branch_csv = params["output_branch_csv"]) + + + + def test_generate_branch_list_csv_valid_data_add_branch_success(self): + + # yes.. 
we know that we can not control the order + + #global params_file + params = self.params["valid_data_add_branch"].copy() + + src.generate_branch_list_csv(huc_id = params["huc_id"], + branch_id = params["branch_id"], + output_branch_csv = params["output_branch_csv"]) + + + def test_generate_branch_list_csv_invalid_bad_file_extension(self): + + #global params_file + params = self.params["invalid_bad_file_extension"].copy() + + # we expect this to fail. If it does fail with an exception, then this test is sucessful. + with pytest.raises(Exception) as e_info: + src.generate_branch_list_csv(huc_id = params["huc_id"], + branch_id = params["branch_id"], + output_branch_csv = params["output_branch_csv"]) + diff --git a/unit_tests/generate_branch_list_params.json b/unit_tests/generate_branch_list_params.json new file mode 100644 index 000000000..b8c9bb4cc --- /dev/null +++ b/unit_tests/generate_branch_list_params.json @@ -0,0 +1,8 @@ +{ + "valid_data": + { + "stream_network_dissolved": "/outputs/fim_unit_test_data_do_not_remove/05030104/nwm_subset_streams_levelPaths_dissolved.gpkg", + "branch_id_attribute": "levpa_id", + "output_branch_list_file": "/outputs/fim_unit_test_data_do_not_remove/05030104/branch_ids.lst" + } +} diff --git a/unit_tests/generate_branch_list_test.py b/unit_tests/generate_branch_list_test.py new file mode 100644 index 000000000..d05d322c4 --- /dev/null +++ b/unit_tests/generate_branch_list_test.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 + +import os +import json +import unittest +import pytest + +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers + +import generate_branch_list as src + +class test_Generate_branch_list(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + params_file_path = ut_helpers.get_params_filename(__file__) + with open(params_file_path) as params_file: + self.params = json.load(params_file) + +# Test Cases: + + def test_Generate_branch_list_success(self): + + params = self.params["valid_data"].copy() + + src.generate_branch_list(stream_network_dissolved = params["stream_network_dissolved"], + branch_id_attribute = params["branch_id_attribute"], + output_branch_list_file = params["output_branch_list_file"]) + + diff --git a/unit_tests/outputs_cleanup_params.json b/unit_tests/outputs_cleanup_params.json new file mode 100644 index 000000000..f63745ad9 --- /dev/null +++ b/unit_tests/outputs_cleanup_params.json @@ -0,0 +1,25 @@ +{ + "valid_specific_branch_data": + { + "src_dir": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000009", + "deny_list": "/foss_fim/config/deny_branches.lst", + "branch_id": "3246000009", + "verbose": true + }, + + "valid_directory_data": + { + "src_dir": "/outputs/fim_unit_test_data_do_not_remove", + "deny_list": "/foss_fim/config/deny_branches.lst", + "branch_id": "0", + "verbose": true + }, + + "skip_clean": + { + "src_dir": "/outputs/fim_unit_test_data_do_not_remove", + "deny_list": "None", + "branch_id": "", + "verbose": true + } +} \ No newline at end of file diff --git a/unit_tests/outputs_cleanup_test.py b/unit_tests/outputs_cleanup_test.py new file mode 100644 index 000000000..ab1a46a92 --- /dev/null +++ b/unit_tests/outputs_cleanup_test.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 + +import os +import sys +import json +from pathlib import Path +import io +from contextlib import redirect_stdout +import unittest +import pytest + +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers +from 
utils.shared_functions import FIM_Helpers as fh
+
+
+sys.path.append('/foss_fim/src/gms')
+import outputs_cleanup as src
+
+
+class test_outputs_cleanup(unittest.TestCase):
+
+    '''
+    Allows the params to be loaded once and used for all test methods
+    '''
+    @classmethod
+    def setUpClass(self):
+
+        params_file_path = ut_helpers.get_params_filename(__file__)
+        with open(params_file_path) as params_file:
+            self.params = json.load(params_file)
+
+
+# Test Cases:
+
+    def test_remove_deny_list_files_specific_branch_success(self):
+        '''
+        This validates removal of files for a directory already pointing to a
+        specific branch in a HUC
+        '''
+
+        params = self.params["valid_specific_branch_data"].copy()
+
+        # Gather all of the file names into an array from the deny_list
+        deny_files = self.__get_deny_list_filenames(params["src_dir"], params["deny_list"], params["branch_id"])
+
+        # Test whether we have a list of files to check
+        assert len(deny_files) > 0
+
+        src.remove_deny_list_files(src_dir = params["src_dir"],
+                                   deny_list = params["deny_list"],
+                                   branch_id = params["branch_id"],
+                                   verbose = params["verbose"])
+
+        assert self.__check_no_deny_list_files_exist(params["src_dir"], deny_files) == True
+
+
+    def test_remove_deny_list_files_huc_level_success(self):
+        '''
+        This validates removal of files across a directory and all of its subdirectories.
+        Normally used for covering all HUCs and their branch zeros, but
+        it can be any directory tree.
+        '''
+
+        params = self.params["valid_directory_data"].copy()
+
+        # Gather all of the file names into an array from the deny_list
+        deny_files = self.__get_deny_list_filenames(params["src_dir"], params["deny_list"], params["branch_id"])
+
+        # Test whether we have a list of files to check
+        assert len(deny_files) > 0
+
+        src.remove_deny_list_files(src_dir = params["src_dir"],
+                                   deny_list = params["deny_list"],
+                                   branch_id = params["branch_id"],
+                                   verbose = params["verbose"])
+
+        assert self.__check_no_deny_list_files_exist(params["src_dir"], deny_files) == True
+
+
+    def test_remove_deny_list_skip_cleaning_success(self):
+        '''
+        This validates that cleaning is skipped when the deny list value is "None":
+        remove_deny_list_files should return early and no files should be removed.
+        '''
+
+        params = self.params["skip_clean"].copy()
+
+        deny_files = self.__get_deny_list_filenames(params["src_dir"], params["deny_list"], params["branch_id"])
+
+        # Ensure we have a value of "None" for the deny_list value;
+        # __get_deny_list_filenames returns an empty array if "None" is provided
+        assert len(deny_files) == 0
+
+        # This is a little tricky, as we're capturing the stdout (the printed skip message) from remove_deny_list_files
+        # to verify the function is returning at the correct place and not removing files when we do not provide
+        # a deny list file. We set f to the io stream, and redirect it using redirect_stdout.
+ f = io.StringIO() + with redirect_stdout(f): + src.remove_deny_list_files(src_dir = params["src_dir"], + deny_list = params["deny_list"], + branch_id = params["branch_id"], + verbose = params["verbose"]) + + # Get the stdout value of remove_deny_list_files and set it to skip_clean_out + skip_clean_out = f.getvalue() + + # This string must match the print statement in /src/gms/outputs_cleanup.py, including the \n newline, + # which occurs "behind the scenes" with every call to print() in Python + assert skip_clean_out == "file clean via the deny list skipped\n" + + + def test_remove_deny_list_files_invalid_src_directory(self): + ''' + Double check the src directory exists + ''' + + params = self.params["valid_specific_branch_data"].copy() + params["src_dir"] = "/data/does_no_exist" + + # We want an exception to be thrown here, if so, the test passes. + with pytest.raises(Exception) as e_info: + + src.remove_deny_list_files(src_dir = params["src_dir"], + deny_list = params["deny_list"], + branch_id = params["branch_id"], + verbose = params["verbose"]) + + + def test_remove_deny_list_files_invalid_deny_list_does_not_exist(self): + ''' + Double check the deny list exists + ''' + + params = self.params["valid_specific_branch_data"].copy() + params["deny_list"] = "invalid_file_name.txt" + + # We want an exception to be thrown here, if so, the test passes. + with pytest.raises(Exception) as e_info: + + src.remove_deny_list_files(src_dir = params["src_dir"], + deny_list = params["deny_list"], + branch_id = params["branch_id"], + verbose = params["verbose"]) + + +# Helper Functions: + + def __get_deny_list_filenames(self, src_dir, deny_list, branch_id): + + deny_list_files = [] + + if (deny_list == "None"): + return deny_list_files + + # Note: some of the deny_file_names might be a comment line + # this will validate file exists + deny_file_names = fh.load_list_file(deny_list.strip()) + + for deny_file_name in deny_file_names: + + # Only add files to the list that do not start with a # + deny_file_name = deny_file_name.strip() + if (deny_file_name.startswith("#")): + continue + + deny_file_name = deny_file_name.replace("{}", branch_id) + + deny_list_files.append(deny_file_name) + + return deny_list_files + + + def __check_no_deny_list_files_exist(self, src_dir, deny_array): + + found_files = [] + + for file_name in deny_array: + + found_files.append(os.path.join(src_dir, file_name)) + + for found_file in found_files: + + if os.path.exists(found_file): + return False + + return True + diff --git a/unit_tests/pyproject.toml b/unit_tests/pyproject.toml new file mode 100644 index 000000000..142fbd053 --- /dev/null +++ b/unit_tests/pyproject.toml @@ -0,0 +1,7 @@ +[tool.pytest.ini_options] +filterwarnings = [ + "error", + "ignore::UserWarning", + "ignore::DeprecationWarning", + "ignore::PendingDeprecationWarning" +] diff --git a/unit_tests/rating_curve_comparison_params.json b/unit_tests/rating_curve_comparison_params.json new file mode 100644 index 000000000..dc0ef15bd --- /dev/null +++ b/unit_tests/rating_curve_comparison_params.json @@ -0,0 +1,19 @@ +{ + "valid_data": + { + "02020005": + [ + "/outputs/fim_unit_test_data_do_not_remove/02020005/usgs_elev_table.csv", + "/outputs/fim_unit_test_data_do_not_remove/02020005/branches", + "/data/inputs/usgs_gages/usgs_rating_curves.csv", + "/outputs/fim_unit_test_data_do_not_remove/rating_curve_comparison/tables/usgs_interpolated_elevation_stats_02020005.csv", + 
"/outputs/fim_unit_test_data_do_not_remove/rating_curve_comparison/tables/nwm_recurrence_flow_elevations_02020005.csv", + "/outputs/fim_unit_test_data_do_not_remove/rating_curve_comparison/plots/FIM-USGS_rating_curve_comparison_02020005.png", + "/data/inundation_review/inundation_nwm_recurr/nwm_recurr_flow_data/", + "/data/inputs/usgs_gages/catfim_flows_cms.csv", + "02020005", + false, + false + ] + } +} \ No newline at end of file diff --git a/unit_tests/rating_curve_comparison_test.py b/unit_tests/rating_curve_comparison_test.py new file mode 100644 index 000000000..f1f4ec3d3 --- /dev/null +++ b/unit_tests/rating_curve_comparison_test.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + +import os +import json +import unittest +import pytest + +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers +from rating_curve_comparison import generate_rating_curve_metrics + + +class test_rating_curve_comparison(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + params_file_path = ut_helpers.get_params_filename(__file__) + with open(params_file_path) as params_file: + self.params = json.load(params_file) + +# Test Cases + + @pytest.mark.skip(reason="rating_curve has invalid pathing based on files "\ + "that are normally not in a runtime folder. This test will disabled for now.") + def test_generate_rating_curve_metrics_02020005_success(self): + + ''' + We are testing whether a .png file was created for a FIM-USGS rating curve comparison, + for HUC 02020005 using the `generate_rating_curve_metrics` function. + The 5th index (parameter) for each HUC in `rating_curve_comparison_params.json` specifies + the FIM-USGS rating curve comparison .png filepath. + ''' + + params = self.params["valid_data"].copy() #update "valid_data" value if you need to (aka.. more than one node) + + _indiv_huc_params = params["02020005"] + + # To setup the test, lets start by deleted the expected output file to ensure + # that it is regenerated. 
+ if os.path.exists(_indiv_huc_params[5]): + os.remove(_indiv_huc_params[5]) + + # Test that the file was deleted + assert os.path.exists(_indiv_huc_params[5]) == False + + generate_rating_curve_metrics(_indiv_huc_params) + + # Test that the file was created by generate_rating_curve_metrics + assert os.path.exists(_indiv_huc_params[5]) == True + diff --git a/unit_tests/shared_functions_params.json b/unit_tests/shared_functions_params.json new file mode 100644 index 000000000..9dce603d1 --- /dev/null +++ b/unit_tests/shared_functions_params.json @@ -0,0 +1,17 @@ +{ + "append_append_id_to_file_name_single_identifier_valid": + { + "outputRunDataDir": "/output/fim_unit_test_data_do_not_remove/", + "file_name": "02020005/branches/3246000003/rem_zeroed_masked_3246000003.tif", + "identifier": "8888", + "expected_output": "02020005/branches/3246000003/rem_zeroed_masked_3246000003_8888.tif" + }, + + "append_append_id_to_file_name_identifier_list_valid": + { + "outputRunDataDir": "/output/fim_unit_test_data_do_not_remove/", + "file_name": "02020005/branches/3246000003/rem_zeroed_masked_3246000003.tif", + "identifier": ["7777", "8888"], + "expected_output": "02020005/branches/3246000003/rem_zeroed_masked_3246000003_7777_8888.tif" + } +} \ No newline at end of file diff --git a/unit_tests/shared_functions_test.py b/unit_tests/shared_functions_test.py new file mode 100644 index 000000000..3f9de9829 --- /dev/null +++ b/unit_tests/shared_functions_test.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +import json +import unittest +import pytest + +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers +from utils.shared_functions import FIM_Helpers as src + +class test_shared_functions(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + params_file_path = ut_helpers.get_params_filename(__file__) + with open(params_file_path) as params_file: + self.params = json.load(params_file) + +# Test Cases: + + def test_append_id_to_file_name_single_identifier_success(self): + + ''' + Pass in a file name with the single identifier and get the single adjusted file name back + ''' + + params = self.params["append_append_id_to_file_name_single_identifier_valid"].copy() + + actual_output = src.append_id_to_file_name(file_name = ut_helpers.json_concat(params, "outputRunDataDir" , "file_name"), + identifier = params["identifier"]) + + err_msg = "actual output does not match expected output" + + expected_output = ut_helpers.json_concat(params, "outputRunDataDir" , "expected_output") + + assert expected_output == actual_output, err_msg + + + def test_append_id_to_file_name_indentifer_list_success(self): + + ''' + Pass in a file name with the list of identifiers and get a file name back with multiple identifers added + ''' + + params = self.params["append_append_id_to_file_name_identifier_list_valid"].copy() + + actual_output = src.append_id_to_file_name(file_name = ut_helpers.json_concat(params, "outputRunDataDir" , "file_name"), + identifier = params["identifier"]) + + err_msg = "actual output does not match expected output" + + expected_output = ut_helpers.json_concat(params, "outputRunDataDir" , "expected_output") + + assert expected_output == actual_output, err_msg + + + def test_append_id_to_file_name_no_file_name_success(self): + + ''' + Pass in an non existant file name and get None back + ''' + + params = self.params["append_append_id_to_file_name_single_identifier_valid"].copy() + + actual_output = 
src.append_id_to_file_name(None, identifier = params["identifier"]) + + error_msg = "actual output should not have a value" + + assert actual_output == None, error_msg + diff --git a/unit_tests/split_flows_params.json b/unit_tests/split_flows_params.json new file mode 100644 index 000000000..06f4118c2 --- /dev/null +++ b/unit_tests/split_flows_params.json @@ -0,0 +1,16 @@ +{ + "valid_data": + { + "outputRunDataDir" : "/outputs/fim_unit_test_data_do_not_remove/", + "max_length": 1500, + "slope_min": 0.001, + "lakes_buffer_input": 20, + "flows_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000005/demDerived_reaches_3246000005.shp", + "dem_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000005/dem_thalwegCond_3246000005.tif", + "split_flows_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000005/demDerived_reaches_split_3246000005.gpkg", + "split_points_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000005/demDerived_reaches_split_points_3246000005.gpkg", + "wbd8_clp_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/wbd8_clp.gpkg", + "lakes_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/nwm_lakes_proj_subset.gpkg", + "nwm_streams_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/nwm_subset_streams_levelPaths.gpkg" + } +} diff --git a/unit_tests/split_flows_test.py b/unit_tests/split_flows_test.py new file mode 100644 index 000000000..66788a928 --- /dev/null +++ b/unit_tests/split_flows_test.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +import os +import json +import unittest +import pytest + +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers + +import split_flows as src + + +class test_split_flows(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + params_file_path = ut_helpers.get_params_filename(__file__) + with open(params_file_path) as params_file: + self.params = json.load(params_file) + +# Test Cases: + + # Ensure split_flows_filename & split_points_filename are created by the split_flows function + def test_split_flows_success(self): + + ''' + The /outputs/fim_unit_test_data_do_not_remove//branches//demDerived_reaches_split_.gpkg and + /outputs/fim_unit_test_data_do_not_remove//branches//demDerived_reaches_split_points_.gpkg should not exit prior to this test. + If the test is successful, these files will be created. 
+ ''' + + params = self.params["valid_data"].copy() + + # to setup the test, lets start by deleted the two expected output files to ensure + # that they are regenerated + if os.path.exists(params["split_flows_filename"]): + os.remove(params["split_flows_filename"]) + if os.path.exists(params["split_points_filename"]): + os.remove(params["split_points_filename"]) + + error_msg = params["split_flows_filename"] + " does exist, when it should not (post os.remove call)" + assert os.path.exists(params["split_flows_filename"]) == False, error_msg + + error_msg = params["split_points_filename"] + " does exist, when it should not (post os.remove call)" + assert os.path.exists(params["split_points_filename"]) == False, error_msg + + src.split_flows(max_length = params["max_length"], + slope_min = params["slope_min"], + lakes_buffer_input = params["lakes_buffer_input"], + flows_filename = params["flows_filename"], + dem_filename = params["dem_filename"], + split_flows_filename = params["split_flows_filename"], + split_points_filename = params["split_points_filename"], + wbd8_clp_filename = params["wbd8_clp_filename"], + lakes_filename = params["lakes_filename"], + nwm_streams_filename = params["nwm_streams_filename"]) + + error_msg = params["split_flows_filename"] + " does not exist" + assert os.path.exists(params["split_flows_filename"]) == True, error_msg + + error_msg = params["split_points_filename"] + " does not exist" + assert os.path.exists(params["split_points_filename"]) == True, error_msg + diff --git a/unit_tests/tools/__init__.py b/unit_tests/tools/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/unit_tests/tools/inundate_gms_params.json b/unit_tests/tools/inundate_gms_params.json new file mode 100644 index 000000000..ec1d3d55f --- /dev/null +++ b/unit_tests/tools/inundate_gms_params.json @@ -0,0 +1,15 @@ +{ + "valid_data_inudation_raster_single_huc": + { + "hydrofabric_dir": "/outputs/fim_unit_test_data_do_not_remove/", + "forecast": "/data/inputs/rating_curve/nwm_recur_flows/nwm21_17C_recurrence_flows_cfs.csv", + "num_workers": 1, + "hucs": "02020005, 05030104", + "inundation_raster": "/outputs/fim_unit_test_data_do_not_remove/inundation_unittest_02020005_gms.tif", + "inundation_polygon": null, + "depths_raster": null, + "verbose": true, + "log_file": "/outputs/fim_unit_test_data_do_not_remove/logs/inundation_logfile.txt", + "output_fileNames": "/outputs/fim_unit_test_data_do_not_remove/logs/inundation_file_list.csv" + } +} diff --git a/unit_tests/tools/inundate_gms_test.py b/unit_tests/tools/inundate_gms_test.py new file mode 100644 index 000000000..9e0f195d6 --- /dev/null +++ b/unit_tests/tools/inundate_gms_test.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 + +import os +import sys + +import json +import unittest +import pytest + +sys.path.append('/foss_fim/unit_tests/') +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers + +sys.path.append('/foss_fim/tools/gms_tools') +import inundate_gms as src + + +class test_inundate_gms(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + params_file_path = ut_helpers.get_params_filename(__file__) + with open(params_file_path) as params_file: + self.params = json.load(params_file) + +# Test Cases: + @pytest.mark.skip(reason="Inundate_gms will be rebuilt in the future, so this test will be left broken.") + def test_Inundate_gms_create_inundation_raster_directory_single_huc_success(self): + + ''' + Test for creating a gms 
inundation raster, not a depth raster and no + inundation_polygons. + This test is based on creating a raster based on a single huc and its branches + within a gms output folder. + ''' + + params = self.params["valid_data_inudation_raster_single_huc"].copy() + + output_fileNames_df = src.Inundate_gms(hydrofabric_dir = params["hydrofabric_dir"], + forecast = params["forecast"], + num_workers = params["num_workers"], + hucs = params["hucs"], + inundation_raster = params["inundation_raster"], + inundation_polygon = params["inundation_polygon"], + depths_raster = params["depths_raster"], + verbose = params["verbose"], + # log_file = None, + # output_fileNames = params["output_fileNames"] + ) + + # check if output files df has records. + assert len(output_fileNames_df) > 0, "Expected as least one dataframe record" + + # also check output log file and output raster. Can't... there will be multiple outputs + assert os.path.exists(params["inundation_raster"]), "Inundation Raster does not exist" + + assert os.path.exists(params["log_file"]), "Log file expected and does not exist" + diff --git a/unit_tests/tools/inundate_gms_unittests.py b/unit_tests/tools/inundate_gms_unittests.py new file mode 100644 index 000000000..ade2bfa30 --- /dev/null +++ b/unit_tests/tools/inundate_gms_unittests.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 + +import inspect +import os +import sys + +import json +import warnings +import unittest + +sys.path.append('/foss_fim/unit_tests/') +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers + +sys.path.append('/foss_fim/tools') +import inundate_gms as src + +# NOTE: This goes directly to the function. +# Ultimately, it should emulate going through command line (not import -> direct function call) +class test_inundate_gms(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + warnings.simplefilter('ignore') + params_file_path = ut_helpers.get_params_filename(__file__) + with open(params_file_path) as params_file: + self.params = json.load(params_file) + + + def test_Inundate_gms_create_inundation_raster_directory_single_huc_success(self): + + ''' + Test for creating a gms inundation raster, not a depth raster and no + inundation_polygons. + This test is based on creating a raster based on a single huc and its branches + within a gms output folder. + ''' + + params = self.params["valid_data_inudation_raster_single_huc"].copy() + + output_fileNames_df = src.Inundate_gms(hydrofabric_dir = params["hydrofabric_dir"], + forecast = params["forecast"], + num_workers = params["num_workers"], + hucs = params["hucs"], + inundation_raster = params["inundation_raster"], + inundation_polygon = params["inundation_polygon"], + depths_raster = params["depths_raster"], + verbose = params["verbose"], + log_file = params["log_file"], + output_fileNames = params["output_fileNames"]) + + # check if output files df has records. + assert len(output_fileNames_df) > 0, "Expected as least one dataframe record" + + # also check output log file and output raster. Can't... 
there will be multiple outputs + #assert os.path.exists(params["inundation_raster"]), "Inundation Raster does not exist" + + assert os.path.exists(params["log_file"]), "Log file expected and does not exist" + + + print(f"Test Success: {inspect.currentframe().f_code.co_name}") + print("*************************************************************") + + + # *********************** + + +if __name__ == '__main__': + + script_file_name = os.path.basename(__file__) + + print("*****************************") + print(f"Start of {script_file_name} tests") + print() + + unittest.main() + + print() + print(f"End of {script_file_name} tests") + diff --git a/unit_tests/tools/inundation_params.json b/unit_tests/tools/inundation_params.json new file mode 100644 index 000000000..cc4685bcc --- /dev/null +++ b/unit_tests/tools/inundation_params.json @@ -0,0 +1,24 @@ +{ + "valid_data_inundate_branch": + { + "rem": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000001/rem_zeroed_masked_3246000001.tif", + "catchments": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000001/gw_catchments_reaches_filtered_addedAttributes_3246000001.tif", + "catchment_poly": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000001/gw_catchments_reaches_filtered_addedAttributes_crosswalked_3246000001.gpkg", + "hydro_table": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000001/hydroTable_3246000001.csv", + "forecast": "/data/test_cases/usgs_test_cases/validation_data_usgs/02020005/ptvn6/action/ahps_ptvn6_huc_02020005_flows_action.csv", + "mask_type": null, + "hucs": null, + "hucs_layerName": null, + "subset_hucs": null, + "num_workers": 1, + "aggregate": false, + "inundation_raster": "/data/test_cases/usgs_test_cases/02020005_usgs/testing_versions/fim_unit_test_data_do_not_remove/action/ptvn6_inundation_extent_02020005_3246000001.tif", + "inundation_polygon": null, + "depths": null, + "out_raster_profile": null, + "out_vector_profile": null, + "src_table": null, + "quiet": true, + "expected_inundation_raster": "/data/" + } +} \ No newline at end of file diff --git a/unit_tests/tools/inundation_test.py b/unit_tests/tools/inundation_test.py new file mode 100644 index 000000000..3cb4c6517 --- /dev/null +++ b/unit_tests/tools/inundation_test.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 + +import os +import sys +import json +import unittest +import pytest + +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers +import inundation as src + + +class test_inundate(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + params_file_path = ut_helpers.get_params_filename(__file__) + with open(params_file_path) as params_file: + self.params = json.load(params_file) + + +# Test Cases: + + @pytest.mark.skip(reason="Inundate_gms will be rebuilt in the future, so this test will be left broken.") + def test_inundate_create_inundation_raster_single_branch_success(self): + ''' + Test for creating a inundation branch raster, no depth raster and no + inundation_polygons, no subsets, no mask + ''' + + params = self.params["valid_data_inundate_branch"].copy() + + # returns list of rasters and polys + in_rasters, depth_rasters, in_polys = src.inundate(rem = params["rem"], + catchments = params["catchments"], + catchment_poly = params["catchment_poly"], + hydro_table = params["hydro_table"], + forecast = params["forecast"], + mask_type = params["mask_type"], + hucs = params["hucs"], + 
hucs_layerName = params["hucs_layerName"], + subset_hucs = params["subset_hucs"], + num_workers = params["num_workers"], + aggregate = params["aggregate"], + inundation_raster = params["inundation_raster"], + inundation_polygon = params["inundation_polygon"], + depths = params["depths"], + out_raster_profile = params["out_raster_profile"], + out_vector_profile = params["out_vector_profile"], + src_table = params["src_table"], + quiet = params["quiet"]) + + print("in_rasters") + print(in_rasters) + + assert len(in_rasters) == 1, "Expected exactly one inundation raster path records" + assert depth_rasters[0] == None, "Expected no depth raster path records" + assert in_polys[0] == None, "Expected no inundation_polys records" + + msg = f"Expected file {params['expected_inundation_raster']} but it does not exist." + assert os.path.exists(params["expected_inundation_raster"]) == True, msg + diff --git a/unit_tests/unit_tests_utils.py b/unit_tests/unit_tests_utils.py new file mode 100644 index 000000000..d2c7223eb --- /dev/null +++ b/unit_tests/unit_tests_utils.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 + +import errno +import os +import sys + +class FIM_unit_test_helpers(object): + + @staticmethod + def get_params_filename(unit_test_file_name): + + unittest_file_name = os.path.basename(unit_test_file_name) + params_file_name = unittest_file_name.replace("_test.py", "_params.json") + params_file_path = os.path.join(os.path.dirname(unit_test_file_name), params_file_name) + + if (not os.path.exists(params_file_path)): + raise FileNotFoundError(f"{params_file_path} does not exist") + + return params_file_path + + @staticmethod + def json_concat(obj, keyone, keytwo): + + concat_string = obj[keyone] + obj[keytwo] + + return concat_string \ No newline at end of file diff --git a/unit_tests/usgs_gage_crosswalk_params.json b/unit_tests/usgs_gage_crosswalk_params.json new file mode 100644 index 000000000..32f51a347 --- /dev/null +++ b/unit_tests/usgs_gage_crosswalk_params.json @@ -0,0 +1,12 @@ +{ + "valid_data": + { + "usgs_gages_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/usgs_subset_gages.gpkg", + "input_flows_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000005/demDerived_reaches_split_filtered_3246000005.gpkg", + "input_catchment_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000005/gw_catchments_reaches_filtered_addedAttributes_3246000005.gpkg", + "dem_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000005/dem_meters_3246000005.tif", + "dem_adj_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000005/dem_thalwegCond_3246000005.tif", + "output_table_filename": "/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000005/usgs_elev_table.csv", + "branch_id": "3246000005" + } +} \ No newline at end of file diff --git a/unit_tests/usgs_gage_crosswalk_test.py b/unit_tests/usgs_gage_crosswalk_test.py new file mode 100644 index 000000000..4b9bedc1a --- /dev/null +++ b/unit_tests/usgs_gage_crosswalk_test.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 + +import os +import sys +import json +import unittest +import pytest + +from unit_tests_utils import FIM_unit_test_helpers as ut_helpers +from usgs_gage_crosswalk import GageCrosswalk + + +class test_usgs_gage_crosswalk(unittest.TestCase): + + ''' + Allows the params to be loaded one and used for all test methods + ''' + @classmethod + def setUpClass(self): + + params_file_path = ut_helpers.get_params_filename(__file__) + 
with open(params_file_path) as params_file: + self.params = json.load(params_file) + + +# Test Cases: + + def test_GageCrosswalk_success(self): + + ''' + Test whether the GageCrosswalk object can be instantiated, and test that the run_crosswalk method can successfully + create the output table (usgs_elev_table.csv). + ''' + + params = self.params["valid_data"].copy() #update "valid_data" value if you need to (aka.. more than one node) + + # Delete the usgs_elev_table.csv if it exists + if os.path.exists(params["output_table_filename"]): + os.remove(params["output_table_filename"]) + + # Verify the usgs_elev_table.csv was deleted + msg = f'{params["output_table_filename"]} does exist, when it should have been deleted' + assert os.path.exists(params["output_table_filename"]) == False, msg + + # Instantiate and run GageCrosswalk + gage_crosswalk = GageCrosswalk(params["usgs_gages_filename"], params["branch_id"]) + + # Run crosswalk + gage_crosswalk.run_crosswalk(params["input_catchment_filename"], params["input_flows_filename"], + params["dem_filename"], params["dem_adj_filename"], params["output_table_filename"]) + + # Make sure that the usgs_elev_table.csv was written + msg = f'{params["output_table_filename"]} does not exist' + assert os.path.exists(params["output_table_filename"]) == True, msg +
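
Note for reviewers: every test module added in this diff follows the same *_test.py / *_params.json pairing, resolved at runtime by FIM_unit_test_helpers.get_params_filename and collected by pytest (the filterwarnings settings live in unit_tests/pyproject.toml above). The sketch below is illustrative only and is not part of the diff: my_tool, my_function, and the params keys are hypothetical placeholders showing the minimal shape a new test module would likely take under this convention.

#!/usr/bin/env python3

# Hypothetical example only -- not part of this PR.
# This file would live at unit_tests/my_tool_test.py, next to a sibling
# unit_tests/my_tool_params.json containing a "valid_data" node, mirroring
# the params files added in this diff.

import json
import os
import unittest

import pytest

from unit_tests_utils import FIM_unit_test_helpers as ut_helpers
# import my_tool as src   # the module under test (placeholder name)


class test_my_tool(unittest.TestCase):

    '''
    Allows the params to be loaded once and used for all test methods
    '''
    @classmethod
    def setUpClass(self):

        # my_tool_test.py is mapped to my_tool_params.json by the helper
        params_file_path = ut_helpers.get_params_filename(__file__)
        with open(params_file_path) as params_file:
            self.params = json.load(params_file)

# Test Cases:

    def test_my_function_success(self):

        params = self.params["valid_data"].copy()

        # Sanity-check the params node, then call the function under test and
        # assert on its side effects (the pattern used throughout this PR), e.g.:
        # src.my_function(output_file = params["output_file"])
        # assert os.path.exists(params["output_file"]), f"{params['output_file']} does not exist"
        assert isinstance(params, dict) and len(params) > 0

Such a module would presumably be run from inside the unit_tests folder (so that unit_tests_utils resolves on the import path), for example with "pytest my_tool_test.py" or "pytest ." inside the container; when pytest is invoked from that folder, the [tool.pytest.ini_options] settings in unit_tests/pyproject.toml apply.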