diff --git a/CHANGELOG.md b/CHANGELOG.md
index a12d1911c..808effc01 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,25 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

+## v3.0.19.1 - 2021-06-17 - [PR #417](https://github.com/NOAA-OWP/cahaba/pull/417)
+
+Adding a thalweg profile tool to identify significant drops in thalweg elevation. Also exposing the lateral thalweg adjustment threshold used in hydroconditioning as a configurable parameter.
+
+## Additions
+- `thalweg_drop_check.py` checks the elevation along the thalweg for each stream path downstream of MS headwaters within a HUC.
+
+## Removals
+- Removed the `dissolveLinks` arg from `clip_vectors_to_wbd.py`.
+
+## Changes
+- Cleaned up code in `split_flows.py` to make it more readable.
+- Refactored `reduce_nhd_stream_density.py` and `adjust_headwater_streams.py` to limit MS headwater points in `agg_nhd_headwaters_adj.gpkg`.
+- Fixed a bug in the `adjust_thalweg_lateral.py` lateral elevation replacement threshold; changed the threshold to 3 meters.
+- Updated `aggregate_vector_inputs.py` to log intermediate processes.
+
+
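As a quick illustration of what the new check computes, the sketch below (a minimal sketch, not the tool itself; all values are hypothetical) flags a drop along a longitudinal profile by differencing consecutive thalweg elevations per headwater path and comparing against the 3 m threshold:

```python
# Hedged sketch of the significant-drop test; not the shipped tool.
import pandas as pd

profile = pd.DataFrame({
    'headwater_path': ['a'] * 5,                        # one hypothetical stream path
    'elevation_m': [102.0, 101.5, 97.9, 97.8, 97.7],    # hypothetical thalweg elevations
})
threshold_m = 3  # mirrors the new thalweg_lateral_elev_threshold default

# Difference consecutive elevations within each path; a large negative step is a drop
profile['elev_change'] = profile.groupby('headwater_path')['elevation_m'].diff()
drops = profile.loc[profile.elev_change <= -threshold_m]
print(drops)  # the 101.5 -> 97.9 step (-3.6 m) exceeds the threshold
```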
+
 ## v3.0.19.0 - 2021-06-10 - [PR #415](https://github.com/NOAA-OWP/cahaba/pull/415)
 
 Feature to evaluate performance of alternative CatFIM techniques.
 
diff --git a/config/params_calibrated.env b/config/params_calibrated.env
index aa7aba1b0..3ca30650e 100644
--- a/config/params_calibrated.env
+++ b/config/params_calibrated.env
@@ -4,6 +4,7 @@
 export negative_burn_value=1000
 export agree_DEM_buffer=70
 export wbd_buffer=5000
+export thalweg_lateral_elev_threshold=3
 
 #### geospatial parameters ####
 export max_split_distance_meters=1500
diff --git a/config/params_template.env b/config/params_template.env
index a998dc675..87270a669 100644
--- a/config/params_template.env
+++ b/config/params_template.env
@@ -4,6 +4,7 @@
 export negative_burn_value=1000
 export agree_DEM_buffer=70
 export wbd_buffer=5000
+export thalweg_lateral_elev_threshold=3
 
 #### geospatial parameters ####
 export max_split_distance_meters=1500
diff --git a/src/adjust_headwater_streams.py b/src/adjust_headwater_streams.py
index 71f73186e..961d39d01 100644
--- a/src/adjust_headwater_streams.py
+++ b/src/adjust_headwater_streams.py
@@ -29,10 +29,10 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id):
     nws_lid_limited = nws_lid_limited.loc[nws_lid_limited.nws_lid!='']
     nws_lid_limited = nws_lid_limited.drop(columns=['nws_lid'])
 
-    # Check for issues in nws_lid layer
-    if len(nws_lid_limited) < len(nws_lids):
-        missing_nws_lids = list(set(nws_lids.site_id) - set(nws_lid_limited.site_id))
-        print (f"nws lid(s) {missing_nws_lids} missing from aggregate dataset in huc {huc}")
+    # Check for issues in nws_lid layer (now this reports back non-headwater nws_lids)
+    # if len(nws_lid_limited) < len(nws_lids):
+    #     missing_nws_lids = list(set(nws_lids.site_id) - set(nws_lid_limited.site_id))
+    #     print (f"nws lid(s) {missing_nws_lids} missing from aggregate dataset in huc {huc}")
 
     # Combine NWM headwaters and AHPS sites to be snapped to NHDPlus HR segments
     headwater_pts = headwater_limited.append(nws_lid_limited)
diff --git a/src/adjust_thalweg_lateral.py b/src/adjust_thalweg_lateral.py
index 90c8cecbf..47cd2e209 100755
--- a/src/adjust_thalweg_lateral.py
+++ b/src/adjust_thalweg_lateral.py
@@ -7,42 +7,43 @@ import numpy as np
 
 @profile
-def adjust_thalweg_laterally(elevation_raster, stream_raster, allocation_raster, cost_distance_raster, cost_distance_tolerance, dem_lateral_thalweg_adj):
-
+def adjust_thalweg_laterally(elevation_raster, stream_raster, allocation_raster, cost_distance_raster, cost_distance_tolerance, dem_lateral_thalweg_adj,lateral_elevation_threshold):
+
     # ------------------------------------------- Get catchment_min_dict --------------------------------------------------- #
     # The following algorithm searches for the zonal minimum elevation in each pixel catchment
     # It updates the catchment_min_dict with this zonal minimum elevation value.
     @njit
     def make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window, cost_tolerance, ndv):
-        for i,cm in enumerate(zone_window):
+        for i,elev_m in enumerate(zone_window):
             # If the zone really exists in the dictionary, compare elevation values.
             i = int(i)
-            cm = int(cm)
-
+            elev_m = int(elev_m)
+
             if (cost_window[i] <= cost_tolerance):
                 if elevation_window[i] > 0:  # Don't allow bad elevation values
-                    if (cm in zone_min_dict):
-
-                        if (elevation_window[i] < zone_min_dict[cm]):
+                    if (elev_m in zone_min_dict):
+
+                        if (elevation_window[i] < zone_min_dict[elev_m]):
                             # If the elevation_window's elevation value is less than the zone_min_dict min, update the zone_min_dict min.
-                            zone_min_dict[cm] = elevation_window[i]
+                            zone_min_dict[elev_m] = elevation_window[i]
                     else:
-                        zone_min_dict[cm] = elevation_window[i]
+                        zone_min_dict[elev_m] = elevation_window[i]
+
         return(zone_min_dict)
-
+
     # Open the masked gw_catchments_pixels_masked and dem_thalwegCond_masked.
     elevation_raster_object = rasterio.open(elevation_raster)
     allocation_zone_raster_object = rasterio.open(allocation_raster)
     cost_distance_raster_object = rasterio.open(cost_distance_raster)
-
+
     meta = elevation_raster_object.meta.copy()
     meta['tiled'], meta['compress'] = True, 'lzw'
-
+
     # -- Create zone_min_dict -- #
     print("Create zone_min_dict")
     zone_min_dict = typed.Dict.empty(types.int32,types.float32)  # Initialize an empty dictionary to store the catchment minimums.
     # Update catchment_min_dict with pixel sheds minimum.
-
+
     for ji, window in elevation_raster_object.block_windows(1):  # Iterate over windows, using elevation_raster_object as template.
         elevation_window = elevation_raster_object.read(1,window=window).ravel()  # Define elevation_window.
         zone_window = allocation_zone_raster_object.read(1,window=window).ravel()  # Define zone_window.
@@ -50,72 +51,69 @@ def make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window
         # Call numba-optimized function to update catchment_min_dict with pixel sheds minimum.
         zone_min_dict = make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window, int(cost_distance_tolerance), meta['nodata'])
-
-    # ------------------------------------------------------------------------------------------------------------------------ #
-
+
+    # ------------------------------------------------------------------------------------------------------------------------ #
+
     elevation_raster_object.close()
     allocation_zone_raster_object.close()
     cost_distance_raster_object.close()
-
+
     # ------------------------------------------- Assign zonal min to thalweg ------------------------------------------------ #
     @njit
     def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_window):
-
+
         # Copy elevation values into new array that will store the minimized elevation values.
         dem_window_to_return = np.empty_like (dem_window)
         dem_window_to_return[:] = dem_window
-
-        for i,cm in enumerate(zone_window):
+
+        for i,elev_m in enumerate(zone_window):
             i = int(i)
-            cm = int(cm)
+            elev_m = int(elev_m)
 
             thalweg_cell = thalweg_window[i]  # From flows_grid_boolean.tif (0s and 1s)
             if thalweg_cell == 1:  # Make sure thalweg cells are checked.
-                if cm in zone_min_dict:
-                    zone_min_elevation = zone_min_dict[cm]
+                if elev_m in zone_min_dict:
+                    zone_min_elevation = zone_min_dict[elev_m]
                     dem_thalweg_elevation = dem_window[i]
-
-                    elevation_difference = zone_min_elevation - dem_thalweg_elevation
-
-                    if zone_min_elevation < dem_thalweg_elevation and elevation_difference <= 5:
+
+                    elevation_difference = dem_thalweg_elevation - zone_min_elevation
+
+                    if (zone_min_elevation < dem_thalweg_elevation) and (elevation_difference <= lateral_elevation_threshold):
                         dem_window_to_return[i] = zone_min_elevation
 
         return(dem_window_to_return)
-
+
     # Specify raster object metadata.
     elevation_raster_object = rasterio.open(elevation_raster)
     allocation_zone_raster_object = rasterio.open(allocation_raster)
     thalweg_object = rasterio.open(stream_raster)
-
+
     dem_lateral_thalweg_adj_object = rasterio.open(dem_lateral_thalweg_adj, 'w', **meta)
-
+
     for ji, window in elevation_raster_object.block_windows(1):  # Iterate over windows, using dem_rasterio_object as template.
         dem_window = elevation_raster_object.read(1,window=window)  # Define dem_window.
         window_shape = dem_window.shape
         dem_window = dem_window.ravel()
-
+
         zone_window = allocation_zone_raster_object.read(1,window=window).ravel()  # Define catchments_window.
         thalweg_window = thalweg_object.read(1,window=window).ravel()  # Define thalweg_window.
-
+
         # Call numba-optimized function to reassign thalweg cell values to catchment minimum value.
         minimized_dem_window = minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_window)
         minimized_dem_window = minimized_dem_window.reshape(window_shape).astype(np.float32)
 
-        dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1)
-
+        dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1)
+
     elevation_raster_object.close()
     allocation_zone_raster_object.close()
     cost_distance_raster_object.close()
-
-    # Delete allocation_raster and distance_raster.
-
-
-
+
+
 if __name__ == '__main__':
-
+
     # Parse arguments.
     parser = argparse.ArgumentParser(description='Adjusts the elevation of the thalweg to the lateral zonal minimum.')
     parser.add_argument('-e','--elevation_raster',help='Raster of elevation.',required=True)
@@ -124,11 +122,9 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w
     parser.add_argument('-d','--cost_distance_raster',help='Raster of cost distances for the allocation raster.',required=True)
     parser.add_argument('-t','--cost_distance_tolerance',help='Tolerance in meters to use when searching for zonal minimum.',required=True)
     parser.add_argument('-o','--dem_lateral_thalweg_adj',help='Output elevation raster with adjusted thalweg.',required=True)
-
+    parser.add_argument('-th','--lateral_elevation_threshold',help='Maximum difference between current thalweg elevation and lowest lateral elevation in meters.',required=True,type=int)
+
     # Extract to dictionary and assign to variables.
     args = vars(parser.parse_args())
-
+
     adjust_thalweg_laterally(**args)
-
-
-
diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py
index 2c1081989..3675b88b8 100755
--- a/src/aggregate_vector_inputs.py
+++ b/src/aggregate_vector_inputs.py
@@ -89,6 +89,7 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit):
 
     # Input wbd
     if isinstance(wbd,str):
+
         layer = f"WBDHU{huc_unit}"
         wbd = gpd.read_file(wbd, layer=layer)
     elif isinstance(wbd,gpd.GeoDataFrame):
@@ -175,6 +176,7 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit):
 
         huc_intersection = gpd.GeoDataFrame({'geometry': intersecting_points, 'NHDPlusID': nhdplus_ids,'mainstem': mainstem_flag},crs=nwm_streams.crs,geometry='geometry')
         huc_intersection = huc_intersection.drop_duplicates()
+        del nwm_streams,wbd
 
     return huc_intersection
 
@@ -182,7 +184,8 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit):
 
 def collect_stream_attributes(nhdplus_vectors_dir, huc):
 
-    print ('Starting huc: ' + str(huc))
+    print (f"Starting attribute collection for HUC {huc}",flush=True)
+
     # Collecting NHDPlus HR attributes
     burnline_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg')
     vaa_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg')
@@ -214,10 +217,10 @@ def collect_stream_attributes(nhdplus_vectors_dir, huc):
         nhd_streams.to_file(nhd_streams_agg_fileName,driver=getDriver(nhd_streams_agg_fileName),index=False)
         del nhd_streams
 
-        print ('finished huc: ' + str(huc))
+        print (f"finished attribute collection for HUC {huc}",flush=True)
 
     else:
-        print ('missing data for huc ' + str(huc))
+        print (f"missing data for HUC {huc}",flush=True)
 
 
 def subset_stream_networks(args, huc):
@@ -229,10 +232,11 @@ def subset_stream_networks(args, huc):
     nhdplus_vectors_dir = args[4]
     nwm_huc4_intersections_filename = args[5]
 
-    print("starting HUC " + str(huc),flush=True)
+    print(f"starting stream subset for HUC {huc}",flush=True)
     nwm_headwater_id = 'ID'
     ahps_headwater_id = 'nws_lid'
     headwater_pts_id = 'site_id'
+
     column_order = ['pt_type', headwater_pts_id, 'mainstem', 'geometry']
 
     nhd_streams_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg')
@@ -247,13 +251,20 @@ def subset_stream_networks(args, huc):
     huc_mask = huc_mask.reset_index(drop=True)
 
     if len(selected_wbd8.HUC8) > 0:
+
         selected_wbd8 = selected_wbd8.reset_index(drop=True)
 
         # Identify FR/NWM headwaters and subset HR network
-        nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersections_filename)
+        try:
+            nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersections_filename)
+        except:
+            print (f"Error subsetting NHD HR network for HUC {huc}",flush=True)
 
         # Identify nhd mainstem streams
-        nhd_streams_all = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_fr,ahps_filename,ahps_headwater_id,nwm_huc4_intersections_filename,True)
+        try:
+            nhd_streams_all = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_fr,ahps_filename,ahps_headwater_id,nwm_huc4_intersections_filename,True)
+        except:
+            print (f"Error identifying MS network for HUC {huc}",flush=True)
 
         # Identify HUC8 intersection points
         nhd_huc8_intersections = find_nwm_incoming_streams(nhd_streams_all,selected_wbd8,8)
@@ -265,17 +276,17 @@ def subset_stream_networks(args, huc):
 
         # Load nws lids
         nws_lids = gpd.read_file(ahps_filename, mask=huc_mask)
-        nws_lids = nws_lids.drop(columns=['name','nwm_featur'])
+        nws_lids = nws_lids.drop(columns=['name','nwm_feature_id','usgs_site_code','states','HUC8','is_headwater', 'is_colocated'])
         nws_lids = nws_lids.rename(columns={"nws_lid": headwater_pts_id})
         nws_lids['pt_type'] = 'nws_lid'
         nws_lids['mainstem'] = True
 
         if (len(nwm_headwaters) > 0) or (len(nws_lids) > 0):
+
             # Adjust FR/NWM headwater segments
             adj_nhd_streams_all, adj_nhd_headwater_points = adjust_headwaters(huc,nhd_streams_all,nwm_headwaters,nws_lids,headwater_pts_id)
 
             adj_nhd_headwater_points = adj_nhd_headwater_points[column_order]
-
             nhd_huc8_intersections['pt_type'] = 'nhd_huc8_intersections'
             nhd_huc8_intersections = nhd_huc8_intersections.rename(columns={"NHDPlusID": headwater_pts_id})
             nhd_huc8_intersections = nhd_huc8_intersections[column_order]
@@ -290,11 +301,14 @@ def subset_stream_networks(args, huc):
             adj_nhd_headwater_points_all.to_file(adj_nhd_headwaters_all_fileName,driver=getDriver(adj_nhd_headwaters_all_fileName),index=False)
 
             del adj_nhd_streams_all, adj_nhd_headwater_points_all
+
         else:
-            print (f"skipping headwater adjustments for HUC: {huc}")
+
+            print (f"skipping headwater adjustments for HUC {huc}")
 
     del nhd_streams_fr
+    print(f"finished stream subset for HUC {huc}",flush=True)
 
 
 def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list):
@@ -330,6 +344,7 @@ def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileNam
 
 def clean_up_intermediate_files(nhdplus_vectors_dir):
 
     for huc in os.listdir(nhdplus_vectors_dir):
+
         agg_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg')
         streams_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg')
         headwater_adj_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg')
@@ -385,18 +400,19 @@ def clean_up_intermediate_files(nhdplus_vectors_dir):
         if not os.path.isfile(streams_adj_path):
             missing_subsets = missing_subsets + [huc]
 
-    print (f"running subset_results on {len(missing_subsets)} HUC4s")
+    print (f"Subsetting stream network for {len(missing_subsets)} HUC4s")
     num_workers=11
 
     with ProcessPoolExecutor(max_workers=num_workers) as executor:
         # Preprocess nhd hr and add attributes
-        # collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list]
+        collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list]
         # Subset nhd hr network
         subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in missing_subsets]
 
-    # del wbd4,wbd8
+    del wbd4,wbd8
 
-    # Aggregate fr and ms nhd netowrks for entire nwm domain
+    # Aggregate subset nhd networks for entire nwm domain
+    print ('Aggregating subset NHD networks for entire NWM domain')
     aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list)
 
     # Remove intermediate files
diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py
index ef7ff4837..65fd72d20 100755
--- a/src/clip_vectors_to_wbd.py
+++ b/src/clip_vectors_to_wbd.py
@@ -8,7 +8,8 @@ from utils.shared_functions import getDriver
 
 @profile
-def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance,dissolveLinks=False):
+def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance):
+
     hucUnitLength = len(str(hucCode))
 
     # Get wbd buffer
@@ -88,6 +89,7 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l
         nhd_streams = nhd_streams.loc[nhd_streams.mainstem==1]
 
         if len(nhd_streams) > 0:
+
             # Find incoming stream segments (to WBD buffer) and identify which are upstream
             threshold_segments = gpd.overlay(nhd_streams, wbd_buffer, how='symmetric_difference')
             from_list = threshold_segments.FromNode.to_list()
@@ -148,8 +150,6 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l
     parser.add_argument('-gl','--great-lakes-filename',help='Great Lakes layer',required=True)
     parser.add_argument('-wb','--wbd-buffer-distance',help='WBD Mask buffer distance',required=True,type=int)
     parser.add_argument('-lb','--lake-buffer-distance',help='Great Lakes Mask buffer distance',required=True,type=int)
-    parser.add_argument('-o','--dissolve-links',help='remove multi-line strings',action="store_true",default=False)
-
     args = vars(parser.parse_args())
 
@@ -174,6 +174,5 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l
     great_lakes_filename = args['great_lakes_filename']
     wbd_buffer_distance = args['wbd_buffer_distance']
     lake_buffer_distance = args['lake_buffer_distance']
-    dissolveLinks = args['dissolve_links']
 
-    subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance,dissolveLinks)
\ No newline at end of file
+    subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance)
diff --git a/src/output_cleanup.py b/src/output_cleanup.py
index 19d74abc8..d73a2deee 100755
--- a/src/output_cleanup.py
+++ b/src/output_cleanup.py
@@ -39,7 +39,7 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod
         'bathy_xs_area_hydroid_lookup.csv',
         'src_full_crosswalked.csv',
         'usgs_elev_table.csv',
-        'hand_ref_elev_table.csv'
+        'hand_ref_elev_table.csv',
     ]
 
     # List of files that will be saved during a viz run
diff --git a/src/raster.py b/src/raster.py
deleted file mode 100644
index a10a02430..000000000
--- a/src/raster.py
+++ /dev/null
@@ -1,462 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-from osgeo import gdal, ogr, osr
-import numpy as np
-from os.path import isfile
-from os import remove
-from copy import deepcopy
-from subprocess import call
-
-class Raster:
-
-    """
-    Raster object from single band rasters
-
-    ...
-
-    Attributes
-    ----------
-    array : numpy array
-        raster data in numpy array
-    gt : list
-        geotransform. see gdal docs for more info.
-    proj : str
-        Projection string
-    ndv : number
-        No data value
-    des : str
-        band description
-    ct : gdal.colorTable
-        color table
-    dt : int
-        GDAL GDT data type. See notes.
-    dim : tuple
-        raster dimensions (bands, rows, columns) for multi-bands and (row, columns) for single band
-    nbands : int
-        number of bands.
-    nrows : int
-        number of rows
-    ncols : int
-        number of columns
-
-    Methods
-    -------
-    writeRaster(fileName,dtype=None,driverName='GTiff',verbose=False)
-        Write out raster file as geotiff
-    copy()
-        Copy method. Uses deepcopy since array data is present
-    clipToVector(raster_fileName,vector_fileName,verbose=False,output_fileType='GTiff',output_fileName=None,loadOutput=True)
-        Clips to vector using gdalwarp command line utility
-
-    Raises
-    ------
-    OSError
-        If fileName does not exist
-    ValueError
-        Raises if input raster
-
-    See Also
-    --------
-
-    Notes
-    -----
-    Currently only accepts single band rasters.
-
-    Multiple datatypes are used. The table below shows which numpy datatypes correspond to the the GDAL types and their integer codes.
-
-    # ## Integer Code ##    ## Global Descriptor Table ##    ## Numpy ##
-    #        0                     GDT_Unknown                 NA
-    #        1                     GDT_Byte                    np.bool, np.int ,np.int8, np.long, np.byte, np.uint8
-    #        2                     GDT_UInt16                  np.uint16, np.ushort
-    #        3                     GDT_Int16                   np.int16, np.short
-    #        4                     GDT_UInt32                  np.uint32 , np.uintc
-    #        5                     GDT_Int32                   np.int32, np.intc
-    #        6                     GDT_Float32                 np.float32, np.single
-    #        7                     GDT_Float64                 np.float64, np.double
-    #        8                     GDT_CInt16                  np.complex64
-    #        9                     GDT_CInt32                  np.complex64
-    #       10                     GDT_CFloat32                np.complex64
-    #       11                     GDT_CFloat64                np.complex128
-    #       12                     GDT_TypeCount               NA
-
-    Examples
-    --------
-    Load Raster
-    >>> rasterData = fldpln.Raster('path/to/raster')
-
-    """
-
-    # converts numpy datatypes and gdal GDT variables to integer codes
-    dataTypeConversion_name_to_integer = { np.int8 : 1 , np.bool : 1 , np.int : 1 , np.long : 1 , np.byte : 1, np.uint8 : 1,
-                                           np.uint16 : 2 , np.int16 : 3 ,
-                                           np.ushort : 2 , np.short : 3 ,
-                                           np.uint32 : 4 , np.uintc : 4 , np.int32 : 5 , np.intc : 5 ,
-                                           np.float32 : 6 , np.single : 6 ,
-                                           np.float64 : 7 , np.double : 7 ,
-                                           np.complex64 : 10 , np.complex128 : 11 ,
-                                           0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7,8:8,9:9,10:10,11:11,12:12 }
-
-    # converts integer codes and gdal GDT variables to numpy datatypes
-    dataTypeConversion_integer_to_name = {0 : np.complex128 , 1 : np.int8 , 2 : np.uint16 , 3 : np.int16 ,
-                                          4 : np.uint32 , 5 : np.int32 , 6 : np.float32 , 7 : np.float64 ,
-                                          8 : np.complex64 , 9 : np.complex64 , 10 : np.complex64 , 11 : np.complex128 }
-
-
-    def __init__(self,fileName,loadArray=True,dtype=None):
-
-        """
-        Initializes Raster Instance from single band raster
-
-        ...
-
-        Parameters
-        ----------
-        fileName : str
-            File path to single band raster
-        dtype : numpy datatype or int, optional
-            Numpy, GDT, or integer code data type used to override the data type on the file when imported to array (Default Value = None, None sets to the numpy array data type to the one in the raster file)
-
-        Returns
-        -------
-        raster
-            Instance of raster object
-
-        """
-
-        if not isfile(fileName):
-            raise OSError("File \'{}\' does not exist".format(fileName))
-
-        stream = gdal.Open(fileName,gdal.GA_ReadOnly)
-
-        self.nrows,self.ncols = stream.RasterYSize , stream.RasterXSize
-        self.nbands = stream.RasterCount
-
-        if loadArray:
-            self.array = stream.ReadAsArray()
-
-        self.gt = stream.GetGeoTransform()
-        self.proj = stream.GetProjection()
-
-        # if self.nbands > 1:
-            # raise ValueError('Raster class only accepts single band rasters for now')
-
-        band = stream.GetRasterBand(1)
-
-        self.ndv = band.GetNoDataValue()
-
-        # set data type
-        if dtype is not None: # override raster file type
-
-            # sets dt to dtype integer code
-            try:
-                self.dt = self.dataTypeConversion_name_to_integer[dtype]
-            except KeyError:
-                raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(dtype))
-
-            # sets array data type
-            if isinstance(dtype,type): # if dtype is a numpy data tpe
-
-                self.array = self.array.astype(dtype)
-
-            else: # if dtype is an integer code of GDAL GDT variable
-
-                try:
-                    self.array = self.array.astype(self.dataTypeConversion_integer_to_name[dtype])
-                except KeyError:
-                    raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(dtype))
-
-        else: # sets to default data type in raster file
-
-            self.dt = band.DataType
-
-            try:
-                self.array.astype(self.dataTypeConversion_integer_to_name[self.dt])
-            except KeyError:
-                raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(self.dt))
-
-        try:
-            self.des = band.GetDescription()
-        except AttributeError:
-            pass
-
-        try:
-            self.ct = stream.GetRasterColorTable()
-        except AttributeError:
-            pass
-
-        # self.dim = self.array.shape
-        self.fileName = fileName
-
-        stream,band = None,None
-
-
-    @property
-    def dim(self):
-        """ Property method for number of dimensions """
-
-        if self.nbands == 1:
-            DIMS = self.nrows,self.ncols
-        if self.nbands > 1:
-            DIMS = self.nbands,self.nrows,self.ncols
-
-        return(DIMS)
-
-
-    def copy(self):
-        """ Copy method. Uses deepcopy since array data is present """
-        return(deepcopy(self))
-
-
-    def writeRaster(self,fileName,dtype=None,driverName='GTiff',verbose=False):
-
-        """
-        Write out raster file as geotiff
-
-        Parameters
-        ----------
-        fileName : str
-            File path to output raster to
-        dtype : numpy datatype or int, optional
-            Numpy, GDT, or integer code data type (Default Value = self.dt attribute value, otherwise uses data type from the numpy array)
-        driverName : str, optional
-            GDAL driver type. See gdal docs for more details. Only tested for GTiff. (Default Value = 'GTiff')
-        verbose : Boolean, optional
-            Verbose output (Default Value = False)
-
-        Returns
-        -------
-        None
-
-        Raises
-        ------
-        ValueError
-            Raises ValueError when the data type parameter is not recognized. See the help docs for raster class to see which numpy, gdal, or encoded values are accepted.
-
-        Examples
-        --------
-        Write Geotiff raster
-        >>> rasterData = fldpln.Raster('path/to/raster')
-        >>> rasterData.writeRaster('/different/path/to/raster',dtype=np.int8)
-
-        """
-
-        driver = gdal.GetDriverByName(driverName)
-
-        if dtype is None:
-            try:
-                dtype = self.dt
-            except AttributeError:
-                # dtype = gdal.GDT_Float64
-                try:
-                    dtype = self.dataTypeConversion_name_to_integer[self.array.dtype]
-                except KeyError:
-                    raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from numpy array'.format(self.array.dtype))
-        else:
-            try:
-                dtype = self.dataTypeConversion_name_to_integer[dtype]
-            except KeyError:
-                raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from numpy array'.format(self.array.dtype))
-
-        dataset = driver.Create(fileName, self.ncols, self.nrows, 1, dtype)
-        dataset.SetGeoTransform(self.gt)
-        dataset.SetProjection(self.proj)
-        band = dataset.GetRasterBand(1)
-
-        # set color table and color interpretation
-        #print(band.__dict__)
-        try:
-            band.SetRasterColorTable(self.ct)
-            #band.SetRasterColorInterpretation(gdal.GCI_PaletteIndex)
-        except AttributeError:
-            pass
-
-        try:
-            band.SetDescription(self.des)
-        except AttributeError:
-            pass
-
-        band.SetNoDataValue(self.ndv)
-        band.WriteArray(self.array)
-        band, dataset = None,None # Close the file
-
-        if verbose:
-            print("Successfully wrote out raster to {}".format(fileName))
-
-    def polygonize(self,vector_fileName,vector_driver,layer_name,verbose):
-
-        gdal.UseExceptions()
-
-        # get raster datasource
-        #
-        src_ds = gdal.Open( self.fileName )
-        srcband = src_ds.GetRasterBand(1)
-
-        #
-        # create output datasource
-        driver_ext_dict = {'ESRI Shapefile' : 'shp' , 'GPKG' : 'gpkg'}
-
-        if vector_driver not in driver_ext_dict:
-            raise ValueError('Driver not found in {}'.format(driver_ext_dict))
-
-        drv = ogr.GetDriverByName(vector_driver)
-        dst_ds = drv.CreateDataSource( vector_fileName)
-
-        srs = osr.SpatialReference()
-        srs.ImportFromWkt(self.proj)
-
-        dst_layer = dst_ds.CreateLayer(layer_name, srs = srs, geom_type = ogr.wkbPolygon )
-
-        if verbose:
-            prog_func = gdal.TermProgress_nocb
-        else:
-            prog_func = None
-
-        gdal.Polygonize( srcband, None, dst_layer, -1, ['8CONNECTED=8'], callback=prog_func )
-
-    @classmethod
-    def clipToVector(cls,raster_fileName,vector_fileName,output_fileName=None,output_fileType='GTiff',verbose=False):
-        """
-        Clips to vector using gdalwarp command line utility
-
-        ...
-
-        Parameters
-        ----------
-        raster_fileName : str
-            File path to raster to clip
-        vector_fileName : str
-            File path to vector layer to clip with
-        output_fileName : str
-            Set file path to output clipped raster (Default Value = None)
-        output_fileType : str
-            Set file type of output from GDAL drivers list (Default Value = 'GTiff')
-        verbose : Boolean
-            Verbose output (Default Value = False)
-
-        Returns
-        -------
-        raster : raster
-            Clipped raster layer
-
-        Notes
-        -----
-        gdalwarp utility must be installed and callable via a subprocess
-
-        Examples
-        --------
-        clip raster and don't return
-        >>> fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector','path/to/write/output/raster/to')
-        Clip raster and return but don't write
-        >>> clippedRaster = fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector')
-
-
-        """
-
-        # create temp output if none is desired
-        if output_fileName is None:
-            output_fileName = 'temp.tif'
-
-        # generate command
-        command = ['gdalwarp','-overwrite','-of',output_fileType,'-cutline',vector_fileName,'-crop_to_cutline',raster_fileName,output_fileName]
-
-        # insert quiet flag if not verbose
-        if not verbose:
-            command = command.insert(1,'-q')
-
-        # call command
-        call(command)
-
-        # remove temp file
-        if output_fileName is None:
-            remove(output_fileName)
-
-        return(cls(output_fileName))
-
-    def getCoordinatesFromIndex(self,row,col):
-        """
-        Returns coordinates in the rasters projection from a given multi-index
-
-        """
-
-        # extract variables for readability
-        x_upper_limit, y_upper_limit = self.gt[0], self.gt[3]
-        x_resolution, y_resolution = self.gt[1], self.gt[5]
-        nrows, ncols = self.nrows, self.ncols
-
-        x = x_upper_limit + (col * x_resolution)
-        y = y_upper_limit + (row * y_resolution)
-
-        return(x,y)
-
-
-    def sampleFromCoordinates(self,x,y,returns='value'):
-        """
-        Sample raster value from coordinates
-        ...
-
-        Parameters
-        ----------
-        raster_fileName : str
-            File path to raster to clip
-        vector_fileName : str
-            File path to vector layer to clip with
-        output_fileName : str
-            Set file path to output clipped raster (Default Value = None)
-        output_fileType : str
-            Set file type of output from GDAL drivers list (Default Value = 'GTiff')
-        verbose : Boolean
-            Verbose output (Default Value = False)
-
-        Returns
-        -------
-        raster : raster
-            Clipped raster layer
-
-        Notes
-        -----
-        gdalwarp utility must be installed and callable via a subprocess
-
-        Examples
-        --------
-        clip raster and don't return
-        >>> fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector','path/to/write/output/raster/to')
-        Clip raster and return but don't write
-        >>> clippedRaster = fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector')
-
-
-        """
-
-        # extract variables for readability
-        x_upper_limit, y_upper_limit = self.gt[0], self.gt[3]
-        x_resolution, y_resolution = self.gt[1], self.gt[5]
-        nrows, ncols = self.nrows, self.ncols
-
-        # get upper left hand corner coordinates from the centroid coordinates of the upper left pixel
-        x_upper_limit = x_upper_limit - (x_resolution/2)
-        y_upper_limit = y_upper_limit - (y_resolution/2)
-
-        # get indices
-        columnIndex = int( ( x - x_upper_limit) / x_resolution)
-        rowIndex = int( ( y - y_upper_limit) / y_resolution)
-
-        # check indices lie within raster limits
-        columnIndexInRange = ncols > columnIndex >= 0
-        rowIndexInRange = nrows > rowIndex >= 0
-
-        if (not columnIndexInRange) | (not rowIndexInRange):
-            raise ValueError("Row Index {} or column index {} not in raster range ({},{})".format(rowIndex,columnIndex,nrows,ncols))
-
-        # check value is not ndv
-        if self.array[rowIndex,columnIndex] == self.ndv:
-            raise ValueError("Sample value is no data at ({},{})".format(nrows,ncols))
-
-        # return if statements
-        if returns == 'value':
-            return(self.array[rowIndex,columnIndex])
-        elif returns == 'multi-index':
-            return(rowIndex,columnIndex)
-        elif returns == 'ravel-index':
-            return(np.ravel_multi_index((rowIndex,columnIndex),(nrows,ncols)))
-        else:
-            raise ValueError('Enter valid returns argument')
diff --git a/src/reachID_grid_to_vector_points.py b/src/reachID_grid_to_vector_points.py
index 5dadd43c1..c77bcc732 100755
--- a/src/reachID_grid_to_vector_points.py
+++ b/src/reachID_grid_to_vector_points.py
@@ -1,17 +1,15 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8
-from osgeo import gdal
 import numpy as np
 import osgeo.ogr
 import osgeo.osr
 import sys
-
-import cProfile
+import argparse
 from tqdm import tqdm
 import geopandas as gpd
+from utils.shared_variables import PREP_PROJECTION
 from shapely.geometry import Point
-from raster import Raster
+import rasterio
 from utils.shared_functions import getDriver
 
 """
@@ -19,67 +17,55 @@
 ./reachID_grid_to_vector_points.py
 """
 
+def convert_grid_cells_to_points(raster,index_option,output_points_filename=False):
 
-path = sys.argv[1]
-outputFileName = sys.argv[2]
-writeOption = sys.argv[3]
-
-#r = gdal.Open(path)
-#band = r.GetRasterBand(1)
-boolean=Raster(path)
-
-#(upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = r.GetGeoTransform()
-(upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = boolean.gt
-
-#a = band.ReadAsArray().astype(np.float)
-
-# indices = np.nonzero(a != band.GetNoDataValue())
-indices = np.nonzero(boolean.array >= 1)
+    # Input raster
+    if isinstance(raster,str):
+        raster = rasterio.open(raster,'r')
 
-# Init the shapefile stuff..
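The refactor shown around this hunk replaces the GDAL-based script with a rasterio-backed function, but the cell-center arithmetic is unchanged: each pixel index is scaled by the geotransform and offset by half a cell. A minimal sketch of that arithmetic, with hypothetical geotransform values:

```python
# Hedged sketch of the cell-center math the converter relies on; values are hypothetical.
# A GDAL-style geotransform is (upper_left_x, x_size, x_rot, upper_left_y, y_rot, y_size).
upper_left_x, x_size, upper_left_y, y_size = 500000.0, 10.0, 4200000.0, -10.0

def cell_center(row, col):
    # Offset by half a cell so the point sits at the pixel center, not its corner
    x = col * x_size + upper_left_x + (x_size / 2)
    y = row * y_size + upper_left_y + (y_size / 2)
    return x, y

print(cell_center(0, 0))  # (500005.0, 4199995.0)
```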
-#srs = osgeo.osr.SpatialReference()
-#srs.ImportFromWkt(r.GetProjection())
+    elif isinstance(raster,rasterio.io.DatasetReader):
+        pass
 
-#driver = osgeo.ogr.GetDriverByName('GPKG')
-#shapeData = driver.CreateDataSource(outputFileName)
+    else:
+        raise TypeError("Pass raster dataset or filepath for raster")
 
-#layer = shapeData.CreateLayer('ogr_pts', srs, osgeo.ogr.wkbPoint)
-#layerDefinition = layer.GetLayerDefn()
+    (upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = raster.get_transform()
+    indices = np.nonzero(raster.read(1) >= 1)
 
-#idField = osgeo.ogr.FieldDefn("id", osgeo.ogr.OFTInteger)
-#layer.CreateField(idField)
+    id =[None] * len(indices[0]);points = [None]*len(indices[0])
 
-id =[None] * len(indices[0]);points = [None]*len(indices[0])
+    # Iterate over the Numpy points..
+    i = 1
+    for y_index,x_index in zip(*indices):
+        x = x_index * x_size + upper_left_x + (x_size / 2)  # add half the cell size
+        y = y_index * y_size + upper_left_y + (y_size / 2)  # to center the point
+        points[i-1] = Point(x,y)
+        if index_option == 'reachID':
+            reachID = np.array(list(raster.sample((Point(x,y).coords), indexes=1))).item()  # check this; needs to add raster cell value + index
+            id[i-1] = reachID*10000 + i  # reachID + i/100
+        elif (index_option == 'featureID') | (index_option == 'pixelID'):
+            id[i-1] = i
+        i += 1
 
-# Iterate over the Numpy points..
-i = 1
-for y_index,x_index in tqdm(zip(*indices),total=len(indices[0])):
-    x = x_index * x_size + upper_left_x + (x_size / 2) #add half the cell size
-    y = y_index * y_size + upper_left_y + (y_size / 2) #to centre the point
+    pointGDF = gpd.GeoDataFrame({'id' : id, 'geometry' : points},crs=PREP_PROJECTION,geometry='geometry')
 
-    # get raster value
-    #reachID = a[y_index,x_index]
+    if output_points_filename == False:
+        return pointGDF
+    else:
+        pointGDF.to_file(output_points_filename,driver=getDriver(output_points_filename),index=False)
 
-    #point = osgeo.ogr.Geometry(osgeo.ogr.wkbPoint)
-    #point.SetPoint(0, x, y)
-    points[i-1] = Point(x,y)
+if __name__ == '__main__':
 
-    #feature = osgeo.ogr.Feature(layerDefinition)
-    #feature.SetGeometry(point)
-    #feature.SetFID(i)
-    if writeOption == 'reachID':
-        reachID = a[y_index,x_index]
-        id[i-1] = reachID
-        #feature.SetField("id",reachID)
-    elif (writeOption == 'featureID') |( writeOption == 'pixelID'):
-        #feature.SetField("id",i)
-        id[i-1] = i
-    #layer.CreateFeature(feature)
+    # Parse arguments
+    parser = argparse.ArgumentParser(description='Converts a raster to points')
+    parser.add_argument('-r','--raster',help='Raster to be converted to points',required=True,type=str)
+    parser.add_argument('-i', '--index-option',help='Indexing option',required=True,type=str,choices=['reachID','featureID','pixelID'])
+    parser.add_argument('-p', '--output-points-filename',help='Output points layer filename',required=False,type=str,default=False)
 
-    i += 1
+    args = vars(parser.parse_args())
 
-pointGDF = gpd.GeoDataFrame({'id' : id, 'geometry' : points},crs=boolean.proj,geometry='geometry')
-pointGDF.to_file(outputFileName,driver=getDriver(outputFileName),index=False)
+    raster = args['raster']
+    index_option = args['index_option']
+    output_points_filename = args['output_points_filename']
 
-print("Complete")
-#shapeData.Destroy()
+    convert_grid_cells_to_points(raster,index_option,output_points_filename)
diff --git a/src/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py
index 9614bfe32..c32afd990 100644
--- a/src/reduce_nhd_stream_density.py
+++ b/src/reduce_nhd_stream_density.py
@@ -7,6 +7,7 @@ import argparse
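The `reduce_nhd_stream_density.py` changes below refine how headwater points are snapped to their nearest stream segment. The underlying pattern, vectorized pygeos distances plus `argmin`, looks roughly like this sketch with made-up geometries:

```python
# Hedged sketch of the nearest-segment snap; geometries are made up.
import numpy as np
import pygeos

# Two hypothetical stream segments and one headwater point
streams = pygeos.linestrings([[(0, 0), (0, 5)], [(3, 0), (3, 5)]])
point = pygeos.points(2.5, 1.0)

# Distance from the point to every segment, then pick the closest
distances = pygeos.measurement.distance(streams, point)
min_index = np.argmin(distances)  # index of the closest segment (here: 1)
```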
 import pygeos
 from shapely.wkb import dumps
+from shapely.geometry import Point
 from utils.shared_functions import getDriver
 
@@ -29,7 +30,7 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_file
     for index, row in selected_wbd8.iterrows():
         huc = row["HUC8"]
 
-        # Double check that this is a nested HUC (probably overkill)
+        # Double check that this is a nested HUC
         if huc.startswith(str(huc4)):
 
             huc8_mask = selected_wbd8.loc[selected_wbd8.HUC8==huc]
@@ -44,6 +45,9 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_file
                 streams_subset = gpd.read_file(nhd_streams_, mask = huc8_mask)
             else:
                 streams_subset = nhd_streams.loc[nhd_streams.HUC8==huc].copy()
+
+            if headwaters_mask.is_headwater.dtype != 'int': headwaters_mask.is_headwater = headwaters_mask.is_headwater.astype('int')
+            if headwaters_mask.is_colocated.dtype != 'int': headwaters_mask.is_colocated = headwaters_mask.is_colocated.astype('int')
+            headwaters_mask = headwaters_mask.loc[headwaters_mask.is_headwater==True]
 
             if not streams_subset.empty:
                 streams_subset[headwater_col] = False
@@ -63,14 +67,14 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_file
                 # Add headwaters_id column
                 streams_subset[id_col] = n
 
-                # Find stream segment closest to headwater point
+                distance_from_upstream = {}
                 for index, point in headwaters_mask.iterrows():
 
                     # Convert headwaterpoint geometries to WKB representation
-                    wkb_points = dumps(point.geometry)
+                    wkb_point = dumps(point.geometry)
 
                     # Create pygeos headwaterpoint geometries from WKB representation
-                    pointbin_geom = pygeos.io.from_wkb(wkb_points)
+                    pointbin_geom = pygeos.io.from_wkb(wkb_point)
 
                     # Distance to each stream segment
                     distances = pygeos.measurement.distance(streambin_geom, pointbin_geom)
@@ -78,9 +82,30 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_file
                     # Find minimum distance
                     min_index = np.argmin(distances)
+                    headwater_point_name = point[headwater_id]
+
+                    # Find stream segment closest to headwater point
+                    if mainstem_flag==True:
+
+                        if point.is_colocated==True:
+
+                            closest_stream = streams_subset.iloc[min_index]
+                            distance_to_line = point.geometry.distance(Point(closest_stream.geometry.coords[-1]))
+
+                            print(f"{point.nws_lid} distance on line {closest_stream.NHDPlusID}: {np.round(distance_to_line,1)}")
+
+                            if not closest_stream.NHDPlusID in distance_from_upstream.keys():
+
+                                distance_from_upstream[closest_stream.NHDPlusID] = [point.nws_lid,distance_to_line]
+
+                            elif distance_from_upstream[closest_stream.NHDPlusID][1] > distance_to_line:
+
+                                distance_from_upstream[closest_stream.NHDPlusID] = [point.nws_lid,distance_to_line]
+
+                            headwater_point_name = distance_from_upstream[closest_stream.NHDPlusID][0]
 
                     # Closest segment to headwater
                     streams_subset.loc[min_index,headwater_col] = True
-                    streams_subset.loc[min_index,id_col] = point[headwater_id]
+                    streams_subset.loc[min_index,id_col] = headwater_point_name
 
                 headwater_streams = headwater_streams.append(streams_subset[['NHDPlusID',headwater_col,id_col,'HUC8']])
diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh
index 6e9cfca7b..cab360898 100755
--- a/src/run_by_unit.sh
+++ b/src/run_by_unit.sh
@@ -132,9 +132,8 @@ python3 -m memory_profiler $srcDir/burn_in_levees.py -dem $outputHucDataDir/dem_
 Tcount
 
 ## DEM Reconditioning ##
-# Using AGREE methodology, hydroenforce the DEM so that it is consistent
-# with the supplied stream network. This allows for more realistic catchment
-# delineation which is ultimately reflected in the output FIM mapping.
+# Using AGREE methodology, hydroenforce the DEM so that it is consistent with the supplied stream network.
+# This allows for more realistic catchment delineation which is ultimately reflected in the output FIM mapping.
 echo -e $startDiv"Creating AGREE DEM using $agree_DEM_buffer meter buffer"$stopDiv
 date -u
 Tstart
@@ -191,7 +190,7 @@ Tcount
 echo -e $startDiv"Performing lateral thalweg adjustment $hucNumber"$stopDiv
 date -u
 Tstart
-python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif
+python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif -th $thalweg_lateral_elev_threshold
 Tcount
 
 ## MASK BURNED DEM FOR STREAMS ONLY ###
@@ -277,7 +276,7 @@ echo -e $startDiv"Vectorize Pixel Centroids $hucNumber"$stopDiv
 date -u
 Tstart
 [ ! -f $outputHucDataDir/flows_points_pixels.gpkg ] && \
-$srcDir/reachID_grid_to_vector_points.py $demDerived_streamPixels $outputHucDataDir/flows_points_pixels.gpkg featureID
+$srcDir/reachID_grid_to_vector_points.py -r $demDerived_streamPixels -i featureID -p $outputHucDataDir/flows_points_pixels.gpkg
 Tcount
 
 ## GAGE WATERSHED FOR PIXELS ##
diff --git a/src/split_flows.py b/src/split_flows.py
index 67b69f7e9..9d51065dd 100755
--- a/src/split_flows.py
+++ b/src/split_flows.py
@@ -181,27 +181,19 @@ else:
     print ('Error: Could not add network attributes to stream segments')
 
-# Get Outlet Point Only
-#outlet = OrderedDict()
-#for i,segment in split_flows_gdf.iterrows():
-#    outlet[segment.geometry.coords[-1]] = segment[hydro_id]
-
-#hydroIDs_points = [hidp for hidp in outlet.values()]
-#split_points = [Point(*point) for point in outlet]
-
 # Get all vertices
 split_points = OrderedDict()
-for row in split_flows_gdf[['geometry',hydro_id, 'NextDownID']].iterrows():
-    lineString = row[1][0]
+for index, segment in split_flows_gdf.iterrows():
+    lineString = segment.geometry
 
     for point in zip(*lineString.coords.xy):
         if point in split_points:
-            if row[1][2] == split_points[point]:
+            if segment.NextDownID == split_points[point]:
                 pass
             else:
-                split_points[point] = row[1][1]
+                split_points[point] = segment[hydro_id]
         else:
-            split_points[point] = row[1][1]
+            split_points[point] = segment[hydro_id]
 
 hydroIDs_points = [hidp for hidp in split_points.values()]
 split_points = [Point(*point) for point in split_points]
diff --git a/tools/thalweg_drop_check.py b/tools/thalweg_drop_check.py
new file mode 100644
index 000000000..c56d743e5
--- /dev/null
+++ b/tools/thalweg_drop_check.py
@@ -0,0 +1,385 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import geopandas as gpd
+sys.path.append('/foss_fim/src')
+from utils.shared_variables import PREP_PROJECTION
+from shapely.geometry import Point, LineString
+import rasterio
+import pandas as pd
+import numpy as np
+import argparse
+import matplotlib.pyplot as plt
+import seaborn as sns
+from collections import deque
+from os.path import join
+from multiprocessing import Pool
+from utils.shared_functions import getDriver
+from rasterio import features
+from reachID_grid_to_vector_points import convert_grid_cells_to_points
+import warnings
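`thalweg_drop_check.py` repeatedly samples several elevation grids at the same point locations via rasterio's `sample()`. A minimal, self-contained version of that sampling pattern (the file path and coordinates below are hypothetical):

```python
# Hedged sketch of point sampling with rasterio; path and coordinates are hypothetical.
import numpy as np
import rasterio
from shapely.geometry import Point

with rasterio.open('dem_meters.tif') as dem:
    pt = Point(500005.0, 4199995.0)
    # sample() takes an iterable of (x, y) pairs; Point.coords provides one
    elev = np.array(list(dem.sample(pt.coords, indexes=1))).item()
    print(elev)
```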
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+"""
+    Check the thalweg for significant drops in elevation along each stream path.
+
+    Parameters
+    ----------
+    fim_dir : str
+        Directory containing FIM output folders.
+    output_dir : str
+        Directory for output plots, spatial layers, and tables.
+    stream_type : str
+        Stream layer to be evaluated ('derived' or 'burnline').
+    point_density : str
+        Elevation sampling density ('midpoints' or 'all_points').
+    number_of_jobs : str
+        Number of jobs.
+"""
+
+
+def compare_thalweg(args):
+
+    huc_dir = args[0]
+    stream_type = args[1]
+    point_density = args[2]
+    huc = args[3]
+    dem_meters_filename = args[4]
+    dem_lateral_thalweg_adj_filename = args[5]
+    dem_thalwegCond_filename = args[6]
+    profile_plots_filename = args[7]
+    profile_gpkg_filename = args[8]
+    profile_table_filename = args[9]
+    flows_grid_boolean_filename = args[10]
+    elevation_threshold = args[11]  # significant-drop threshold in meters (fix: was referenced but never passed in)
+
+    if stream_type == 'derived':
+
+        dem_derived_reaches_filename = os.path.join(huc_dir,'demDerived_reaches_split.gpkg')
+        streams = gpd.read_file(dem_derived_reaches_filename)
+        nhd_headwater_filename = os.path.join(huc_dir,'nhd_headwater_points_subset.gpkg')
+        wbd_filename = os.path.join(huc_dir,'wbd.gpkg')
+        wbd = gpd.read_file(wbd_filename)
+        headwaters_layer = gpd.read_file(nhd_headwater_filename,mask=wbd)
+        headwater_list = headwaters_layer.loc[headwaters_layer.pt_type == 'nws_lid']
+        stream_id = 'HydroID'
+
+    elif stream_type == 'burnline':
+
+        nhd_reaches_filename = os.path.join(huc_dir,'NHDPlusBurnLineEvent_subset.gpkg')
+        nhd_reaches = gpd.read_file(nhd_reaches_filename)
+        streams = nhd_reaches.copy()
+        headwaters_layer = None
+
+        # Get lists of all complete reaches using headwater attributes
+        headwater_list = streams.loc[streams.nws_lid!=''].nws_lid
+        stream_id = 'NHDPlusID'
+
+    headwater_col = 'is_headwater'
+    streams[headwater_col] = False
+    headwater_list = headwater_list.reset_index(drop=True)
+
+    if stream_type == 'derived':
+
+        streams['nws_lid'] = ''
+
+        if streams.NextDownID.dtype != 'int': streams.NextDownID = streams.NextDownID.astype(int)
+
+        min_dist = np.empty(len(headwater_list))
+        streams['min_dist'] = 1000
+
+        for i, point in headwater_list.iterrows():
+            streams['min_dist'] = [point.geometry.distance(line) for line in streams.geometry]
+            streams.loc[streams.min_dist==np.min(streams.min_dist),'nws_lid'] = point.site_id
+
+        headwater_list = headwater_list.site_id
+
+    streams.set_index(stream_id,inplace=True,drop=False)
+
+    # Collect headwater streams
+    single_stream_paths = []
+    dem_meters = rasterio.open(dem_meters_filename,'r')
+    index_option = 'reachID'
+    for index, headwater_site in enumerate(headwater_list):
+        stream_path = get_downstream_segments(streams.copy(),'nws_lid', headwater_site,'downstream',stream_id,stream_type)
+        stream_path = stream_path.reset_index(drop=True)
+        stream_path = stream_path.sort_values(by=['downstream_count'])
+        stream_path = stream_path.loc[stream_path.downstream==True]
+        if stream_type == 'burnline':
+            geom_value = []
+            for index, segment in stream_path.iterrows():
+                lineString = LineString(segment.geometry.coords[::-1])
+                geom_value = geom_value + [(lineString, segment.downstream_count)]
+            nhd_reaches_raster = features.rasterize(shapes=geom_value , out_shape=[dem_meters.height, dem_meters.width],fill=dem_meters.nodata,transform=dem_meters.transform, all_touched=True, dtype=np.float32)
+            flow_bool = rasterio.open(flows_grid_boolean_filename)
+            flow_bool_data = flow_bool.read(1)
+            nhd_reaches_raster = np.where(flow_bool_data == int(0), -9999.0, (nhd_reaches_raster).astype(rasterio.float32))
+            out_dem_filename = os.path.join(huc_dir,'NHDPlusBurnLineEvent_raster.tif')
+            with rasterio.open(out_dem_filename, "w", **dem_meters.profile, BIGTIFF='YES') as dest:
+                dest.write(nhd_reaches_raster, indexes = 1)
+            stream_path = convert_grid_cells_to_points(out_dem_filename,index_option)
+        stream_path["headwater_path"] = headwater_site
+        single_stream_paths = single_stream_paths + [stream_path]
+        print(f"length of {headwater_site} path: {len(stream_path)}")
+
+    # Collect elevation values from multiple grids along each individual reach point
+    dem_lateral_thalweg_adj = rasterio.open(dem_lateral_thalweg_adj_filename,'r')
+    dem_thalwegCond = rasterio.open(dem_thalwegCond_filename,'r')
+    thalweg_points = gpd.GeoDataFrame()
+    for path in single_stream_paths:
+        split_points = []
+        stream_ids = []
+        dem_m_elev = []
+        dem_burned_filled_elev = []
+        dem_lat_thal_adj_elev = []
+        dem_thal_adj_elev = []
+        headwater_path = []
+        index_count = []
+        for index, segment in path.iterrows():
+            if stream_type == 'derived':
+                linestring = segment.geometry
+                if point_density == 'midpoints':
+                    midpoint = linestring.interpolate(0.5,normalized=True)
+                    stream_ids = stream_ids + [segment[stream_id]]
+                    split_points = split_points + [midpoint]
+                    index_count = index_count + [segment.downstream_count]
+                    dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(midpoint).coords), indexes=1))).item()]
+                    dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(midpoint).coords), indexes=1))).item()]
+                    dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(midpoint).coords), indexes=1))).item()]
+                    headwater_path = headwater_path + [segment.headwater_path]
+                elif point_density == 'all_points':
+                    count=0
+                    for point in zip(*linestring.coords.xy):
+                        stream_ids = stream_ids + [segment[stream_id]]
+                        split_points = split_points + [Point(point)]
+                        count = count + 1
+                        index_count = index_count + [segment.downstream_count*1000 + count]
+                        dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(point).coords), indexes=1))).item()]
+                        dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(point).coords), indexes=1))).item()]
+                        dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(point).coords), indexes=1))).item()]
+                        headwater_path = headwater_path + [segment.headwater_path]
+            elif stream_type == 'burnline':
+                stream_ids = stream_ids + [segment['id']]
+                split_points = split_points + [Point(segment.geometry)]
+                index_count = index_count + [segment['id']]
+                dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(segment.geometry).coords), indexes=1))).item()]
+                dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(segment.geometry).coords), indexes=1))).item()]
+                dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(segment.geometry).coords), indexes=1))).item()]
+                headwater_path = headwater_path + [segment.headwater_path]
+        # gpd.GeoDataFrame({**data, "source": "dem_m"})
+        dem_m_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'dem_m', 'elevation_m': dem_m_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry')
+        dem_lat_thal_adj_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'dem_lat_thal_adj', 'elevation_m': dem_lat_thal_adj_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry')
+        dem_thal_adj_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'thal_adj_dem', 'elevation_m': dem_thal_adj_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry')
+
+        for raster in [dem_m_pts,dem_lat_thal_adj_pts,dem_thal_adj_pts]:
+
+            raster = raster.sort_values(by=['index_count'])
+            raster.set_index('index_count',inplace=True,drop=True)
+            raster = raster.reset_index(drop=True)
+            raster.index.names = ['index_count']
+            raster = raster.reset_index(drop=False)
+            thalweg_points = thalweg_points.append(raster,ignore_index = True)
+            del raster
+
+        del dem_m_pts,dem_lat_thal_adj_pts,dem_thal_adj_pts
+
+    del dem_lateral_thalweg_adj,dem_thalwegCond,dem_meters
+
+    try:
+        # Remove nodata_pts and convert elevation to ft
+        thalweg_points = thalweg_points.loc[thalweg_points.elevation_m > 0.0]
+        thalweg_points.elevation_m = np.round(thalweg_points.elevation_m,3)
+        thalweg_points['elevation_ft'] = np.round(thalweg_points.elevation_m*3.28084,3)
+
+        # Plot thalweg profile
+        plot_profile(thalweg_points, profile_plots_filename)
+
+        # Filter final thalweg adjusted layer
+        thal_adj_points = thalweg_points.loc[thalweg_points.source=='thal_adj_dem'].copy()
+        # thal_adj_points.to_file(profile_gpkg_filename,driver=getDriver(profile_gpkg_filename))
+
+        # Identify significant rises/drops in elevation
+        thal_adj_points['elev_change'] = thal_adj_points.groupby(['headwater_path', 'source'])['elevation_m'].apply(lambda x: x - x.shift())
+        elev_changes = thal_adj_points.loc[(thal_adj_points.elev_change<=-elevation_threshold) | (thal_adj_points.elev_change>0.0)]
+
+        if not elev_changes.empty:
+            # elev_changes.to_csv(profile_table_filename,index=False)
+            elev_changes.to_file(profile_gpkg_filename,index=False,driver=getDriver(profile_gpkg_filename))
+
+        # Zoom in to plot only areas with steep elevation changes
+        # select_streams = elev_changes.stream_id.to_list()
+        # downstream_segments = [index + 1 for index in select_streams]
+        # upstream_segments = [index - 1 for index in select_streams]
+        # select_streams = list(set(upstream_segments + downstream_segments + select_streams))
+        # thal_adj_points_select = thal_adj_points.loc[thal_adj_points.stream_id.isin(select_streams)]
+        # plot_profile(thal_adj_points_select, profile_plots_filename_zoom)
+
+    except:
+        print(f"huc {huc} has {len(thalweg_points)} thalweg points")
+
+
+def get_downstream_segments(streams, headwater_col,headwater_id,flag_column,stream_id,stream_type):
+
+    streams[flag_column] = False
+    streams['downstream_count'] = -9
+    streams.loc[streams[headwater_col]==headwater_id,flag_column] = True
+    streams.loc[streams[headwater_col]==headwater_id,'downstream_count'] = 0
+    count = 0
+
+    Q = deque(streams.loc[streams[headwater_col]==headwater_id,stream_id].tolist())
+    visited = set()
+
+    while Q:
+        q = Q.popleft()
+
+        if q in visited:
+            continue
+
+        visited.add(q)
+
+        count = count + 1
+        if stream_type == 'burnline':
+
+            toNode,DnLevelPat = streams.loc[q,['ToNode','DnLevelPat']]
+            downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist()
+
+            # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. exclude segments that are diversions)
+            if len(set(downstream_ids)) > 1:  # special case: remove duplicate NHDPlusIDs
+
+                relevant_ids = [segment for segment in downstream_ids if DnLevelPat == streams.loc[segment,'LevelPathI']]
+
+            else:
+
+                relevant_ids = downstream_ids
+
+        elif stream_type == 'derived':
+
+            toNode = streams.loc[q,['NextDownID']].item()
+            relevant_ids = streams.loc[streams[stream_id] == toNode,:].index.tolist()
+
+        streams.loc[relevant_ids,flag_column] = True
+        streams.loc[relevant_ids,'downstream_count'] = count
+
+        for i in relevant_ids:
+
+            if i not in visited:
+                Q.append(i)
+
+    streams = streams.loc[streams[flag_column],:]
+
+    return streams
+
+
+def plot_profile(elevation_table,profile_plots_filename):
+
+    num_plots = len(elevation_table.headwater_path.unique())
+    unique_rasters = elevation_table.source.unique()
+
+    if num_plots > 3:
+        columns = int(np.ceil(num_plots / 3))
+    else:
+        columns = 1
+
+    # palette = dict(zip(unique_rasters, sns.color_palette(n_colors=len(unique_rasters))))
+    # palette.update({'dem_m':'gray'})
+    sns.set(style="ticks")
+
+    if len(unique_rasters) > 1:
+        g = sns.FacetGrid(elevation_table, col="headwater_path", hue="source", hue_order=['dem_m', 'dem_lat_thal_adj', 'thal_adj_dem'], sharex=False, sharey=False,col_wrap=columns)
+    else:
+        g = sns.FacetGrid(elevation_table, col="headwater_path", hue="source", sharex=False, sharey=False,col_wrap=columns)
+
+    g.map(sns.lineplot, "index_count", "elevation_ft", palette="tab20c")
+    g.set_axis_labels(x_var="Longitudinal Profile (index)", y_var="Elevation (ft)")
+
+    # Iterate through each axis to get individual y-axis bounds
+    for ax in g.axes.flat:
+
+        mins = []
+        maxes = []
+        for line in ax.lines:
+            mins = mins + [min(line.get_ydata())]
+            maxes = maxes + [max(line.get_ydata())]
+        min_y = min(mins) - (max(maxes) - min(mins))/10
+        max_y = max(maxes) + (max(maxes) - min(mins))/10
+        ax.set_ylim(min_y,max_y)
+
+        # if len(unique_rasters) > 1:
+        #     ax.lines[0].set_linestyle("--")
+        #     ax.lines[0].set_color('gray')
+
+    # box = ax.get_position()
+    # ax.set_position([box.x0, box.y0 + box.height * 0.1,box.width, box.height * 0.9])
+
+    # Adjust the arrangement of the plots
+    # g.fig.tight_layout(w_pad=5) #w_pad=2
+    g.add_legend()
+    # plt.legend(bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0)
+    plt.subplots_adjust(bottom=0.25)
+    plt.savefig(profile_plots_filename)
+    plt.close()
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description='Generate thalweg elevation profile plots and identify significant drops in thalweg elevation')
+    parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str)
+    parser.add_argument('-output_dir','--output-dir', help='thalweg profile output folder', required=True,type=str)
+    # parser.add_argument('-rasters','--raster-list',help='list of rasters to be evaluated',required=True,type=str)
+    parser.add_argument('-stream_type','--stream-type',help='stream layer to be evaluated',required=True,type=str,choices=['derived','burnline'])
+    parser.add_argument('-point_density','--point-density',help='elevation sampling density',required=True,type=str,choices=['midpoints','all_points'])
+    parser.add_argument('-th','--elevation_threshold',help='significant elevation drop threshold in meters.',required=True,type=float)
+    parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int)
+
+    args = vars(parser.parse_args())
+
+    fim_dir = args['fim_dir']
+    output_dir = args['output_dir']
+    # raster_list = args['raster_list']
+    stream_type = args['stream_type']
+    point_density = args['point_density']
+    elevation_threshold = args['elevation_threshold']
+    number_of_jobs = args['number_of_jobs']
+
+    # dem_meters_dir = os.environ.get('dem_meters')
+
+    plots_dir = join(output_dir,'plots')
+    os.makedirs(plots_dir, exist_ok=True)
+    spatial_dir = os.path.join(output_dir,'spatial_layers')
+    os.makedirs(spatial_dir, exist_ok=True)
+
+    # Open log file
+    sys.__stdout__ = sys.stdout
+    log_file = open(join(output_dir,'thalweg_profile_comparison.log'),"w")
+    sys.stdout = log_file
+
+    procs_list = []
+    huc_list = os.listdir(fim_dir)
+    for huc in huc_list:
+        if huc != 'logs':
+
+            huc_dir = os.path.join(fim_dir,huc)
+            flows_grid_boolean_filename = os.path.join(huc_dir,'flows_grid_boolean.tif')
+            dem_meters_filename = os.path.join(huc_dir,'dem_meters.tif')
+            dem_lateral_thalweg_adj_filename = os.path.join(huc_dir,'dem_lateral_thalweg_adj.tif')
+            dem_thalwegCond_filename = os.path.join(huc_dir,'dem_thalwegCond.tif')
+            profile_plots_filename = os.path.join(plots_dir,f"profile_drop_plots_{huc}_{point_density}_{stream_type}.png")
+            profile_gpkg_filename = os.path.join(spatial_dir,f"thalweg_elevation_changes_{huc}_{point_density}_{stream_type}.gpkg")
+            profile_table_filename = os.path.join(spatial_dir,f"thalweg_elevation_changes_{huc}_{point_density}_{stream_type}.csv")
+
+            procs_list.append([huc_dir,stream_type,point_density,huc,dem_meters_filename,dem_lateral_thalweg_adj_filename,dem_thalwegCond_filename,profile_plots_filename,profile_gpkg_filename,profile_table_filename,flows_grid_boolean_filename,elevation_threshold])
+
+    # Initiate multiprocessing
+    print(f"Generating thalweg elevation profiles for {len(procs_list)} hucs using {number_of_jobs} jobs")
+    with Pool(processes=number_of_jobs) as pool:
+        # Get elevation values along thalweg for each headwater stream path
+        pool.map(compare_thalweg, procs_list)
+
+    # Append all elevation change spatial layers to a single gpkg
+    spatial_list = os.listdir(spatial_dir)
+    agg_thalweg_elevations_gpkg_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.gpkg")
+    agg_thalweg_elevation_table_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.csv")
+    for layer in spatial_list:
+
+        huc_gpd = gpd.read_file(os.path.join(spatial_dir,layer))
+
+        # Write aggregate layer
+        if os.path.isfile(agg_thalweg_elevations_gpkg_fileName):
+            huc_gpd.to_file(agg_thalweg_elevations_gpkg_fileName,driver=getDriver(agg_thalweg_elevations_gpkg_fileName),index=False, mode='a')
+        else:
+            huc_gpd.to_file(agg_thalweg_elevations_gpkg_fileName,driver=getDriver(agg_thalweg_elevations_gpkg_fileName),index=False)
+
+        del huc_gpd
+
+    # Create csv of elevation table
+    huc_table = gpd.read_file(agg_thalweg_elevations_gpkg_fileName)
+    huc_table.to_csv(agg_thalweg_elevation_table_fileName,index=False)
+
+    # Close log file
+    sys.stdout = sys.__stdout__
+    log_file.close()
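The aggregation step above appends each HUC's layer into a single GeoPackage using GeoPandas' append mode. A stripped-down sketch of that pattern, with hypothetical file names:

```python
# Hedged sketch of GeoPackage aggregation via mode='a'; file names are hypothetical.
import os
import geopandas as gpd

agg = 'agg_thalweg_elevation_changes.gpkg'
for layer in ['huc_a.gpkg', 'huc_b.gpkg']:
    gdf = gpd.read_file(layer)
    if os.path.isfile(agg):
        # Append subsequent layers to the existing GeoPackage
        gdf.to_file(agg, driver='GPKG', index=False, mode='a')
    else:
        # First layer creates the file
        gdf.to_file(agg, driver='GPKG', index=False)
```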