From ca8cb7e8000befe1b05495277f0b2d362565a860 Mon Sep 17 00:00:00 2001 From: Brad Date: Thu, 10 Jun 2021 10:54:32 -0500 Subject: [PATCH] Feature to evaluate alternative CatFIM technique performance Added eval_alt_catfim.py to evaluate performance of alternative CatFIM techniques. This resolves #414. --- CHANGELOG.md | 8 ++ tools/eval_alt_catfim.py | 221 +++++++++++++++++++++++++++++++++ tools/synthesize_test_cases.py | 2 - 3 files changed, 229 insertions(+), 2 deletions(-) create mode 100644 tools/eval_alt_catfim.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 81f2e3962..a12d1911c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

+## v3.0.19.0 - 2021-06-10 - [PR #415](https://github.com/NOAA-OWP/cahaba/pull/415) + +Feature to evaluate performance of alternative CatFIM techniques. + +## Additions +- Added `eval_alt_catfim.py` to evaluate performance of alternative CatFIM techniques. + +

## v3.0.18.0 - 2021-06-09 - [PR #404](https://github.com/NOAA-OWP/cahaba/pull/404) To help analyze the memory consumption of the Fim Run process, the python module `memory-profiler` has been added to give insights into where peak memory usage is with in the codebase.
diff --git a/tools/eval_alt_catfim.py b/tools/eval_alt_catfim.py
new file mode 100644
index 000000000..ffe86cf0e
--- /dev/null
+++ b/tools/eval_alt_catfim.py
@@ -0,0 +1,245 @@
+"""Evaluate alternative CatFIM inundation extents against AHPS benchmark grids.
+
+For each HUC / AHPS site / magnitude found under the CatFIM directory that has
+a matching NWS or USGS benchmark raster, contingency statistics are computed
+and the per-site results are merged into one master metrics CSV.
+"""
+
+import os
+import sys
+import argparse
+from multiprocessing import Pool
+import csv
+import json
+
+from tools_shared_variables import TEST_CASES_DIR
+from tools_shared_functions import compute_contingency_stats_from_rasters
+
+
+def create_master_metrics_csv_alt(master_metrics_csv_output, json_list, version):
+    """Merge per-site statistics JSON files into one master metrics CSV.
+
+    Args:
+        master_metrics_csv_output: Path of the CSV file to write; an existing
+            file is overwritten.
+        json_list: Paths named ``<nws_lid>_<huc>_<benchmark>_<magnitude>.json``.
+            Only the directory and the parsed name parts are used; statistics
+            are read from ``<nws_lid>_b0m_stats.json`` in the same directory.
+        version: Value written to the ``version`` column of every row.
+    """
+
+    # Construct header.
+    metrics_to_write = [
+        'true_negatives_count',
+        'false_negatives_count',
+        'true_positives_count',
+        'false_positives_count',
+        'contingency_tot_count',
+        'cell_area_m2',
+        'TP_area_km2',
+        'FP_area_km2',
+        'TN_area_km2',
+        'FN_area_km2',
+        'contingency_tot_area_km2',
+        'predPositive_area_km2',
+        'predNegative_area_km2',
+        'obsPositive_area_km2',
+        'obsNegative_area_km2',
+        'positiveDiff_area_km2',
+        'CSI',
+        'FAR',
+        'TPR',
+        'TNR',
+        'PPV',
+        'NPV',
+        'ACC',
+        'Bal_ACC',
+        'MCC',
+        'EQUITABLE_THREAT_SCORE',
+        'PREVALENCE',
+        'BIAS',
+        'F1_SCORE',
+        'TP_perc',
+        'FP_perc',
+        'TN_perc',
+        'FN_perc',
+        'predPositive_perc',
+        'predNegative_perc',
+        'obsPositive_perc',
+        'obsNegative_perc',
+        'positiveDiff_perc',
+        'masked_count',
+        'masked_perc',
+        'masked_area_km2',
+    ]
+
+    header = (['version', 'nws_lid', 'magnitude', 'huc'] + metrics_to_write
+              + ['full_json_path', 'flow', 'benchmark_source', 'extent_config', 'calibrated'])
+    list_to_write = [header]
+
+    for full_json_path in json_list:
+
+        # Parse variables from json path.
+        json_dir, json_name = os.path.split(full_json_path)
+        nws_lid, huc, benchmark_source, magnitude = json_name.split('_')[:4]
+        magnitude = magnitude.replace('.json', '')
+
+        # NOTE(review): the per-site stats file name is assumed to be what
+        # compute_contingency_stats_from_rasters writes for the site's
+        # 'include' mask layer -- confirm against tools_shared_functions.
+        real_json_path = os.path.join(json_dir, nws_lid + '_b0m_stats.json')
+
+        # Close the file promptly instead of leaking the handle.
+        with open(real_json_path) as stats_file:
+            stats_dict = json.load(stats_file)
+
+        row = [version, nws_lid, magnitude, huc]
+        row.extend(stats_dict[metric] for metric in metrics_to_write)
+        # Trailing columns: full_json_path, flow, benchmark_source,
+        # extent_config, calibrated.
+        row.extend([real_json_path, 'NA', benchmark_source, 'MS', 'yes'])
+        list_to_write.append(row)
+
+    with open(master_metrics_csv_output, 'w', newline='') as csvfile:
+        csv.writer(csvfile).writerows(list_to_write)
+
+
+def process_alt_comparison(args):
+    """Compute contingency statistics for one predicted/benchmark raster pair.
+
+    Args:
+        args: Sequence of nine items, packed so the function can be used with
+            ``Pool.map``: predicted raster path, benchmark raster path,
+            agreement raster path, stats CSV path, stats JSON path,
+            mask values, stats modes list, test id and mask dictionary.
+    """
+
+    (predicted_raster_path, benchmark_raster_path, agreement_raster,
+     stats_csv, stats_json, _mask_values, stats_modes_list,
+     test_id, mask_dict) = args[:9]
+
+    # NOTE(review): the mask values supplied in ``args`` are not forwarded; an
+    # empty list is always used. Confirm this is intended.
+    compute_contingency_stats_from_rasters(predicted_raster_path,
+                                           benchmark_raster_path,
+                                           agreement_raster,
+                                           stats_csv=stats_csv,
+                                           stats_json=stats_json,
+                                           mask_values=[],
+                                           stats_modes_list=stats_modes_list,
+                                           test_id=test_id,
+                                           mask_dict=mask_dict,
+                                           )
+
+    print("Finished processing " + agreement_raster)
+
+
+def _parse_arguments():
+    """Parse the command-line arguments and return them as a dictionary."""
+    parser = argparse.ArgumentParser(description='Produces metrics for alternative CatFIM.')
+    parser.add_argument('-d', '--catfim-directory', help='Path to directory storing CatFIM outputs. This is the most parent dir, usually named by a version.', required=True)
+    parser.add_argument('-w', '--output-workspace', help='Path to directory where evaluation outputs are written. Its base name is used as the version in the master metrics CSV.', required=True)
+    parser.add_argument('-m', '--master-metrics-csv', help='Define path for master metrics CSV file.', required=False, default=None)
+    parser.add_argument('-j', '--job-number', help='Number of processes to use. Default is 1.', required=False, default=1, type=int)
+    return vars(parser.parse_args())
+
+
+def _collect_comparisons(catfim_dir, output_workspace):
+    """Find every CatFIM grid that has a benchmark grid to compare against.
+
+    Output directories are created under ``output_workspace`` as
+    ``<huc>/<site>/<category>``.
+
+    Returns:
+        A tuple ``(procs_list, json_list)``: the argument lists for
+        ``process_alt_comparison`` and the matching stats JSON paths.
+    """
+    procs_list = []
+    json_list = []
+
+    for huc in os.listdir(catfim_dir):
+        huc_dir_path = os.path.join(catfim_dir, huc)
+        # Only 8-character entries are treated as HUC directories; stray
+        # files of that length are skipped so os.listdir below cannot fail.
+        if len(huc) != 8 or not os.path.isdir(huc_dir_path):
+            continue
+
+        huc_workspace = os.path.join(output_workspace, huc)
+        os.makedirs(huc_workspace, exist_ok=True)
+
+        # Loop through AHPS sites.
+        for site in os.listdir(huc_dir_path):
+            site_dir = os.path.join(huc_dir_path, site)
+
+            site_workspace = os.path.join(huc_workspace, site)
+            os.makedirs(site_workspace, exist_ok=True)
+
+            for category in ['action', 'minor', 'moderate', 'major']:
+                # Presumptuously define inundation grid path.
+                category_grid_path = os.path.join(site_dir, site + '_' + category + '_extent_' + huc + '.tif')
+                if not os.path.exists(category_grid_path):
+                    continue
+
+                site_category_workspace = os.path.join(site_workspace, category)
+                os.makedirs(site_category_workspace, exist_ok=True)
+
+                # Map path to benchmark data, both NWS and USGS.
+                for benchmark_type in ['nws', 'usgs']:
+                    benchmark_grid = os.path.join(TEST_CASES_DIR, benchmark_type + '_test_cases', 'validation_data_' + benchmark_type, huc, site, category, 'ahps_' + site + '_huc_' + huc + '_extent_' + category + '.tif')
+                    if not os.path.exists(benchmark_grid):
+                        continue
+
+                    # Shared file name stem for this comparison's outputs.
+                    file_handle = site + '_' + huc + '_' + benchmark_type + '_' + category
+                    agreement_raster = os.path.join(site_category_workspace, file_handle + '.tif')
+                    stats_csv = os.path.join(site_category_workspace, file_handle + '.csv')
+                    stats_json = os.path.join(site_category_workspace, file_handle + '.json')
+
+                    # NOTE(review): these mask paths are hard-coded rather than
+                    # built from TEST_CASES_DIR -- confirm they match.
+                    mask_dict = {'levees': {'path': '/data/test_cases/other/zones/leveed_areas_conus.shp', 'buffer': None, 'operation': 'exclude'},
+                                 'waterbodies': {'path': '/data/test_cases/other/zones/nwm_v2_reservoirs.shp', 'buffer': None, 'operation': 'exclude'},
+                                 site: {'path': '/data/test_cases/{benchmark_type}_test_cases/validation_data_{benchmark_type}/{huc}/{site}/{site}_domain.shp'.format(benchmark_type=benchmark_type, site=site, huc=huc), 'buffer': None, 'operation': 'include'}}
+
+                    json_list.append(stats_json)
+                    procs_list.append([category_grid_path, benchmark_grid, agreement_raster, stats_csv, stats_json, None, ['total_area'], '', mask_dict])
+
+    return procs_list, json_list
+
+
+def main():
+    """Run every comparison, then merge the statistics into the master CSV."""
+    args = _parse_arguments()
+    catfim_dir = args['catfim_directory']
+    output_workspace = args['output_workspace']
+    job_number = args['job_number']
+    master_metrics_csv = args['master_metrics_csv']
+
+    if master_metrics_csv is None:
+        master_metrics_csv = os.path.join(output_workspace, 'master_metrics.csv')
+
+    if not os.path.exists(catfim_dir):
+        print("CatFIM directory: " + catfim_dir + " does not exist.")
+        # A bare `quit` is a no-op expression; actually stop here.
+        sys.exit(1)
+
+    os.makedirs(output_workspace, exist_ok=True)
+
+    procs_list, json_list = _collect_comparisons(catfim_dir, output_workspace)
+
+    # Either multiprocess or process serially, depending on user specification.
+    if job_number > 1:
+        with Pool(processes=job_number) as pool:
+            pool.map(process_alt_comparison, procs_list)
+    else:
+        for comparison_args in procs_list:
+            process_alt_comparison(comparison_args)
+
+    # Merge stats into single file. normpath drops a trailing separator that
+    # would otherwise leave the version name empty.
+    version = os.path.basename(os.path.normpath(output_workspace))
+    create_master_metrics_csv_alt(master_metrics_csv, json_list, version)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/synthesize_test_cases.py b/tools/synthesize_test_cases.py index de035359c..9fef7841d 100755 --- a/tools/synthesize_test_cases.py +++ b/tools/synthesize_test_cases.py @@ -175,8 +175,6 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include next(reader) for row in reader: flow = row[1] - if nws_lid == 'mcc01': - print(flow) stats_dict = json.load(open(full_json_path)) for metric in metrics_to_write: