Skip to content

Commit

Permalink
Updated MCS tracking for MCSMIP. (#69)
Browse files Browse the repository at this point in the history
1. Removed unused PF thresholds and added new ones in the output file global attributes in: robustmcspf_saag.py.
2. Added two config examples for DYAMOND:
- config_dyamond_summer_obs.yml
- config_dyamond_summer_ifs.yml
3. Changed output base_time type to datetime64[ns] to suppress warning message from Xarray in: tracksingle_drift.py
  • Loading branch information
feng045 authored Jul 14, 2023
1 parent c25cf69 commit 261bf58
Show file tree
Hide file tree
Showing 4 changed files with 315 additions and 14 deletions.
153 changes: 153 additions & 0 deletions config/config_dyamond_summer_ifs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
---
# DYAMOND MCS tracking configuration file
# Tracking uses collocated Tb + Precipitation

# Processing steps:
run_idfeature : True
run_tracksingle : True
run_gettracks : True
run_trackstats : True
run_identifymcs : True
run_matchpf : True
run_robustmcs : True
run_mapfeature : True
run_speed : True

# Parallel processing set up
# run_parallel: 1 (local cluster), 2 (Dask MPI)
run_parallel: 1
nprocesses : 128 # Number of processors to use if run_parallel=1
dask_tmp_dir: '/tmp' # Dask temporary directory if run_parallel=1
timeout: 360 # [seconds] Dask timeout limit

# Start/end date and time
startdate: '20160801.0000'
enddate: '20160910.0000'

# Specify tracking input data date/time string format
# This is the preprocessed file that contains Tb & rainrate
# E.g., databasename20181101.011503.nc --> yyyymodd.hhmmss
# E.g., databasename2018-11-01_01:15:00 --> yyyy-mo-dd_hh:mm:ss
time_format: 'yyyymoddhh'
databasename: 'pr_rlut_ifs_'

# Input files directory
clouddata_path: '/pscratch/sd/f/feng045/DYAMOND/Summer/IFS/olr_pcp/'
# Working directory for the tracking data
root_path: '/pscratch/sd/f/feng045/DYAMOND/Summer/IFS/'
# Working sub-directory names
tracking_path_name: 'tracking'
stats_path_name: 'stats'
pixel_path_name: 'mcstracking'

# Land mask file
landmask_filename: '/global/cfs/cdirs/m1867/gsharing/MCSMIP/DYAMOND/maps/IMERG_landmask_180W-180E_60S-60N.nc'
# landmask_filename: '/global/cfs/cdirs/m1867/gsharing/MCSMIP/DYAMOND/maps/IMERG_landmask_0-360_60S-60N.nc'
landmask_varname: 'landseamask'
landmask_x_dimname: 'lon'
landmask_y_dimname: 'lat'
landmask_x_coordname: 'lon'
landmask_y_coordname: 'lat'
landfrac_thresh: [0, 90] # Define the range of fraction for land (depends on what value is land the landmask file)

# Input dataset structure
pixel_radius: 10.0 # [km] Spatial resolution of the input data
datatimeresolution: 1.0 # [hour] Temporal resolution of the input data
# Variable names in the input data
olr2tb: True
olr_varname: 'ttr'
pcp_varname: 'tp'
clouddatasource: 'model'
# pfdatasource: 'imerg'
time_dimname: 'time'
x_dimname: 'lon'
y_dimname: 'lat'
bnds_dimname: 'bnds'
time_coordname: 'time'
x_coordname: 'lon'
y_coordname: 'lat'

# Specify types of feature being tracked
# This adds additional feature-specific statistics to be computed
feature_type: 'tb_pf'

# Cloud identification parameters
mincoldcorepix: 4 # Minimum number of pixels for the cold core
smoothwindowdimensions: 10 # Dimension of the Box2DKernel filter on Tb.
medfiltsize: 5 # Window size to perform medfilt2d to fill missing Tb pixels, must be an odd number
geolimits: [-60, -360, 60, 360] # [lat_min, lon_min, lat_max, lon_max] 4-element array to subset domain boundaries
area_thresh: 800 # [km^2] Minimum area to define a cloud
miss_thresh: 0.4 # Missing data fraction threshold. If missing data exceeds this, the time frame will be omitted.
cloudtb_core: 225.0 # [K]
cloudtb_cold: 241.0 # [K]
cloudtb_warm: 261.0 # [K]
cloudtb_cloud: 261.0 # [K]
absolutetb_threshs: [160, 330] # K [min, max] absolute Tb range allowed.
warmanvilexpansion: 0 # Not working yet, set this to 0 for now
cloudidmethod: 'label_grow'
# Specific parameters to link cloud objects using PF
linkpf: 1 # Set to 1 to turn on linkpf option; default: 0
pf_smooth_window: 5 # Smoothing window for identifying PF
pf_dbz_thresh: 3 # [dBZ] for reflectivity, or [mm/h] for rainrate
pf_link_area_thresh: 648.0 # [km^2]

# Tracking parameters
othresh: 0.5 # overlap fraction threshold. Clouds that overlap more than this between times are tracked.
timegap: 3.1 # [hour] If missing data duration longer than this, tracking restarts
nmaxlinks: 50 # Maximum number of clouds that any single cloud can be linked to
maxnclouds: 3000 # Maximum number of clouds in one snapshot
duration_range: [2, 400] # A vector [minlength,maxlength] to specify the duration range for the tracks
# Flag to remove short-lived tracks [< min(duration_range)] that are not mergers/splits with other tracks
# 0:keep all tracks; 1:remove short tracks
remove_shorttracks: 1
# Set this flag to 1 to write a dense (2D) trackstats netCDF file
# Note that for datasets with lots of tracks, the memory consumption could be large
trackstats_dense_netcdf: 1
# Minimum time difference threshold to match track stats with cloudid files
match_pixel_dt_thresh: 60.0 # seconds

# MCS Tb parameters
mcs_tb_area_thresh: 40000 # [km^2] Tb area threshold
mcs_tb_duration_thresh: 4 # [hour] Tb minimum length of a mcs
mcs_tb_split_duration: 12 # [hour] Tb tracks smaller or equal to this length will be included with the MCS splits from
mcs_tb_merge_duration: 12 # [hour] Tb tracks smaller or equal to this length will be included with the MCS merges into
mcs_tb_gap: 1 # [unitless] Allowable temporal gap in Tb data for MCS area threshold
# MCS PF parameters
mcs_pf_majoraxis_thresh: 0 # [km] MCS PF major axis length lower limit
max_pf_majoraxis_thresh: 100000 # [km] MCS PF major axis length upper limit
mcs_pf_durationthresh: 4 # [hour] PF minimum length of mcs
mcs_pf_majoraxis_for_lifetime: 20 # [km] Minimum PF size to count PF lifetime
mcs_pf_gap: 1 # [unitless] Allowable temporal gap in PF data for MCS characteristics

# Specify rain rate parameters
pf_rr_thres: 2.0 # [mm/hr] Rain rate threshold
nmaxpf: 3 # Maximum number of precipitation features that can be within a cloud feature
nmaxcore: 20 # Maximum number of convective cores that can be within a cloud feature
pcp_thresh: 1.0 # Pixels with hourly precipitation larger than this will be labeled with track number
heavy_rainrate_thresh: 10.0 # [mm/hr] Heavy rain rate threshold
mcs_min_rainvol_thresh: 20000 # [km^2 mm/h] Min rain volumne threshold
mcs_volrain_duration_thresh: 1.0 # [hour] Min volume rain threshold

# Define tracked feature variable names
feature_varname: 'feature_number'
nfeature_varname: 'nfeatures'
featuresize_varname: 'npix_feature'

# Track statistics output file dimension names
tracks_dimname: 'tracks'
times_dimname: 'times'
pf_dimname: 'nmaxpf'
fillval: -9999
# MCS track stats file base names
mcstbstats_filebase: 'mcs_tracks_'
mcspfstats_filebase: 'mcs_tracks_pf_'
mcsrobust_filebase: 'mcs_tracks_robust_'
pixeltracking_filebase: 'mcstrack_'
mcsfinal_filebase: 'mcs_tracks_final_'

# Feature movement speed parameters
lag_for_speed: 1 # lag intervals between tracked features to calculate movement
track_number_for_speed: "pcptracknumber"
track_field_for_speed: 'precipitation'
min_size_thresh_for_speed: 20 # [km] Min PF major axis length to calculate movement
max_speed_thresh: 50 # [m/s]
153 changes: 153 additions & 0 deletions config/config_dyamond_summer_obs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
---
# DYAMOND MCS tracking configuration file
# Tracking uses collocated Tb + Precipitation

# Processing steps:
run_idfeature : True
run_tracksingle : True
run_gettracks : True
run_trackstats : True
run_identifymcs : True
run_matchpf : True
run_robustmcs : True
run_mapfeature : True
run_speed : True

# Parallel processing set up
# run_parallel: 1 (local cluster), 2 (Dask MPI)
run_parallel: 1
nprocesses : 128 # Number of processors to use if run_parallel=1
dask_tmp_dir: '/tmp' # Dask temporary directory if run_parallel=1
timeout: 360 # [seconds] Dask timeout limit

# Start/end date and time
startdate: '20160801.0000'
enddate: '20160910.0000'

# Specify tracking input data date/time string format
# This is the preprocessed file that contains Tb & rainrate
# E.g., databasename20181101.011503.nc --> yyyymodd.hhmmss
# E.g., databasename2018-11-01_01:15:00 --> yyyy-mo-dd_hh:mm:ss
time_format: 'yyyymoddhh'
databasename: 'merg_'

# Input files directory
clouddata_path: '/pscratch/sd/f/feng045/DYAMOND/Summer/OBS/olr_pcp/'
# Working directory for the tracking data
root_path: '/pscratch/sd/f/feng045/DYAMOND/Summer/OBS/'
# Working sub-directory names
tracking_path_name: 'tracking'
stats_path_name: 'stats'
pixel_path_name: 'mcstracking'

# Land mask file
landmask_filename: '/global/cfs/cdirs/m1867/gsharing/MCSMIP/DYAMOND/maps/IMERG_landmask_180W-180E_60S-60N.nc'
# landmask_filename: '/global/cfs/cdirs/m1867/gsharing/MCSMIP/DYAMOND/maps/IMERG_landmask_0-360_60S-60N.nc'
landmask_varname: 'landseamask'
landmask_x_dimname: 'lon'
landmask_y_dimname: 'lat'
landmask_x_coordname: 'lon'
landmask_y_coordname: 'lat'
landfrac_thresh: [0, 90] # Define the range of fraction for land (depends on what value is land the landmask file)

# Input dataset structure
pixel_radius: 10.0 # [km] Spatial resolution of the input data
datatimeresolution: 1.0 # [hour] Temporal resolution of the input data
# Variable names in the input data
tb_varname: 'Tb'
pcp_varname: 'precipitationCal'
clouddatasource: 'gpmirimerg'
time_dimname: 'time'
x_dimname: 'lon'
y_dimname: 'lat'
time_coordname: 'time'
x_coordname: 'lon'
y_coordname: 'lat'
idclouds_hourly: 1 # 0: No, 1: Yes
idclouds_minute: 00 # 0 or 30 min, which minute mark to use the Tb data
idclouds_dt_thresh: 5 # [minute], time difference allowed between actual data and idclouds_minute

# Specify types of feature being tracked
# This adds additional feature-specific statistics to be computed
feature_type: 'tb_pf'

# Cloud identification parameters
mincoldcorepix: 4 # Minimum number of pixels for the cold core
smoothwindowdimensions: 10 # Dimension of the Box2DKernel filter on Tb.
medfiltsize: 5 # Window size to perform medfilt2d to fill missing Tb pixels, must be an odd number
geolimits: [-60, -360, 60, 360] # 4-element array to subset domain boundaries [lat_min, lon_min, lat_max, lon_max]
area_thresh: 800 # [km^2] Minimum area to define a cloud
miss_thresh: 0.4 # Missing data fraction threshold. If missing data exceeds this, the time frame will be omitted.
cloudtb_core: 225.0 # [K]
cloudtb_cold: 241.0 # [K]
cloudtb_warm: 261.0 # [K]
cloudtb_cloud: 261.0 # [K]
absolutetb_threshs: [160, 330] # K [min, max] absolute Tb range allowed.
warmanvilexpansion: 0 # Not working yet, set this to 0 for now
cloudidmethod: 'label_grow'
# Specific parameters to link cloud objects using PF
linkpf: 1 # Set to 1 to turn on linkpf option; default: 0
pf_smooth_window: 5 # Smoothing window for identifying PF
pf_dbz_thresh: 3 # [dBZ] for reflectivity, or [mm/h] for rainrate
pf_link_area_thresh: 648.0 # [km^2]

# Tracking parameters
othresh: 0.5 # overlap fraction threshold. Clouds that overlap more than this between times are tracked.
timegap: 3.1 # [hour] If missing data duration longer than this, tracking restarts
nmaxlinks: 50 # Maximum number of clouds that any single cloud can be linked to
maxnclouds: 3000 # Maximum number of clouds in one snapshot
duration_range: [2, 400] # A vector [minlength,maxlength] to specify the duration range for the tracks
# Flag to remove short-lived tracks [< min(duration_range)] that are not mergers/splits with other tracks
# 0:keep all tracks; 1:remove short tracks
remove_shorttracks: 1
# Set this flag to 1 to write a dense (2D) trackstats netCDF file
# Note that for datasets with lots of tracks, the memory consumption could be large
trackstats_dense_netcdf: 1
# Minimum time difference threshold to match track stats with cloudid files
match_pixel_dt_thresh: 60.0 # seconds

# MCS Tb parameters
mcs_tb_area_thresh: 40000 # [km^2] Tb area threshold
mcs_tb_duration_thresh: 4 # [hour] Tb minimum length of a mcs
mcs_tb_split_duration: 12 # [hour] Tb tracks smaller or equal to this length will be included with the MCS splits from
mcs_tb_merge_duration: 12 # [hour] Tb tracks smaller or equal to this length will be included with the MCS merges into
mcs_tb_gap: 1 # [unitless] Allowable temporal gap in Tb data for MCS area threshold
# MCS PF parameters
mcs_pf_majoraxis_thresh: 0 # [km] MCS PF major axis length lower limit
max_pf_majoraxis_thresh: 100000 # [km] MCS PF major axis length upper limit
mcs_pf_durationthresh: 4 # [hour] PF minimum length of mcs
mcs_pf_majoraxis_for_lifetime: 20 # [km] Minimum PF size to count PF lifetime
mcs_pf_gap: 1 # [unitless] Allowable temporal gap in PF data for MCS characteristics

# Specify rain rate parameters
pf_rr_thres: 2.0 # [mm/hr] Rain rate threshold
nmaxpf: 3 # Maximum number of precipitation features that can be within a cloud feature
nmaxcore: 20 # Maximum number of convective cores that can be within a cloud feature
pcp_thresh: 1.0 # Pixels with hourly precipitation larger than this will be labeled with track number
heavy_rainrate_thresh: 10.0 # [mm/hr] Heavy rain rate threshold
mcs_min_rainvol_thresh: 20000 # [km^2 mm/h] Min rain volumne threshold
mcs_volrain_duration_thresh: 1.0 # [hour] Min volume rain threshold

# Define tracked feature variable names
feature_varname: 'feature_number'
nfeature_varname: 'nfeatures'
featuresize_varname: 'npix_feature'

# Track statistics output file dimension names
tracks_dimname: 'tracks'
times_dimname: 'times'
pf_dimname: 'nmaxpf'
fillval: -9999
# MCS track stats file base names
mcstbstats_filebase: 'mcs_tracks_'
mcspfstats_filebase: 'mcs_tracks_pf_'
mcsrobust_filebase: 'mcs_tracks_robust_'
pixeltracking_filebase: 'mcstrack_'
mcsfinal_filebase: 'mcs_tracks_final_'

# Feature movement speed parameters
lag_for_speed: 1 # lag intervals between tracked features to calculate movement
track_number_for_speed: "pcptracknumber"
track_field_for_speed: 'precipitation'
min_size_thresh_for_speed: 20 # [km] Min PF major axis length to calculate movement
max_speed_thresh: 50 # [m/s]
19 changes: 7 additions & 12 deletions pyflextrkr/robustmcspf_saag.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np
import xarray as xr
import os, shutil
import os
import sys
import time
import warnings
Expand Down Expand Up @@ -29,18 +29,14 @@ def define_robust_mcs_pf(config):
mcs_pf_durationthresh = config["mcs_pf_durationthresh"]
mcs_pf_majoraxis_for_lifetime = config["mcs_pf_majoraxis_for_lifetime"]
mcs_pf_gap = config["mcs_pf_gap"]
coefs_pf_area = config["coefs_pf_area"]
coefs_pf_rr = config["coefs_pf_rr"]
coefs_pf_skew = config["coefs_pf_skew"]
coefs_pf_heavyratio = config["coefs_pf_heavyratio"]
max_pf_majoraxis_thresh = config["max_pf_majoraxis_thresh"]
tracks_dimname = config["tracks_dimname"]
times_dimname = config["times_dimname"]
pf_dimname = config["pf_dimname"]
pixel_radius = config["pixel_radius"]
mcs_min_rainvol_thresh = config["mcs_min_rainvol_thresh"]
heavy_rainrate_thresh = config["heavy_rainrate_thresh"]
mcs_volrain_durationthresh = config["mcs_volrain_durationthresh"]
mcs_volrain_duration_thresh = config["mcs_volrain_duration_thresh"]

np.set_printoptions(threshold=np.inf)
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -145,9 +141,9 @@ def define_robust_mcs_pf(config):
dur_volrain = float(ct_volrain) * time_res

# Duration of max rain rate >= pf_mcs_dur [hour] and
# Duration of volume rain >= mcs_volrain_durationthresh
# Duration of volume rain >= mcs_volrain_duration_thresh
if (dur_maxrr >= mcs_pf_durationthresh) & (
dur_volrain >= mcs_volrain_durationthresh
dur_volrain >= mcs_volrain_duration_thresh
):
# Label this period as an mcs
pf_mcsstatus[nt, igroup_indices] = 1
Expand Down Expand Up @@ -243,10 +239,9 @@ def define_robust_mcs_pf(config):
dsout.attrs["MCS_PF_majoraxis_thresh"] = mcs_pf_majoraxis_thresh
dsout.attrs["MCS_PF_duration_thresh"] = mcs_pf_durationthresh
dsout.attrs["PF_PF_min_majoraxis_thresh"] = mcs_pf_majoraxis_for_lifetime
dsout.attrs["coefs_pf_area"] = coefs_pf_area
dsout.attrs["coefs_pf_rr"] = coefs_pf_rr
dsout.attrs["coefs_pf_skew"] = coefs_pf_skew
dsout.attrs["coefs_pf_heavyratio"] = coefs_pf_heavyratio
dsout.attrs["heavy_rainrate_thresh"] = heavy_rainrate_thresh
dsout.attrs["mcs_min_rainvol_thresh"] = mcs_min_rainvol_thresh
dsout.attrs["mcs_volrain_duration_thresh"] = mcs_volrain_duration_thresh
dsout.attrs["Created_on"] = time.ctime(time.time())


Expand Down
4 changes: 2 additions & 2 deletions pyflextrkr/tracksingle_drift.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,11 @@ def trackclouds(

bt_new = np.array(
[pd.to_datetime(new_data["base_time"].data, unit="s")],
dtype="datetime64[s]",
dtype="datetime64[ns]",
)[0]
bt_ref = np.array(
[pd.to_datetime(reference_data["base_time"].data, unit="s")],
dtype="datetime64[s]",
dtype="datetime64[ns]",
)[0]

# Define output variables dictionary
Expand Down

0 comments on commit 261bf58

Please sign in to comment.