Commit

adding important mom6 processing from cefi_interactive
chiaweh2 committed Apr 1, 2024
1 parent 04c3f65 commit 3b0d9a0
Showing 5 changed files with 7,755 additions and 0 deletions.
59 changes: 59 additions & 0 deletions mom6_climatology.py
@@ -0,0 +1,59 @@
"""
This script is designed to do batch calculating the
climatology (monthly and daily smoothed)
"""
# %%
import warnings

import xarray as xr
from dask.distributed import Client

from mom6_regrid import mom6_encoding_attr, mom6_hist_run
from time_series_processes import cal_daily_climo


warnings.simplefilter("ignore")
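
# NOTE: cal_daily_climo lives in time_series_processes, which is not part of
# this commit. For reference, a minimal sketch of the FFT-based harmonic
# smoothing such a routine typically applies; the name, signature, and the
# assumption that day-of-year is the first axis are illustrative only:
def _example_harmonic_smooth(da_climo, nharm=4):
    """Keep the annual mean plus the first `nharm` harmonics
    of a day-of-year climatology."""
    import numpy as np
    coeffs = np.fft.rfft(da_climo.values, axis=0)
    coeffs[nharm + 1:] = 0.  # drop everything above the retained harmonics
    smoothed = np.fft.irfft(coeffs, n=da_climo.shape[0], axis=0)
    return da_climo.copy(data=smoothed)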

# %%
if __name__ == "__main__":

    client = Client(processes=False, memory_limit='150GB', silence_logs=50)
    print(client)
    print(client.cluster.dashboard_link)

# %%
    # file location and name
    MOM6_DIR = "/Datasets.private/regional_mom6/hist_run/regrid/"
    file_list = mom6_hist_run(MOM6_DIR)

# %%
    # open each file and calculate the climatology
    for file in file_list:
        ds = xr.open_dataset(file)
        ds = ds.isel(time=slice(1, None))  # drop the first time step (timestamped 1980)
        varname = file.split("/")[-1].split(".")[-2]

        if "daily" in file.split("/")[-1].split(".")[0]:
            # smoothed daily climatology for the daily file
            ds_climo = xr.Dataset()
            da_climo = cal_daily_climo(
                ds[varname],
                smooth=True,
                dim='time',
                nharm=4,
                apply_taper=False
            )
            ds_climo[varname] = da_climo
        else:
            # monthly climatology
            ds_climo = ds.groupby('time.month').mean(dim='time')

        # output the netcdf file
        print(f'output {MOM6_DIR}climo/{file[len(MOM6_DIR):][:-3]}.climo.nc')
        mom6_encoding_attr(
            ds,
            ds_climo,
            var_names=[varname],
            dataset_name='regional mom6 regridded climatology'
        )
        ds_climo.to_netcdf(f'{MOM6_DIR}climo/{file[len(MOM6_DIR):][:-3]}.climo.nc')
312 changes: 312 additions & 0 deletions mom6_regrid.py
@@ -0,0 +1,312 @@
"""
This script is designed to do batch regridding of the
regional mom6 output
The regridding is using the xesmf package with the bilinear method
there are other more conservative way to doing the regriddeding
https://xesmf.readthedocs.io/en/stable/notebooks/Compare_algorithms.html
Current regridded product is based on the following keys
1. The number of grid is close or similar to the original
curvilinear grid.
2. The regridded product should cover the maximum/minimum lon lat
range that is provided by the original curvilinear grid
Based on the above mentioned keys
1. the number of grid in lon,lat is the same as xh,yh
2. the linspace range of lon lat is defined by
max(geolon/geolat) and min(geolon/geolat)
"""
# %%
import sys
import warnings

import numpy as np
import xarray as xr
import xesmf as xe
from dask.distributed import Client
warnings.simplefilter("ignore")
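
# A minimal, self-contained illustration of the xesmf bilinear regrid used
# below, on toy arrays (for reference only; the grid sizes, ranges, and the
# variable name 'tos' here are made up, not part of the batch workflow):
def _example_bilinear_regrid():
    lon2d, lat2d = np.meshgrid(np.linspace(230., 300., 70),
                               np.linspace(5., 60., 55))
    ds_in = xr.Dataset(
        {'tos': (('y', 'x'), np.random.rand(55, 70))},
        coords={'lon': (('y', 'x'), lon2d), 'lat': (('y', 'x'), lat2d)},
    )
    ds_out = xr.Dataset(coords={'lon': np.linspace(230., 300., 70),
                                'lat': np.linspace(5., 60., 55)})
    regridder = xe.Regridder(ds_in, ds_out, 'bilinear', unmapped_to_nan=True)
    return regridder(ds_in['tos'])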

def mom6_encoding_attr(
        ds_data_ori,
        ds_data,
        var_names=None,
        dataset_name='regional mom6 regrid'
):
    """
    Create the attributes and netCDF encoding for the
    regional MOM6 regridded file format.

    Parameters
    ----------
    ds_data_ori : xr.Dataset
        original dataset
    ds_data : xr.Dataset
        new regridded output dataset
    var_names : list of str
        variable names in the dataset
    dataset_name : str
        name of the dataset

    Returns
    -------
    ds_data : xr.Dataset
        new regridded output dataset with attrs and encoding set up.
    """

    # besides lon/lat, which follow the PSL format,
    # list all dim names that may appear in different
    # mom6 outputs. These dims keep their original
    # data attrs and encoding.
    misc_dims_list = ['time', 'lead', 'init', 'member', 'z_l']

    if var_names is None:
        var_names = []

    # global attrs and encoding
    ds_data.attrs = ds_data_ori.attrs
    ds_data.encoding = ds_data_ori.encoding
    ds_data.attrs['history'] = "Derived and written at NOAA Physical Science Laboratory"
    ds_data.attrs['contact'] = "[email protected]"
    ds_data.attrs['dataset'] = dataset_name

    # lon and lat attrs and encoding (PSL format)
    # longitude attrs
    ds_data['lon'].attrs = {
        'standard_name': 'longitude',
        'long_name': 'longitude',
        'units': 'degrees_east',
        'axis': 'X',
        'actual_range': (
            np.float64(ds_data['lon'].min()),
            np.float64(ds_data['lon'].max())
        )
    }
    ds_data['lon'].encoding = {
        'zlib': True,
        'szip': False,
        'zstd': False,
        'bzip2': False,
        'blosc': False,
        'shuffle': True,
        'complevel': 2,
        'fletcher32': False,
        'contiguous': False,
        'chunksizes': [len(ds_data['lon'].data)],
        'original_shape': [len(ds_data['lon'].data)],
        'dtype': 'float64'
    }

    # latitude attrs
    ds_data['lat'].attrs = {
        'standard_name': 'latitude',
        'long_name': 'latitude',
        'units': 'degrees_north',
        'axis': 'Y',
        'actual_range': (
            np.float64(ds_data['lat'].min()),
            np.float64(ds_data['lat'].max())
        )
    }
    ds_data['lat'].encoding = {
        'zlib': True,
        'szip': False,
        'zstd': False,
        'bzip2': False,
        'blosc': False,
        'shuffle': True,
        'complevel': 2,
        'fletcher32': False,
        'contiguous': False,
        'chunksizes': [len(ds_data['lat'].data)],
        'original_shape': [len(ds_data['lat'].data)],
        'dtype': 'float64'
    }

    # copy original attrs and encoding for dims
    for dim in misc_dims_list:
        try:
            ds_data[dim].attrs = ds_data_ori[dim].attrs
            ds_data[dim].encoding = ds_data_ori[dim].encoding
            ds_data[dim].encoding['complevel'] = 2
        except KeyError:
            print(f'no {dim} dimension')

    # copy original attrs and encoding for variables
    for var_name in var_names:
        ds_data[var_name].attrs = ds_data_ori[var_name].attrs
        ds_data[var_name].encoding = ds_data_ori[var_name].encoding
        ds_data[var_name].encoding['complevel'] = 2

    return ds_data
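
# Example usage (illustrative; ds_in/ds_out stand for any original/regridded
# dataset pair, such as the ones produced in the main block below):
#
#     ds_out = mom6_encoding_attr(
#         ds_in, ds_out,
#         var_names=['tos'],
#         dataset_name='regional mom6 regrid',
#     )
#
# The attrs/encoding are also set on the passed-in dataset in place,
# which is why the callers below ignore the return value.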

def mom6_hist_run(parent_dir):
    """
    Create the list of history-run files to be
    opened by xarray.
    """
    # h point list
    hpoint_file_list = [
        "ocean_monthly.199301-201912.MLD_003.nc",
        "ocean_monthly.199301-201912.sos.nc",
        "ocean_monthly.199301-201912.ssh.nc",
        "ocean_monthly.199301-201912.tob.nc",
        "ocean_monthly.199301-201912.tos.nc",
        "ocean_monthly_z.199301-201912.so.nc",
        "ocean_monthly_z.199301-201912.thetao.nc",
        "ocean_cobalt_daily_2d.19930101-20191231.btm_o2.nc",
        "ocean_cobalt_omip_sfc.199301-201912.chlos.nc",
        "ocean_cobalt_omip_sfc.199301-201912.dissicos.nc",
        "ocean_cobalt_omip_sfc.199301-201912.talkos.nc",
        "ocean_cobalt_sfc.199301-201912.sfc_co3_ion.nc",
        "ocean_cobalt_sfc.199301-201912.sfc_co3_sol_arag.nc",
        "ocean_cobalt_sfc.199301-201912.sfc_no3.nc",
        "ocean_cobalt_sfc.199301-201912.sfc_po4.nc",
        "ocean_cobalt_tracers_int.199301-201912.mesozoo_200.nc"
    ]
    hpoint_file_list = [f"{parent_dir}{file}" for file in hpoint_file_list]

    # T point file that is on the same grid as the h point
    tpoint_file_list = [
        "ice_monthly.199301-201912.siconc.nc"
    ]
    tpoint_file_list = [f"{parent_dir}{file}" for file in tpoint_file_list]

    all_file_list = hpoint_file_list + tpoint_file_list

    return all_file_list

def mom6_hindcast(parent_dir):
    """
    Create the list of hindcast files to be
    opened by xarray.
    """
    tob_files = [f"tob_forecasts_i{mon}.nc" for mon in range(3, 13, 3)]
    tos_files = [f"tos_forecasts_i{mon}.nc" for mon in range(3, 13, 3)]

    # h point list
    hpoint_file_list = (
        tob_files +
        tos_files
    )

    hpoint_file_list = [f"{parent_dir}{file}" for file in hpoint_file_list]

    all_file_list = hpoint_file_list

    return all_file_list

# %%
if __name__ == "__main__":

    client = Client(processes=False, memory_limit='150GB', silence_logs=50)
    print(client)
    print(client.cluster.dashboard_link)

    # check that the run-type argument exists
    if len(sys.argv) < 2:
        print("Usage: python mom6_regrid.py hist/fcst")
        sys.exit(1)
    # %%
    if sys.argv[1] == 'hist':
        # file location and name
        mom6_dir = "/Datasets.private/regional_mom6/hist_run/"
        file_list = mom6_hist_run(mom6_dir)
    elif sys.argv[1] == 'fcst':
        mom6_dir = "/Datasets.private/regional_mom6/hindcast/"
        file_list = mom6_hindcast(mom6_dir)
    else:
        print("Usage: python mom6_regrid.py hist/fcst")
        sys.exit(1)

    # %%
    # static field
    ds_static = xr.open_dataset('/Datasets.private/regional_mom6/ocean_static.nc')
    ds_static = ds_static.set_coords(
        ['geolon', 'geolat',
         'geolon_c', 'geolat_c',
         'geolon_u', 'geolat_u',
         'geolon_v', 'geolat_v']
    )
    # shift longitudes to the 0-360 convention
    ds_static['geolon'] = ds_static['geolon'] + 360.
    # rename to the coordinate names xesmf expects
    ds_static = ds_static.rename({'geolon': 'lon', 'geolat': 'lat'})

    # %%
    # Create the target longitude and latitude arrays (1D)
    x = np.linspace(
        ds_static.lon.min(),
        ds_static.lon.max(),
        len(ds_static.xh) - 1
    )  # target longitude values
    y = np.linspace(
        ds_static.lat.min(),
        ds_static.lat.max(),
        len(ds_static.yh) - 1
    )  # target latitude values

    # Create a dummy data variable on the target grid
    # (xesmf builds its weights from the lon/lat coordinates)
    ds = xr.open_dataset(file_list[0], chunks={})
    data = xr.DataArray(
        data=np.full((len(x), len(y)), np.nan),
        coords={'lon': x, 'lat': y},
        dims=('lon', 'lat')
    )

    # Create an xarray dataset with the empty dataarray
    ds_regrid = xr.Dataset({'var': data})
    if sys.argv[1] == 'fcst':
        # carry the forecast coordinates along on the target dataset
        ds_regrid = ds_regrid.assign_coords(
            member=ds.member, init=ds.init, lead=ds.lead
        )

    # %%
    # use xesmf to create the regridder
    # NOTE: this regridder is only suited for mapping the h-point
    # geolon/geolat grid to the new lon/lat grid
    regridder = xe.Regridder(ds_static, ds_regrid, "bilinear", unmapped_to_nan=True)

    # %%
    # open each file and regrid
    for file in file_list:
        ds = xr.open_dataset(file)

        if "siconc" in file:
            # convert the T-point dim names to the correct
            # names (error in the model output)
            ds = ds.rename({'xT': 'xh', 'yT': 'yh'})

        # merge with the static field
        ds = xr.merge([ds, ds_static])

        if sys.argv[1] == 'hist':
            varname = file.split(".")[-2]
            ds_regrid = xr.Dataset()
            # perform the regrid for each field
            ds_regrid[varname] = regridder(ds[varname])

        elif sys.argv[1] == 'fcst':
            varname = file.split("/")[-1].split("_")[0]
            ds_regrid = xr.Dataset()
            # perform the regrid for each field (full field and anomaly)
            ds_regrid[varname] = regridder(ds[varname])
            ds_regrid[f"{varname}_anom"] = regridder(ds[f"{varname}_anom"])

        # output the netcdf file
        print(f'output {mom6_dir}regrid/{file[len(mom6_dir):]}')
        mom6_encoding_attr(
            ds,
            ds_regrid,
            var_names=list(ds_regrid.keys()),
            dataset_name='regional mom6 regrid'
        )
        try:
            ds_regrid.to_netcdf(f'{mom6_dir}regrid/{file[len(mom6_dir):]}', mode='w')
        except PermissionError:
            print(f'{mom6_dir}regrid/{file[len(mom6_dir):]} is in use by another script')
