Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update nsidc g02202 Sea Ice Concentration to Version 5 #3869

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/sphinx/source/input.rst
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| NSIDC-0116-[nh|sh] [#note4]_ | usi, vsi (day) | 3 | Python |
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| NSIDC-g02202-[sh] | siconc (SImon) | 3 | Python |
| NSIDC-g02202-[nh|sh] | siconc (SImon) | 3 | Python |
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
| NSIDC-g02202-[nh|sh] | siconc (SImon) | 3 | Python |
| NSIDC-G02202-[nh|sh] | siconc (SImon) | 3 | Python |

update to capital 'G' used in data commands

+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| OceanSODA-ETHZ | areacello (Ofx), co3os, dissicos, fgco2, phos, spco2, talkos (Omon) | 2 | Python |
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
Expand Down
23 changes: 23 additions & 0 deletions esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-nh.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
filename: sic_psn25_{year}.*.nc
# Common global attributes for Cmorizer output
attributes:
dataset_id: NSIDC-G02202-nh
version: '5'
tier: 3
modeling_realm: reanaly
project_id: OBS6
source: 'https://nsidc.org/data/g02202/versions/5'
reference: 'nsidc-g02202'
comment: ''

variables:
siconc:
mip: SImon
raw: cdr_seaice_conc_monthly
compress: true


custom:
create_areacello: true
area_file: psn25area_v3.dat
6 changes: 3 additions & 3 deletions esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-sh.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
---
filename: seaice_conc_monthly_sh_{year}.*.nc
filename: sic_pss25_{year}.*.nc
# Common global attributes for Cmorizer output
attributes:
dataset_id: NSIDC-G02202-sh
version: '4'
version: '5'
tier: 3
modeling_realm: reanaly
project_id: OBS6
source: 'https://nsidc.org/data/g02202/versions/4'
source: 'https://nsidc.org/data/g02202/versions/5'
reference: 'nsidc-g02202'
comment: ''

Expand Down
12 changes: 10 additions & 2 deletions esmvaltool/cmorizers/data/datasets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1131,11 +1131,19 @@ datasets:

NSIDC-G02202-sh:
tier: 3
source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v4shmday
last_access: 2023-05-13
source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v5shmday
last_access: 2025-01-24
info: |
Download monthly data.
Login required for download; use of the data only requires citation.

NSIDC-G02202-nh:
tier: 3
source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v5nhmday
last_access: 2025-01-24
info: |
Download monthly data.
Login required for download; use of the data only requires citation.

OceanSODA-ETHZ:
tier: 2
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# pylint: disable=too-many-arguments
# pylint: disable=too-many-function-args
# pylint: disable=R0917
# pylint: disable=too-many-locals
"""Script to download NSIDC-G02202-nh."""
import logging
from datetime import datetime
from dateutil import relativedelta

from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader

logger = logging.getLogger(__name__)


def _sensor_suffix(date):
    """Return the sensor tag used in the monthly file name for ``date``.

    The tag depends only on the year and month of ``date``; months before
    the first listed switch (including anything before the start of the
    record) map to ``'n07'``.
    """
    # First (year, month) for which each tag is used, newest first.  These
    # are the first full months after the transition dates 1987-07-30,
    # 1991-12-30, 1995-09-30 and 2007-12-30.
    sensor_starts = (
        ((2008, 1), 'F17'),
        ((1995, 10), 'F13'),
        ((1992, 1), 'F11'),
        ((1987, 8), 'F08'),
    )
    for first_month, tag in sensor_starts:
        if (date.year, date.month) >= first_month:
            return tag
    return 'n07'


def download_dataset(config, dataset, dataset_info, start_date, end_date,
                     overwrite):
    """Download dataset.

    Fetches the grid-cell area file, the ancillary netCDF file and one
    monthly file for every month from ``start_date`` to ``end_date``
    (inclusive).  Nothing but the two auxiliary files is requested when
    ``start_date`` is later than ``end_date``.

    Parameters
    ----------
    config : dict
        ESMValTool's user configuration
    dataset : str
        Name of the dataset
    dataset_info : dict
        Dataset information from the datasets.yml file
    start_date : datetime
        Start of the interval to download (defaults to 1979-01-01)
    end_date : datetime
        End of the interval to download (defaults to 2024-06-01)
    overwrite : bool
        Overwrite already downloaded files
    """
    if start_date is None:
        start_date = datetime(1979, 1, 1)
    if end_date is None:
        end_date = datetime(2024, 6, 1)

    downloader = WGetDownloader(
        config=config,
        dataset=dataset,
        dataset_info=dataset_info,
        overwrite=overwrite,
    )

    # need area file
    area_dat = ('ftp://sidads.colorado.edu/DATASETS/seaice'
                '/polar-stereo/tools/psn25area_v3.dat')
    downloader.download_folder(area_dat, [])

    anc_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/'
                'ancillary/G02202-ancillary-psn25-v05r00.nc')
    downloader.download_folder(anc_path, [])

    base_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/north/monthly'
                 '/sic_psn25_{year}{month:02d}_{other}_v05r00.nc')

    # The sensor tag is looked up independently for every month, so the
    # result does not depend on where the interval starts or on its length
    # (the previous index bookkeeping ran past the end of the tag list when
    # start_date >= end_date in the F17 era).
    loop_date = start_date
    while loop_date <= end_date:
        downloader.download_folder(
            base_path.format(year=loop_date.year, month=loop_date.month,
                             other=_sensor_suffix(loop_date)), [])
        loop_date += relativedelta.relativedelta(months=1)
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date,
if start_date is None:
start_date = datetime(1979, 1, 1)
if end_date is None:
end_date = datetime(2023, 1, 1)
end_date = datetime(2024, 6, 1)

loop_date = start_date

Expand All @@ -47,20 +47,17 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date,
'/polar-stereo/tools/pss25area_v3.dat')
downloader.download_folder(area_dat, [])

anc_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V4/'
'ancillary/G02202-cdr-ancillary-sh.nc')
anc_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/'
'ancillary/G02202-ancillary-pss25-v05r00.nc')
downloader.download_folder(anc_path, [])

base_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V4/south/monthly'
'/seaice_conc_monthly_sh_{year}{month:02d}_{other}_v04r00.nc')
base_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/south/monthly'
'/sic_pss25_{year}{month:02d}_{other}_v05r00.nc')

# regex for n07 changes to f08.. file names
# bins #{'197811':'n07','198708':'f08',
# '199201':'f11','199510':'f13', '200801':'f17'}
datels = [datetime(1978, 11, 1), datetime(1987, 7, 30),
datetime(1991, 12, 30), datetime(1995, 9, 30),
datetime(2007, 12, 30), end_date]
suffls = ['n07', 'f08', 'f11', 'f13', 'f17']
suffls = ['n07', 'F08', 'F11', 'F13', 'F17']
isuf = 0
suffix = suffls[isuf]
# initialize suffix if dates start higher than initial
Expand All @@ -78,4 +75,3 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date,
base_path.format(year=loop_date.year, month=loop_date.month,
other=suffix), [])
loop_date += relativedelta.relativedelta(months=1)
# check loop_date is => next bin
192 changes: 192 additions & 0 deletions esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_nh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# pylint: disable=unused-argument
# pylint: disable=too-many-arguments
# pylint: disable=too-many-function-args
# pylint: disable=R0917
# pylint: disable=E1121
# pylint: disable=too-many-locals
# flake8: noqa
"""ESMValTool CMORizer for Sea Ice Concentration CDR (Northern Hemisphere).

Tier
Tier 3: restricted dataset.

Source
https://nsidc.org/data/g02202/versions/5

Last access
20250124

Download and processing instructions
Download data from:
https://noaadata.apps.nsidc.org/NOAA/G02202_V5/north/monthly
lat and lon from:
https://noaadata.apps.nsidc.org/NOAA/G02202_V5/ancillary/
area file:
ftp://sidads.colorado.edu/DATASETS/seaice/polar-stereo/tools/
psn25area_v3.dat

https://nsidc.org/sites/default/files/documents/user-guide/g02202-v005-userguide.pdf

"""

import logging
import os
import re

import numpy as np

import iris
from cf_units import Unit
from iris.coords import AuxCoord

from esmvaltool.cmorizers.data import utilities as utils

logger = logging.getLogger(__name__)


def _get_filepaths(in_dir, basename, yyyy):
"""Find correct name of file (extend basename with timestamp)."""
f_name = basename.format(year=yyyy)
regex = re.compile(f_name)
return_files = []
for files in os.listdir(in_dir):
if regex.match(files):
return_files.append(os.path.join(in_dir, files))

return return_files


def _fix_time_coord(cube, _field, _filename):
    """Move every time point of ``cube`` to the 15th day of its month.

    The time coordinate is converted in place to
    "days since 1850-01-01 00:00:00" on the standard calendar.  ``_field``
    and ``_filename`` are accepted but not used.
    """
    target_unit = Unit("days since 1850-01-01 00:00:00", calendar="standard")
    coord = cube.coord("time")
    coord.convert_units(target_unit)
    mid_month = [
        date.replace(day=15) for date in target_unit.num2date(coord.points)
    ]
    coord.points = target_unit.date2num(mid_month)


def _prom_dim_coord(cube, _field, _filename):
    """Promote the "time" auxiliary coordinate of ``cube`` to a dimension.

    Used as the load callback in ``_extract_variable``; ``_field`` and
    ``_filename`` are accepted but not used.
    """
    coord_name = "time"
    iris.util.promote_aux_coord_to_dim_coord(cube, coord_name)


def _create_coord(cubes, var_name, standard_name):
    """Build an auxiliary coordinate from the cube named ``standard_name``.

    The data and long name are taken from the cube extracted from
    ``cubes``; the units are always set to "degrees" rather than copied
    from that cube.
    """
    source = cubes.extract_cube(standard_name)
    return AuxCoord(
        source.data,
        var_name=var_name,
        standard_name=standard_name,
        long_name=source.long_name,
        units="degrees",
    )


def _extract_variable(raw_var, cmor_info, attrs, filepath, out_dir, latlon):
    """Load ``raw_var`` from ``filepath``, CMORize it and save it.

    The loaded cubes are concatenated into one cube, the projection
    coordinates become dimension coordinates named "y" and "x", the two
    coordinates in ``latlon`` are attached to dimensions (1, 2), and an
    ``area_type`` coordinate is added.  The saved cube is also returned.
    """
    short_name = cmor_info.short_name
    loaded = iris.load(filepath, raw_var, _prom_dim_coord)
    iris.util.equalise_attributes(loaded)
    cube = loaded.concatenate_cube()

    # Promote both projection axes first, then give them short names.
    axis_names = (
        ("projection_y_coordinate", "y"),
        ("projection_x_coordinate", "x"),
    )
    for projection_name, _ in axis_names:
        iris.util.promote_aux_coord_to_dim_coord(cube, projection_name)
    for projection_name, axis_name in axis_names:
        cube.coord(projection_name).rename(axis_name)

    cube.add_aux_coord(latlon[0], (1, 2))
    cube.add_aux_coord(latlon[1], (1, 2))

    # add coord types
    cube.add_aux_coord(
        AuxCoord(
            [1.0],
            standard_name="area_type",
            var_name="type",
            long_name="Sea Ice area type",
        )
    )

    # Units are assigned directly instead of being converted to
    # cmor_info.units; values above 100 are blanked before scaling by 100.
    cube.units = "%"
    cube.data[cube.data > 100] = np.nan
    cube = cube * 100

    # utils.fix_coords is not applied: lat/lon are multidimensional here.
    utils.fix_var_metadata(cube, cmor_info)
    utils.set_global_atts(cube, attrs)

    utils.save_variable(
        cube, short_name, out_dir, attrs, unlimited_dimensions=["time"]
    )

    return cube


def _create_areacello(cfg, in_dir, sample_cube, glob_attrs, out_dir):
if not cfg["custom"].get("create_areacello", False):
return
var_info = cfg["cmor_table"].get_variable("Ofx", "areacello")
glob_attrs["mip"] = "Ofx"
lat_coord = sample_cube.coord("latitude")

area_file = os.path.join(in_dir, cfg["custom"]["area_file"])
with open(area_file, "rb") as datfile:
areasdmnd = np.fromfile(
datfile,
dtype=np.int32).reshape(lat_coord.shape)

# Divide by 1000 to get km2 then multiply by 1e6 to m2 ...*1000
ardata = areasdmnd * 1000

cube = iris.cube.Cube(
ardata,
standard_name=var_info.standard_name,
long_name=var_info.long_name,
var_name=var_info.short_name,
units="m2",
dim_coords_and_dims=[(sample_cube.coord("y"), 0),
(sample_cube.coord("x"), 1)],
)
cube.add_aux_coord(lat_coord, (0, 1))
cube.add_aux_coord(sample_cube.coord("longitude"), (0, 1))
utils.fix_var_metadata(cube, var_info)
utils.set_global_atts(cube, glob_attrs)
utils.save_variable(
cube, var_info.short_name, out_dir,
glob_attrs, zlib=True)


def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
    """Cmorize the monthly files found in ``in_dir``, one year at a time.

    Parameters
    ----------
    in_dir : str
        Directory holding the raw monthly files, the ancillary netCDF file
        and the grid-cell area file.
    out_dir : str
        Directory the CMORized files are written to.
    cfg : dict
        CMORizer configuration; the keys "attributes", "cmor_table",
        "filename", "variables" and "custom" are read.
    cfg_user : dict
        User configuration (not used here).
    start_date : datetime or None
        First year to process is ``start_date.year``; 1979 when None.
    end_date : datetime or None
        Last year to process is ``end_date.year``; 2024 when None.
    """
    glob_attrs = cfg["attributes"]
    cmor_table = cfg["cmor_table"]

    # get aux nc file
    cubesaux = iris.load(
        os.path.join(
            in_dir, "G02202-ancillary-psn25-v05r00.nc")
    )
    coords = [_create_coord(cubesaux, "lat", "latitude"),
              _create_coord(cubesaux, "lon", "longitude")]

    # Honour the requested interval; with no dates given this is the same
    # 1979-2024 range that used to be hard-coded.
    first_year = 1979 if start_date is None else start_date.year
    last_year = 2024 if end_date is None else end_date.year

    sample_cube = None
    for year in range(first_year, last_year + 1):
        filepaths = _get_filepaths(in_dir, cfg["filename"], year)

        if filepaths:
            logger.info("Found %d files in '%s'", len(filepaths), in_dir)

            for var, var_info in cfg["variables"].items():
                logger.info("CMORizing variable '%s'", var)
                glob_attrs["mip"] = var_info["mip"]
                cmor_info = cmor_table.get_variable(var_info["mip"], var)
                sample_cube = _extract_variable(
                    var_info.get("raw", var),
                    cmor_info,
                    glob_attrs,
                    filepaths,
                    out_dir,
                    coords,
                )
        else:
            logger.info("No files found ")
            logger.info("year: %d basename: %s", year, cfg["filename"])

    # The last cube produced supplies the grid for the areacello file.
    if sample_cube is not None:
        _create_areacello(cfg, in_dir, sample_cube, glob_attrs, out_dir)
Loading