Skip to content

Commit 3ee287c

Browse files
committed
chore: Add test for checking physical limits and zeroes in NWP data openclimatefix#335 and openclimatefix#337
1 parent 0010180 commit 3ee287c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+505
-2
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ coverage.xml
5959
.pytest_cache/
6060
test.nc
6161

62+
#test data generator
63+
tests/load/nwp/test_data_generator.py
64+
6265
# Translations
6366
*.mo
6467
*.pot

ocf_datapipes/load/nwp/nwp.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,85 @@ def __init__(
2626
self,
2727
zarr_path: Union[Path, str, list[Path], list[str]],
2828
provider: str = "ukv",
29+
check_for_zeros: bool = False,
30+
check_physical_limits: bool = False,
2931
):
3032
"""
3133
Opens NWP Zarr and yields it
3234
3335
Args:
3436
zarr_path: Path to the Zarr file
3537
provider: NWP provider
38+
check_for_zeros: Check for zeros in the NWP data
39+
check_physical_limits: Check the physical limits of nwp data (e.g. -100<temperature<100)
3640
"""
3741
self.zarr_path = zarr_path
42+
self.check_for_zeros = check_for_zeros
43+
self.check_physical_limits = check_physical_limits
44+
self.limits = {
45+
"temperature": (-100, 60), # Celsius
46+
"specific_humidity": (0, 0.03), # kg/kg
47+
"relative_humidity": (0, 100), # Percentage
48+
"pressure": (0, 1100), # hPa (sea level pressure)
49+
"u_wind": (-200, 200), # m/s
50+
"v_wind": (-200, 200), # m/s
51+
"geopotential": (0, 100000), # m^2/s^2
52+
"total_precipitation": (0, 2000), # mm/day
53+
"convective_precipitation": (0, 1000), # mm/day
54+
"snowfall": (0, 1000), # mm water equivalent/day
55+
"graupel": (0, 500), # mm water equivalent/day
56+
"cloud_cover": (0, 100), # Percentage
57+
"surface_temperature": (-90, 60), # Celsius
58+
"sea_surface_temperature": (-2, 35), # Celsius
59+
"soil_temperature": (-50, 60), # Celsius
60+
"soil_moisture": (0, 1), # m^3/m^3
61+
"visibility": (0, 100000), # meters
62+
"wind_gust": (0, 250), # m/s
63+
"solar_radiation": (0, 1500), # W/m^2
64+
"longwave_radiation": (0, 750), # W/m^2
65+
"evaporation": (0, 50), # mm/day
66+
"potential_evaporation": (0, 100), # mm/day
67+
"boundary_layer_height": (0, 5000), # meters
68+
"cape": (0, 10000), # J/kg
69+
"cin": (0, 1000), # J/kg
70+
"lifted_index": (-15, 15), # Kelvin
71+
"total_column_water": (0, 100), # kg/m^2
72+
"ozone_concentration": (0, 1000), # Dobson units
73+
"dew_point_temperature": (-100, 35), # Celsius
74+
"wet_bulb_temperature": (-100, 35), # Celsius
75+
"potential_temperature": (0, 1000), # Kelvin
76+
"equivalent_potential_temperature": (0, 1000), # Kelvin
77+
"vorticity": (-1e-3, 1e-3), # 1/s
78+
"divergence": (-1e-3, 1e-3), # 1/s
79+
"vertical_velocity": (-50, 50), # m/s
80+
"cloud_base_height": (0, 20000), # meters
81+
"cloud_top_height": (0, 20000), # meters
82+
"cloud_water_content": (0, 5), # g/kg
83+
"ice_water_content": (0, 5), # g/kg
84+
"surface_roughness": (0, 10), # meters
85+
"albedo": (0, 1), # dimensionless
86+
"friction_velocity": (0, 5), # m/s
87+
"sensible_heat_flux": (-500, 500), # W/m^2
88+
"latent_heat_flux": (-500, 500), # W/m^2
89+
"momentum_flux": (-10, 10), # N/m^2
90+
"surface_pressure": (300, 1100), # hPa
91+
"mean_sea_level_pressure": (870, 1090), # hPa
92+
"tropopause_pressure": (50, 500), # hPa
93+
"tropopause_temperature": (-100, 0), # Celsius
94+
"precipitable_water": (0, 100), # mm
95+
"total_cloud_cover": (0, 100), # Percentage
96+
"low_cloud_cover": (0, 100), # Percentage
97+
"medium_cloud_cover": (0, 100), # Percentage
98+
"high_cloud_cover": (0, 100), # Percentage
99+
"convective_available_potential_energy": (0, 10000), # J/kg
100+
"convective_inhibition": (0, 1000), # J/kg
101+
"storm_relative_helicity": (-1000, 1000), # m^2/s^2
102+
"bulk_richardson_number": (-10, 10), # dimensionless
103+
"lifted_condensation_level": (0, 5000), # meters
104+
"level_of_free_convection": (0, 20000), # meters
105+
"equilibrium_level": (0, 20000), # meters
106+
"UKV": (250, 330), # UKV specific
107+
}
38108
logger.info(f"Using {provider.lower()}")
39109
if provider.lower() == "ukv":
40110
self.open_nwp = open_ukv
@@ -53,9 +123,37 @@ def __init__(
53123
else:
54124
raise ValueError(f"Unknown provider: {provider}")
55125

56-
def __iter__(self) -> Union[xr.DataArray, xr.Dataset]:
126+
def __iter__(self) -> Union[xr.DataArray, xr.Dataset]:  # type: ignore
    """Open the NWP data once, run the enabled checks, then yield it forever"""
    logger.debug("Opening NWP data: %s", self.zarr_path)
    opened = self.open_nwp(self.zarr_path)

    # Each optional validation is paired with the flag that switches it on;
    # the zero check always runs before the physical-limits check.
    validators = (
        (self.check_for_zeros, self.check_if_zeros),
        (self.check_physical_limits, self.check_if_physical_limits),
    )
    for enabled, validate in validators:
        if enabled:
            validate(opened)

    # The same opened object is handed out on every iteration.
    while True:
        yield opened
136+
137+
def check_if_zeros(self, nwp: Union[xr.DataArray, xr.Dataset]):
    """
    Raise if the NWP data contains any value exactly equal to zero

    Args:
        nwp: The opened NWP data. A DataArray is checked as a whole; for a
            Dataset, every variable obtained by iterating it is checked in
            turn. Any other type is accepted without checking.

    Raises:
        ValueError: If a zero is found.
    """
    if isinstance(nwp, xr.DataArray):
        # Compare on the xarray object (as check_if_physical_limits does)
        # instead of going through `.values`, so the reduction is left to
        # xarray rather than first pulling the full array into NumPy.
        if (nwp == 0).any():
            raise ValueError("NWP DataArray contains zeros")
    elif isinstance(nwp, xr.Dataset):
        for var in nwp:
            if (nwp[var] == 0).any():
                raise ValueError(f"NWP Dataset variable {var} contains zeros")
146+
147+
def check_if_physical_limits(self, nwp: Union[xr.DataArray, xr.Dataset]):
    """
    Raise if the NWP data falls outside the bounds stored in ``self.limits``

    Only names that appear as keys of ``self.limits`` are checked. A
    DataArray is looked up by its ``name``; a Dataset is checked for every
    limits key that is present in its ``variables``. Any other type is
    accepted without checking.

    A value violates the limits when it is strictly below the lower bound
    or strictly above the upper bound. NaN compares false against both
    bounds, so missing values do not trigger the error. This holds for
    DataArray and Dataset input alike (previously the Dataset path raised
    on any NaN while the DataArray path did not).

    Args:
        nwp: The opened NWP data.

    Raises:
        ValueError: If any checked variable has a value outside its limits.
    """
    if isinstance(nwp, xr.DataArray):
        to_check = {nwp.name: nwp} if nwp.name in self.limits else {}
    elif isinstance(nwp, xr.Dataset):
        # Iterate in the order of self.limits so the first reported
        # variable is the same one the limits table lists first.
        to_check = {
            var_name: nwp[var_name] for var_name in self.limits if var_name in nwp.variables
        }
    else:
        return

    for var_name, data in to_check.items():
        lower, upper = self.limits[var_name]
        if (data < lower).any() or (data > upper).any():
            raise ValueError(f"NWP data {var_name} is outside physical limits")
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"zarr_format": 2
3+
}
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
{
2+
"metadata": {
3+
".zattrs": {},
4+
".zgroup": {
5+
"zarr_format": 2
6+
},
7+
"UKV/.zarray": {
8+
"chunks": [
9+
1,
10+
1,
11+
10,
12+
352,
13+
274
14+
],
15+
"compressor": {
16+
"blocksize": 0,
17+
"clevel": 5,
18+
"cname": "lz4",
19+
"id": "blosc",
20+
"shuffle": 1
21+
},
22+
"dtype": "<f2",
23+
"fill_value": "NaN",
24+
"filters": null,
25+
"order": "C",
26+
"shape": [
27+
1,
28+
9,
29+
10,
30+
704,
31+
548
32+
],
33+
"zarr_format": 2
34+
},
35+
"UKV/.zattrs": {
36+
"Conventions": "CF-1.7",
37+
"GRIB_centre": "egrr",
38+
"GRIB_centreDescription": "U.K. Met Office - Exeter",
39+
"GRIB_edition": 2,
40+
"GRIB_subCentre": 0,
41+
"_ARRAY_DIMENSIONS": [
42+
"variable",
43+
"init_time",
44+
"step",
45+
"y",
46+
"x"
47+
],
48+
"institution": "U.K. Met Office - Exeter"
49+
},
50+
"init_time/.zarray": {
51+
"chunks": [
52+
9
53+
],
54+
"compressor": {
55+
"blocksize": 0,
56+
"clevel": 5,
57+
"cname": "lz4",
58+
"id": "blosc",
59+
"shuffle": 1
60+
},
61+
"dtype": "<i8",
62+
"fill_value": null,
63+
"filters": null,
64+
"order": "C",
65+
"shape": [
66+
9
67+
],
68+
"zarr_format": 2
69+
},
70+
"init_time/.zattrs": {
71+
"_ARRAY_DIMENSIONS": [
72+
"init_time"
73+
],
74+
"calendar": "proleptic_gregorian",
75+
"long_name": "initial time of forecast",
76+
"standard_name": "forecast_reference_time",
77+
"units": "hours since 2020-04-01 00:00:00"
78+
},
79+
"step/.zarray": {
80+
"chunks": [
81+
10
82+
],
83+
"compressor": {
84+
"blocksize": 0,
85+
"clevel": 5,
86+
"cname": "lz4",
87+
"id": "blosc",
88+
"shuffle": 1
89+
},
90+
"dtype": "<i8",
91+
"fill_value": null,
92+
"filters": null,
93+
"order": "C",
94+
"shape": [
95+
10
96+
],
97+
"zarr_format": 2
98+
},
99+
"step/.zattrs": {
100+
"_ARRAY_DIMENSIONS": [
101+
"step"
102+
],
103+
"long_name": "time since forecast_reference_time",
104+
"standard_name": "forecast_period",
105+
"units": "hours"
106+
},
107+
"variable/.zarray": {
108+
"chunks": [
109+
1
110+
],
111+
"compressor": {
112+
"blocksize": 0,
113+
"clevel": 5,
114+
"cname": "lz4",
115+
"id": "blosc",
116+
"shuffle": 1
117+
},
118+
"dtype": "|O",
119+
"fill_value": null,
120+
"filters": [
121+
{
122+
"id": "vlen-utf8"
123+
}
124+
],
125+
"order": "C",
126+
"shape": [
127+
1
128+
],
129+
"zarr_format": 2
130+
},
131+
"variable/.zattrs": {
132+
"_ARRAY_DIMENSIONS": [
133+
"variable"
134+
]
135+
},
136+
"x/.zarray": {
137+
"chunks": [
138+
548
139+
],
140+
"compressor": {
141+
"blocksize": 0,
142+
"clevel": 5,
143+
"cname": "lz4",
144+
"id": "blosc",
145+
"shuffle": 1
146+
},
147+
"dtype": "<i4",
148+
"fill_value": null,
149+
"filters": null,
150+
"order": "C",
151+
"shape": [
152+
548
153+
],
154+
"zarr_format": 2
155+
},
156+
"x/.zattrs": {
157+
"_ARRAY_DIMENSIONS": [
158+
"x"
159+
]
160+
},
161+
"y/.zarray": {
162+
"chunks": [
163+
704
164+
],
165+
"compressor": {
166+
"blocksize": 0,
167+
"clevel": 5,
168+
"cname": "lz4",
169+
"id": "blosc",
170+
"shuffle": 1
171+
},
172+
"dtype": "<i4",
173+
"fill_value": null,
174+
"filters": null,
175+
"order": "C",
176+
"shape": [
177+
704
178+
],
179+
"zarr_format": 2
180+
},
181+
"y/.zattrs": {
182+
"_ARRAY_DIMENSIONS": [
183+
"y"
184+
]
185+
}
186+
},
187+
"zarr_consolidated_format": 1
188+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"chunks": [
3+
1,
4+
1,
5+
10,
6+
352,
7+
274
8+
],
9+
"compressor": {
10+
"blocksize": 0,
11+
"clevel": 5,
12+
"cname": "lz4",
13+
"id": "blosc",
14+
"shuffle": 1
15+
},
16+
"dtype": "<f2",
17+
"fill_value": "NaN",
18+
"filters": null,
19+
"order": "C",
20+
"shape": [
21+
1,
22+
9,
23+
10,
24+
704,
25+
548
26+
],
27+
"zarr_format": 2
28+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"Conventions": "CF-1.7",
3+
"GRIB_centre": "egrr",
4+
"GRIB_centreDescription": "U.K. Met Office - Exeter",
5+
"GRIB_edition": 2,
6+
"GRIB_subCentre": 0,
7+
"_ARRAY_DIMENSIONS": [
8+
"variable",
9+
"init_time",
10+
"step",
11+
"y",
12+
"x"
13+
],
14+
"institution": "U.K. Met Office - Exeter"
15+
}
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)