Skip to content

Commit fc1a6ae

Browse files
authored
Merge pull request #1159 from benjwadams/domain_variables_update
Domain variables update
2 parents 8b399a1 + 38d6540 commit fc1a6ae

File tree

6 files changed

+236
-175
lines changed

6 files changed

+236
-175
lines changed

compliance_checker/cf/cf_1_9.py

Lines changed: 157 additions & 145 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from compliance_checker.cf import util
99
from compliance_checker.cf.cf_1_8 import CF1_8Check
1010
from compliance_checker.cf.util import VariableReferenceError, reference_attr_variables
11+
from compliance_checker.cfutil import get_coordinate_variables
1112

1213

1314
class CF1_9Check(CF1_8Check):
@@ -119,176 +120,187 @@ def check_time_coordinate(self, ds):
119120
return prev_return
120121

121122
def check_domain_variables(self, ds: Dataset):
122-
# Domain variables should have coordinates attribute, but should have
123-
# scalar dimensions
123+
124124
results = []
125-
for domain_var in (
126-
var
127-
for var in ds.get_variables_by_attributes(
128-
coordinates=lambda c: c is not None,
125+
domain_valid = TestCtx(BaseCheck.MEDIUM, self.section_titles["5.8"])
126+
is_ragged_array_repr = cfutil.is_dataset_valid_ragged_array_repr_featureType(
127+
ds,
128+
getattr(ds, "featureType", ""),
129+
)
130+
if is_ragged_array_repr:
131+
domain_valid.out_of += 1
132+
ragged_array_dim_variable, ragged_attr_name = (
133+
cfutil.resolve_ragged_array_dimension(ds)
129134
)
130-
):
131-
132-
# IMPLICIT CONFORMANCE REQUIRED 1/4
133-
# Has a dimensions *NetCDF* attribute
134-
try:
135-
dim_nc_attr = domain_var.getncattr("dimensions")
136-
# most variables are unlikely to be domain variables, so don't treat this
137-
# as a failure
138-
except AttributeError:
139-
continue
140-
# IMPLICIT CONFORMANCE REQUIRED 2/4
141-
# Aforementioned dimensions attribute is comprised of space separated
142-
# dimension names which must exist in the file
143-
domain_valid = TestCtx(BaseCheck.MEDIUM, self.section_titles["5.8"])
144-
domain_valid.out_of += 2
145-
domain_dims, dim_errors = reference_attr_variables(ds, dim_nc_attr, " ")
146-
if dim_errors:
147-
errors_str = ", ".join(dim_errors)
148-
domain_valid.messages.append(
149-
"Could not find the following "
150-
"dimensions referenced in "
151-
"dimensions attribute from "
152-
"domain variable "
153-
f"{domain_var.name}: {errors_str}",
154-
)
155-
else:
156-
domain_valid.score += 1
157-
domain_coord_vars, domain_coord_var_errors = reference_attr_variables(
135+
dim_name = getattr(ragged_array_dim_variable, ragged_attr_name)
136+
referenced_dim = reference_attr_variables(
158137
ds,
159-
domain_var.coordinates,
160-
" ",
138+
dim_name,
139+
reference_type="dimensions",
161140
)
162-
if domain_coord_var_errors:
163-
errors_str = ", ".join(err.name for err in domain_coord_var_errors)
141+
if isinstance(referenced_dim, VariableReferenceError):
164142
domain_valid.messages.append(
165-
"Could not find the following "
166-
"variables referenced in "
167-
"coordinates attribute from "
168-
"domain variable "
169-
f"{domain_var.name}: {errors_str}",
143+
f"Found ragged array variable {ragged_array_dim_variable.name}, "
144+
f"but dimension {dim_name} referenced from {ragged_attr_name} does not exist in file",
170145
)
171-
else:
172-
domain_valid.score += 1
173146

174-
is_ragged_array_repr = (
175-
cfutil.is_dataset_valid_ragged_array_repr_featureType(
176-
ds,
177-
getattr(ds, "featureType", ""),
178-
)
179-
)
180-
if is_ragged_array_repr:
181-
domain_valid.out_of += 1
182-
ragged_array_dim_variable, ragged_attr_name = (
183-
cfutil.resolve_ragged_array_dimension(ds)
184-
)
185-
dim_name = getattr(ragged_array_dim_variable, ragged_attr_name)
186-
referenced_dim = reference_attr_variables(
187-
ds,
188-
dim_name,
189-
reference_type="dimension",
190-
)
191-
if isinstance(referenced_dim, VariableReferenceError):
147+
coord_var_reference_failures = []
148+
for coord_var in reference_attr_variables(ds, dim_name, " ", "dimensions"):
149+
if isinstance(coord_var, VariableReferenceError):
150+
coord_var_reference_failures.append(coord_var)
151+
domain_valid.messages.append(
152+
f"Referenced coordinate variable {coord_var} does not exist in file",
153+
)
154+
continue
155+
# TODO: check for label variables
156+
if not set(
157+
util.get_possible_label_variable_dimensions(coord_var),
158+
).issubset({referenced_dim}):
192159
domain_valid.messages.append(
193160
f"Found ragged array variable {ragged_array_dim_variable.name}, "
194161
f"but dimension {dim_name} referenced from {ragged_attr_name} does not exist in file",
195162
)
163+
else:
164+
domain_valid.score += 1
165+
else:
166+
# IMPLICIT CONFORMANCE REQUIRED 1/4
167+
# Domain variables must have a dimensions attribute
168+
for domain_var in (
169+
ds.variables[var]
170+
for var in ds.variables
171+
if "dimensions" in ds.variables[var].ncattrs()
172+
):
173+
# store NetCDF variable attribute named "dimensions"
174+
dim_nc_attr = domain_var.getncattr("dimensions")
175+
# Every variable should have dimensionality (not to be confused with
176+
# dimensions NetCDF attribute here) even if scalar/empty, but non-empty
177+
# dimensions aren't allowed for domain variables
196178

197-
coord_var_reference_failures = []
198-
for coord_var in reference_attr_variables(ds, dim_name, " "):
199-
if isinstance(coord_var, VariableReferenceError):
200-
coord_var_reference_failures.append(coord_var)
201-
domain_valid.messages.append(
202-
f"Referenced coordinate variable {coord_var} does not exist in file",
203-
)
204-
continue
205-
# TODO: check for label variables
206-
if not set(
207-
util.get_possible_label_variable_dimensions(coord_var),
208-
).issubset({referenced_dim}):
209-
domain_valid.messages.append(
210-
f"Found ragged array variable {ragged_array_dim_variable.name}, "
211-
f"but dimension {dim_name} referenced from {ragged_attr_name} does not exist in file",
212-
)
213-
else:
214-
domain_valid.score += 1
215-
else:
216-
for coord_var in domain_coord_vars:
217-
domain_valid.out_of += 1
218-
domain_dims_names = {var.name for var in domain_dims}
219-
variable_dim = util.get_possible_label_variable_dimensions(
220-
coord_var,
179+
domain_valid.out_of += 3
180+
if domain_var.dimensions:
181+
domain_valid.messages.append(
182+
"Domain variable "
183+
f"{domain_var.name} should not have non-scalar/"
184+
"non-empty variable dimensions",
221185
)
222-
if not (
223-
set(
224-
util.get_possible_label_variable_dimensions(coord_var),
225-
).issubset(domain_dims_names)
226-
):
186+
else:
187+
domain_valid.score += 1
188+
189+
# IMPLICIT CONFORMANCE REQUIRED 2/4
190+
# Aforementioned dimensions attribute is comprised of space separated
191+
# dimension names which must exist in the file
192+
193+
domain_dims, dim_errors = reference_attr_variables(
194+
ds,
195+
dim_nc_attr,
196+
" ",
197+
"dimensions",
198+
)
199+
if dim_errors:
200+
errors_str = ", ".join(dim_errors)
201+
domain_valid.messages.append(
202+
"Could not find the following "
203+
"dimensions referenced in "
204+
"dimensions attribute from "
205+
"domain variable "
206+
f"{domain_var.name}: {errors_str}",
207+
)
208+
else:
209+
domain_valid.score += 1
210+
if "coordinates" in domain_var.ncattrs():
211+
domain_coord_vars, domain_coord_var_errors = (
212+
reference_attr_variables(ds, domain_var.coordinates, " ")
213+
)
214+
if domain_coord_var_errors:
215+
errors_str = ", ".join(
216+
err.name for err in domain_coord_var_errors
217+
)
227218
domain_valid.messages.append(
228219
"Could not find the following "
229220
"variables referenced in "
230221
"coordinates attribute from "
231222
"domain variable "
232-
f"{variable_dim}: {domain_dims_names}",
223+
f"{domain_var.name}: {errors_str}",
233224
)
234225
else:
235226
domain_valid.score += 1
227+
for coord_var in domain_coord_vars:
228+
if coord_var not in get_coordinate_variables(ds):
229+
continue
230+
domain_valid.out_of += 1
231+
domain_dims_names = {var.name for var in domain_dims}
232+
variable_dim = util.get_possible_label_variable_dimensions(
233+
coord_var,
234+
)
235+
if not (
236+
set(
237+
util.get_possible_label_variable_dimensions(
238+
coord_var,
239+
),
240+
).issubset(domain_dims_names)
241+
):
242+
domain_valid.messages.append(
243+
"Could not find the following "
244+
"variables referenced in "
245+
"coordinates attribute from "
246+
"domain variable "
247+
f"{variable_dim}: {domain_dims_names}",
248+
)
249+
else:
250+
domain_valid.score += 1
236251

237-
# not in conformance docs, but mentioned as recommended anyways
238-
domain_valid.out_of += 1
239-
long_name = getattr(domain_var, "long_name", None)
240-
if long_name is None or not isinstance(long_name, str):
241-
domain_valid.messages.append(
242-
f"For domain variable {domain_var.name} "
243-
f"it is recommended that attribute long_name be present and a string",
244-
)
245-
results.append(domain_valid.to_result())
246-
else:
247-
domain_valid.score += 1
248-
appendix_a_not_recommended_attrs = []
249-
for attr_name in domain_var.ncattrs():
250-
if (
251-
attr_name in self.appendix_a
252-
and "D" not in self.appendix_a[attr_name]["attr_loc"]
253-
):
254-
appendix_a_not_recommended_attrs.append(attr_name)
252+
# not in conformance docs, but mentioned as recommended anyways
253+
domain_valid.out_of += 1
254+
long_name = getattr(domain_var, "long_name", None)
255+
if long_name is None or not isinstance(long_name, str):
256+
domain_valid.messages.append(
257+
f"For domain variable {domain_var.name} "
258+
f"it is recommended that attribute long_name be present and a string",
259+
)
260+
else:
261+
domain_valid.score += 1
255262

256-
domain_valid.out_of += 1
257-
if appendix_a_not_recommended_attrs:
258-
domain_valid.messages.append(
259-
f"The following attributes appear in variable {domain_var.name} "
260-
"and CF Appendix A, but are not for use in domain variables: "
261-
f"{appendix_a_not_recommended_attrs}",
262-
)
263-
else:
264-
# no errors occurred
265-
domain_valid.score += 1
263+
appendix_a_not_recommended_attrs = []
264+
for attr_name in domain_var.ncattrs():
265+
if (
266+
attr_name in self.appendix_a
267+
and "D" not in self.appendix_a[attr_name]["attr_loc"]
268+
):
269+
appendix_a_not_recommended_attrs.append(attr_name)
266270

267-
# IMPLEMENTATION CONFORMANCE 5.8 REQUIRED 4/4
268-
# variables named by domain variable's cell_measures attributes must themselves be a subset
269-
# of dimensions named by domain variable's dimensions NetCDF attribute
270-
if hasattr(domain_var, "cell_measures"):
271-
cell_measures_var_names = regex.findall(
272-
r"\b(?:area|volume):\s+(\w+)",
273-
domain_var.cell_measures,
274-
)
275-
# check exist
276-
for var_name in cell_measures_var_names:
277-
try:
278-
cell_measures_variable = ds.variables[var_name]
279-
except ValueError:
280-
# TODO: what to do here?
281-
continue
282-
domain_coord_var_names = {
283-
var_like.name for var_like in domain_coord_vars
284-
}
285-
domain_valid.assert_true(
286-
set(cell_measures_variable.dimensions).issubset(
287-
domain_coord_var_names,
288-
),
289-
"Variables named in the cell_measures attributes must have a dimensions attribute with "
290-
"values that are a subset of the referring domain variable's dimension attribute",
271+
domain_valid.out_of += 1
272+
if appendix_a_not_recommended_attrs:
273+
domain_valid.messages.append(
274+
f"The following attributes appear in variable {domain_var.name} "
275+
"and CF Appendix A, but are not for use in domain variables: "
276+
f"{appendix_a_not_recommended_attrs}",
291277
)
278+
else:
279+
# no errors occurred
280+
domain_valid.score += 1
281+
282+
# IMPLEMENTATION CONFORMANCE 5.8 REQUIRED 4/4
283+
# variables named by domain variable's cell_measures attributes must themselves be a subset
284+
# of dimensions named by domain variable's dimensions NetCDF attribute
285+
if hasattr(domain_var, "cell_measures"):
286+
cell_measures_var_names = regex.findall(
287+
r"\b(?:area|volume):\s+(\w+)",
288+
domain_var.cell_measures,
289+
)
290+
# check exist
291+
for var_name in cell_measures_var_names:
292+
try:
293+
cell_measures_variable = ds.variables[var_name]
294+
except ValueError:
295+
# TODO: what to do here?
296+
continue
297+
domain_valid.assert_true(
298+
set(cell_measures_variable.dimensions).issubset(
299+
{dd.name for dd in domain_dims},
300+
),
301+
"Variables named in the cell_measures attributes must have dimensions with "
302+
"values that are a subset of the referring domain variable's dimensions attribute",
303+
)
292304

293305
results.append(domain_valid.to_result())
294306

compliance_checker/cf/util.py

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -466,7 +466,7 @@ def reference_attr_variables(
466466
dataset: Dataset,
467467
attributes_string: str,
468468
split_by: str = None,
469-
reference_type: str = "variable",
469+
reference_type: str = "variables",
470470
group: Union[Group, Dataset] = None,
471471
):
472472
"""
@@ -476,21 +476,23 @@ def reference_attr_variables(
476476
references, errors = [], []
477477
if attributes_string is None:
478478
return None, None
479-
elif reference_type == "variable":
480-
if split_by is None:
481-
return_val = dataset.variables.get(
482-
attributes_string,
483-
VariableReferenceError(attributes_string),
484-
)
485-
if not isinstance(return_val, VariableReferenceError):
486-
return return_val, None
487-
else:
488-
return None, return_val
479+
if reference_type not in {"variables", "dimensions"}:
480+
raise ValueError("reference_type must be one of 'variables' or 'dimensions'")
481+
reference_attr = getattr(dataset, reference_type)
482+
if split_by is None:
483+
return_val = reference_attr.get(
484+
attributes_string,
485+
VariableReferenceError(attributes_string),
486+
)
487+
if not isinstance(return_val, VariableReferenceError):
488+
return return_val, None
489489
else:
490-
string_proc = attributes_string.split(split_by)
491-
for var_name in string_proc:
492-
if var_name in dataset.variables:
493-
references.append(dataset.variables[var_name])
494-
else:
495-
errors.append(VariableReferenceError(var_name))
496-
return references, errors
490+
return None, return_val
491+
else:
492+
string_proc = attributes_string.split(split_by)
493+
for var_name in string_proc:
494+
if var_name in reference_attr:
495+
references.append(reference_attr[var_name])
496+
else:
497+
errors.append(VariableReferenceError(var_name))
498+
return references, errors

0 commit comments

Comments
 (0)