|
8 | 8 | from compliance_checker.cf import util
|
9 | 9 | from compliance_checker.cf.cf_1_8 import CF1_8Check
|
10 | 10 | from compliance_checker.cf.util import VariableReferenceError, reference_attr_variables
|
| 11 | +from compliance_checker.cfutil import get_coordinate_variables |
11 | 12 |
|
12 | 13 |
|
13 | 14 | class CF1_9Check(CF1_8Check):
|
@@ -119,176 +120,187 @@ def check_time_coordinate(self, ds):
|
119 | 120 | return prev_return
|
120 | 121 |
|
121 | 122 | def check_domain_variables(self, ds: Dataset):
|
122 |
| - # Domain variables should have coordinates attribute, but should have |
123 |
| - # scalar dimensions |
| 123 | + |
124 | 124 | results = []
|
125 |
| - for domain_var in ( |
126 |
| - var |
127 |
| - for var in ds.get_variables_by_attributes( |
128 |
| - coordinates=lambda c: c is not None, |
| 125 | + domain_valid = TestCtx(BaseCheck.MEDIUM, self.section_titles["5.8"]) |
| 126 | + is_ragged_array_repr = cfutil.is_dataset_valid_ragged_array_repr_featureType( |
| 127 | + ds, |
| 128 | + getattr(ds, "featureType", ""), |
| 129 | + ) |
| 130 | + if is_ragged_array_repr: |
| 131 | + domain_valid.out_of += 1 |
| 132 | + ragged_array_dim_variable, ragged_attr_name = ( |
| 133 | + cfutil.resolve_ragged_array_dimension(ds) |
129 | 134 | )
|
130 |
| - ): |
131 |
| - |
132 |
| - # IMPLICIT CONFORMANCE REQUIRED 1/4 |
133 |
| - # Has a dimensions *NetCDF* attribute |
134 |
| - try: |
135 |
| - dim_nc_attr = domain_var.getncattr("dimensions") |
136 |
| - # most variables are unlikely to be domain variables, so don't treat this |
137 |
| - # as a failure |
138 |
| - except AttributeError: |
139 |
| - continue |
140 |
| - # IMPLICIT CONFORMANCE REQUIRED 2/4 |
141 |
| - # Aforementioned dimensions attribute is comprised of space separated |
142 |
| - # dimension names which must exist in the file |
143 |
| - domain_valid = TestCtx(BaseCheck.MEDIUM, self.section_titles["5.8"]) |
144 |
| - domain_valid.out_of += 2 |
145 |
| - domain_dims, dim_errors = reference_attr_variables(ds, dim_nc_attr, " ") |
146 |
| - if dim_errors: |
147 |
| - errors_str = ", ".join(dim_errors) |
148 |
| - domain_valid.messages.append( |
149 |
| - "Could not find the following " |
150 |
| - "dimensions referenced in " |
151 |
| - "dimensions attribute from " |
152 |
| - "domain variable " |
153 |
| - f"{domain_var.name}: {errors_str}", |
154 |
| - ) |
155 |
| - else: |
156 |
| - domain_valid.score += 1 |
157 |
| - domain_coord_vars, domain_coord_var_errors = reference_attr_variables( |
| 135 | + dim_name = getattr(ragged_array_dim_variable, ragged_attr_name) |
| 136 | + referenced_dim = reference_attr_variables( |
158 | 137 | ds,
|
159 |
| - domain_var.coordinates, |
160 |
| - " ", |
| 138 | + dim_name, |
| 139 | + reference_type="dimensions", |
161 | 140 | )
|
162 |
| - if domain_coord_var_errors: |
163 |
| - errors_str = ", ".join(err.name for err in domain_coord_var_errors) |
| 141 | + if isinstance(referenced_dim, VariableReferenceError): |
164 | 142 | domain_valid.messages.append(
|
165 |
| - "Could not find the following " |
166 |
| - "variables referenced in " |
167 |
| - "coordinates attribute from " |
168 |
| - "domain variable " |
169 |
| - f"{domain_var.name}: {errors_str}", |
| 143 | + f"Found ragged array variable {ragged_array_dim_variable.name}, " |
| 144 | + f"but dimension {dim_name} referenced from {ragged_attr_name} does not exist in file", |
170 | 145 | )
|
171 |
| - else: |
172 |
| - domain_valid.score += 1 |
173 | 146 |
|
174 |
| - is_ragged_array_repr = ( |
175 |
| - cfutil.is_dataset_valid_ragged_array_repr_featureType( |
176 |
| - ds, |
177 |
| - getattr(ds, "featureType", ""), |
178 |
| - ) |
179 |
| - ) |
180 |
| - if is_ragged_array_repr: |
181 |
| - domain_valid.out_of += 1 |
182 |
| - ragged_array_dim_variable, ragged_attr_name = ( |
183 |
| - cfutil.resolve_ragged_array_dimension(ds) |
184 |
| - ) |
185 |
| - dim_name = getattr(ragged_array_dim_variable, ragged_attr_name) |
186 |
| - referenced_dim = reference_attr_variables( |
187 |
| - ds, |
188 |
| - dim_name, |
189 |
| - reference_type="dimension", |
190 |
| - ) |
191 |
| - if isinstance(referenced_dim, VariableReferenceError): |
| 147 | + coord_var_reference_failures = [] |
| 148 | + for coord_var in reference_attr_variables(ds, dim_name, " ", "dimensions"): |
| 149 | + if isinstance(coord_var, VariableReferenceError): |
| 150 | + coord_var_reference_failures.append(coord_var) |
| 151 | + domain_valid.messages.append( |
| 152 | + f"Referenced coordinate variable {coord_var} does not exist in file", |
| 153 | + ) |
| 154 | + continue |
| 155 | + # TODO: check for label variables |
| 156 | + if not set( |
| 157 | + util.get_possible_label_variable_dimensions(coord_var), |
| 158 | + ).issubset({referenced_dim}): |
192 | 159 | domain_valid.messages.append(
|
193 | 160 | f"Found ragged array variable {ragged_array_dim_variable.name}, "
|
194 | 161 | f"but dimension {dim_name} referenced from {ragged_attr_name} does not exist in file",
|
195 | 162 | )
|
| 163 | + else: |
| 164 | + domain_valid.score += 1 |
| 165 | + else: |
| 166 | + # IMPLICIT CONFORMANCE REQUIRED 1/4 |
| 167 | + # Domain variables must have a dimensions attribute |
| 168 | + for domain_var in ( |
| 169 | + ds.variables[var] |
| 170 | + for var in ds.variables |
| 171 | + if "dimensions" in ds.variables[var].ncattrs() |
| 172 | + ): |
| 173 | + # store NetCDF variable attribute named "dimensions" |
| 174 | + dim_nc_attr = domain_var.getncattr("dimensions") |
| 175 | + # Every variable should have dimensionality (not to be confused with |
| 176 | + # dimensions NetCDF attribute here) even if scalar/empty, but non-empty |
| 177 | + # dimensions aren't allowed for domain variables |
196 | 178 |
|
197 |
| - coord_var_reference_failures = [] |
198 |
| - for coord_var in reference_attr_variables(ds, dim_name, " "): |
199 |
| - if isinstance(coord_var, VariableReferenceError): |
200 |
| - coord_var_reference_failures.append(coord_var) |
201 |
| - domain_valid.messages.append( |
202 |
| - f"Referenced coordinate variable {coord_var} does not exist in file", |
203 |
| - ) |
204 |
| - continue |
205 |
| - # TODO: check for label variables |
206 |
| - if not set( |
207 |
| - util.get_possible_label_variable_dimensions(coord_var), |
208 |
| - ).issubset({referenced_dim}): |
209 |
| - domain_valid.messages.append( |
210 |
| - f"Found ragged array variable {ragged_array_dim_variable.name}, " |
211 |
| - f"but dimension {dim_name} referenced from {ragged_attr_name} does not exist in file", |
212 |
| - ) |
213 |
| - else: |
214 |
| - domain_valid.score += 1 |
215 |
| - else: |
216 |
| - for coord_var in domain_coord_vars: |
217 |
| - domain_valid.out_of += 1 |
218 |
| - domain_dims_names = {var.name for var in domain_dims} |
219 |
| - variable_dim = util.get_possible_label_variable_dimensions( |
220 |
| - coord_var, |
| 179 | + domain_valid.out_of += 3 |
| 180 | + if domain_var.dimensions: |
| 181 | + domain_valid.messages.append( |
| 182 | + "Domain variable " |
| 183 | + f"{domain_var.name} should not have non-scalar/" |
| 184 | + "non-empty variable dimensions", |
221 | 185 | )
|
222 |
| - if not ( |
223 |
| - set( |
224 |
| - util.get_possible_label_variable_dimensions(coord_var), |
225 |
| - ).issubset(domain_dims_names) |
226 |
| - ): |
| 186 | + else: |
| 187 | + domain_valid.score += 1 |
| 188 | + |
| 189 | + # IMPLICIT CONFORMANCE REQUIRED 2/4 |
| 190 | + # Aforementioned dimensions attribute is comprised of space separated |
| 191 | + # dimension names which must exist in the file |
| 192 | + |
| 193 | + domain_dims, dim_errors = reference_attr_variables( |
| 194 | + ds, |
| 195 | + dim_nc_attr, |
| 196 | + " ", |
| 197 | + "dimensions", |
| 198 | + ) |
| 199 | + if dim_errors: |
| 200 | + errors_str = ", ".join(dim_errors) |
| 201 | + domain_valid.messages.append( |
| 202 | + "Could not find the following " |
| 203 | + "dimensions referenced in " |
| 204 | + "dimensions attribute from " |
| 205 | + "domain variable " |
| 206 | + f"{domain_var.name}: {errors_str}", |
| 207 | + ) |
| 208 | + else: |
| 209 | + domain_valid.score += 1 |
| 210 | + if "coordinates" in domain_var.ncattrs(): |
| 211 | + domain_coord_vars, domain_coord_var_errors = ( |
| 212 | + reference_attr_variables(ds, domain_var.coordinates, " ") |
| 213 | + ) |
| 214 | + if domain_coord_var_errors: |
| 215 | + errors_str = ", ".join( |
| 216 | + err.name for err in domain_coord_var_errors |
| 217 | + ) |
227 | 218 | domain_valid.messages.append(
|
228 | 219 | "Could not find the following "
|
229 | 220 | "variables referenced in "
|
230 | 221 | "coordinates attribute from "
|
231 | 222 | "domain variable "
|
232 |
| - f"{variable_dim}: {domain_dims_names}", |
| 223 | + f"{domain_var.name}: {errors_str}", |
233 | 224 | )
|
234 | 225 | else:
|
235 | 226 | domain_valid.score += 1
|
| 227 | + for coord_var in domain_coord_vars: |
| 228 | + if coord_var not in get_coordinate_variables(ds): |
| 229 | + continue |
| 230 | + domain_valid.out_of += 1 |
| 231 | + domain_dims_names = {var.name for var in domain_dims} |
| 232 | + variable_dim = util.get_possible_label_variable_dimensions( |
| 233 | + coord_var, |
| 234 | + ) |
| 235 | + if not ( |
| 236 | + set( |
| 237 | + util.get_possible_label_variable_dimensions( |
| 238 | + coord_var, |
| 239 | + ), |
| 240 | + ).issubset(domain_dims_names) |
| 241 | + ): |
| 242 | + domain_valid.messages.append( |
| 243 | + "Could not find the following " |
| 244 | + "variables referenced in " |
| 245 | + "coordinates attribute from " |
| 246 | + "domain variable " |
| 247 | + f"{variable_dim}: {domain_dims_names}", |
| 248 | + ) |
| 249 | + else: |
| 250 | + domain_valid.score += 1 |
236 | 251 |
|
237 |
| - # not in conformance docs, but mentioned as recommended anyways |
238 |
| - domain_valid.out_of += 1 |
239 |
| - long_name = getattr(domain_var, "long_name", None) |
240 |
| - if long_name is None or not isinstance(long_name, str): |
241 |
| - domain_valid.messages.append( |
242 |
| - f"For domain variable {domain_var.name} " |
243 |
| - f"it is recommended that attribute long_name be present and a string", |
244 |
| - ) |
245 |
| - results.append(domain_valid.to_result()) |
246 |
| - else: |
247 |
| - domain_valid.score += 1 |
248 |
| - appendix_a_not_recommended_attrs = [] |
249 |
| - for attr_name in domain_var.ncattrs(): |
250 |
| - if ( |
251 |
| - attr_name in self.appendix_a |
252 |
| - and "D" not in self.appendix_a[attr_name]["attr_loc"] |
253 |
| - ): |
254 |
| - appendix_a_not_recommended_attrs.append(attr_name) |
| 252 | + # not in conformance docs, but mentioned as recommended anyways |
| 253 | + domain_valid.out_of += 1 |
| 254 | + long_name = getattr(domain_var, "long_name", None) |
| 255 | + if long_name is None or not isinstance(long_name, str): |
| 256 | + domain_valid.messages.append( |
| 257 | + f"For domain variable {domain_var.name} " |
| 258 | + f"it is recommended that attribute long_name be present and a string", |
| 259 | + ) |
| 260 | + else: |
| 261 | + domain_valid.score += 1 |
255 | 262 |
|
256 |
| - domain_valid.out_of += 1 |
257 |
| - if appendix_a_not_recommended_attrs: |
258 |
| - domain_valid.messages.append( |
259 |
| - f"The following attributes appear in variable {domain_var.name} " |
260 |
| - "and CF Appendix A, but are not for use in domain variables: " |
261 |
| - f"{appendix_a_not_recommended_attrs}", |
262 |
| - ) |
263 |
| - else: |
264 |
| - # no errors occurred |
265 |
| - domain_valid.score += 1 |
| 263 | + appendix_a_not_recommended_attrs = [] |
| 264 | + for attr_name in domain_var.ncattrs(): |
| 265 | + if ( |
| 266 | + attr_name in self.appendix_a |
| 267 | + and "D" not in self.appendix_a[attr_name]["attr_loc"] |
| 268 | + ): |
| 269 | + appendix_a_not_recommended_attrs.append(attr_name) |
266 | 270 |
|
267 |
| - # IMPLEMENTATION CONFORMANCE 5.8 REQUIRED 4/4 |
268 |
| - # variables named by domain variable's cell_measures attributes must themselves be a subset |
269 |
| - # of dimensions named by domain variable's dimensions NetCDF attribute |
270 |
| - if hasattr(domain_var, "cell_measures"): |
271 |
| - cell_measures_var_names = regex.findall( |
272 |
| - r"\b(?:area|volume):\s+(\w+)", |
273 |
| - domain_var.cell_measures, |
274 |
| - ) |
275 |
| - # check exist |
276 |
| - for var_name in cell_measures_var_names: |
277 |
| - try: |
278 |
| - cell_measures_variable = ds.variables[var_name] |
279 |
| - except ValueError: |
280 |
| - # TODO: what to do here? |
281 |
| - continue |
282 |
| - domain_coord_var_names = { |
283 |
| - var_like.name for var_like in domain_coord_vars |
284 |
| - } |
285 |
| - domain_valid.assert_true( |
286 |
| - set(cell_measures_variable.dimensions).issubset( |
287 |
| - domain_coord_var_names, |
288 |
| - ), |
289 |
| - "Variables named in the cell_measures attributes must have a dimensions attribute with " |
290 |
| - "values that are a subset of the referring domain variable's dimension attribute", |
| 271 | + domain_valid.out_of += 1 |
| 272 | + if appendix_a_not_recommended_attrs: |
| 273 | + domain_valid.messages.append( |
| 274 | + f"The following attributes appear in variable {domain_var.name} " |
| 275 | + "and CF Appendix A, but are not for use in domain variables: " |
| 276 | + f"{appendix_a_not_recommended_attrs}", |
291 | 277 | )
|
| 278 | + else: |
| 279 | + # no errors occurred |
| 280 | + domain_valid.score += 1 |
| 281 | + |
| 282 | + # IMPLEMENTATION CONFORMANCE 5.8 REQUIRED 4/4 |
| 283 | + # variables named by domain variable's cell_measures attributes must themselves be a subset |
| 284 | + # of dimensions named by domain variable's dimensions NetCDF attribute |
| 285 | + if hasattr(domain_var, "cell_measures"): |
| 286 | + cell_measures_var_names = regex.findall( |
| 287 | + r"\b(?:area|volume):\s+(\w+)", |
| 288 | + domain_var.cell_measures, |
| 289 | + ) |
| 290 | + # check exist |
| 291 | + for var_name in cell_measures_var_names: |
| 292 | + try: |
| 293 | + cell_measures_variable = ds.variables[var_name] |
| 294 | + except ValueError: |
| 295 | + # TODO: what to do here? |
| 296 | + continue |
| 297 | + domain_valid.assert_true( |
| 298 | + set(cell_measures_variable.dimensions).issubset( |
| 299 | + {dd.name for dd in domain_dims}, |
| 300 | + ), |
| 301 | + "Variables named in the cell_measures attributes must have dimensions with " |
| 302 | + "values that are a subset of the referring domain variable's dimensions attribute", |
| 303 | + ) |
292 | 304 |
|
293 | 305 | results.append(domain_valid.to_result())
|
294 | 306 |
|
|
0 commit comments