Skip to content

Commit a2320a0

Browse files
committed
Unify metadata v2 fill value parsing
1 parent: 260cfbc · commit: a2320a0

File tree

1 file changed

+38
-46
lines changed
  • src/zarr/core/metadata

1 file changed

+38
-46
lines changed

src/zarr/core/metadata/v2.py

Lines changed: 38 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,29 @@ def shards(self) -> ChunkCoords | None:
109109
return None
110110

111111
def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
112+
def _serialize_fill_value(fv: Any) -> JSON:
113+
if self.fill_value is None:
114+
pass
115+
elif self.dtype.kind in "SV":
116+
# There's a relationship between self.dtype and self.fill_value
117+
# that mypy isn't aware of. The fact that we have S or V dtype here
118+
# means we should have a bytes-type fill_value.
119+
fv = base64.standard_b64encode(cast(bytes, self.fill_value)).decode("ascii")
120+
elif isinstance(fv, np.datetime64):
121+
if np.isnat(fv):
122+
fv = "NaT"
123+
else:
124+
fv = np.datetime_as_string(fv)
125+
elif isinstance(fv, numbers.Real):
126+
float_fv = float(fv)
127+
if np.isnan(float_fv):
128+
fv = "NaN"
129+
elif np.isinf(float_fv):
130+
fv = "Infinity" if float_fv > 0 else "-Infinity"
131+
elif isinstance(fv, numbers.Complex):
132+
fv = [_serialize_fill_value(fv.real), _serialize_fill_value(fv.imag)]
133+
return cast(JSON, fv)
134+
112135
def _json_convert(
113136
o: Any,
114137
) -> Any:
@@ -147,6 +170,7 @@ def _json_convert(
147170
raise TypeError
148171

149172
zarray_dict = self.to_dict()
173+
zarray_dict["fill_value"] = _serialize_fill_value(zarray_dict["fill_value"])
150174
zattrs_dict = zarray_dict.pop("attributes", {})
151175
json_indent = config.get("json_indent")
152176
return {
@@ -166,26 +190,7 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
166190
_data = data.copy()
167191
# Check that the zarr_format attribute is correct.
168192
_ = parse_zarr_format(_data.pop("zarr_format"))
169-
dtype = parse_dtype(_data["dtype"])
170193

171-
if dtype.kind in "SV":
172-
fill_value_encoded = _data.get("fill_value")
173-
if fill_value_encoded is not None:
174-
fill_value: Any = base64.standard_b64decode(fill_value_encoded)
175-
_data["fill_value"] = fill_value
176-
else:
177-
fill_value = _data.get("fill_value")
178-
if fill_value is not None:
179-
if np.issubdtype(dtype, np.datetime64):
180-
if fill_value == "NaT":
181-
_data["fill_value"] = np.array("NaT", dtype=dtype)[()]
182-
else:
183-
_data["fill_value"] = np.array(fill_value, dtype=dtype)[()]
184-
elif dtype.kind == "c" and isinstance(fill_value, list) and len(fill_value) == 2:
185-
val = complex(float(fill_value[0]), float(fill_value[1]))
186-
_data["fill_value"] = np.array(val, dtype=dtype)[()]
187-
elif dtype.kind in "f" and fill_value in {"NaN", "Infinity", "-Infinity"}:
188-
_data["fill_value"] = np.array(fill_value, dtype=dtype)[()]
189194
# zarr v2 allowed arbitrary keys in the metadata.
190195
# Filter the keys to only those expected by the constructor.
191196
expected = {x.name for x in fields(cls)}
@@ -206,33 +211,8 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
206211
return cls(**_data)
207212

208213
def to_dict(self) -> dict[str, JSON]:
209-
def _sanitize_fill_value(fv: Any) -> JSON:
210-
if fv is None:
211-
return fv
212-
elif isinstance(fv, np.datetime64):
213-
if np.isnat(fv):
214-
return "NaT"
215-
return np.datetime_as_string(fv)
216-
elif isinstance(fv, numbers.Real):
217-
float_fv = float(fv)
218-
if np.isnan(float_fv):
219-
fv = "NaN"
220-
elif np.isinf(float_fv):
221-
fv = "Infinity" if float_fv > 0 else "-Infinity"
222-
elif isinstance(fv, numbers.Complex):
223-
fv = [_sanitize_fill_value(fv.real), _sanitize_fill_value(fv.imag)]
224-
return cast(JSON, fv)
225-
226214
zarray_dict = super().to_dict()
227215

228-
if self.dtype.kind in "SV" and self.fill_value is not None:
229-
# There's a relationship between self.dtype and self.fill_value
230-
# that mypy isn't aware of. The fact that we have S or V dtype here
231-
# means we should have a bytes-type fill_value.
232-
fill_value = base64.standard_b64encode(cast(bytes, self.fill_value)).decode("ascii")
233-
zarray_dict["fill_value"] = fill_value
234-
235-
zarray_dict["fill_value"] = _sanitize_fill_value(zarray_dict["fill_value"])
236216
_ = zarray_dict.pop("dtype")
237217
dtype_json: JSON
238218
# In the case of zarr v2, the simplest i.e., '|VXX' dtype is represented as a string
@@ -330,7 +310,7 @@ def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata:
330310
return data
331311

332312

333-
def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
313+
def parse_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any:
334314
"""
335315
Parse a potential fill value into a value that is compatible with the provided dtype.
336316
@@ -345,14 +325,14 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
345325
-------
346326
An instance of `dtype`, or `None`, or any python object (in the case of an object dtype)
347327
"""
328+
348329
if fill_value is None or dtype.hasobject:
349330
# no fill value
350331
pass
351332
elif not isinstance(fill_value, np.void) and fill_value == 0:
352333
# this should be compatible across numpy versions for any array type, including
353334
# structured arrays
354335
fill_value = np.zeros((), dtype=dtype)[()]
355-
356336
elif dtype.kind == "U":
357337
# special case unicode because of encoding issues on Windows if passed through numpy
358338
# https://github.com/alimanfoo/zarr/pull/172#issuecomment-343782713
@@ -361,6 +341,18 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
361341
raise ValueError(
362342
f"fill_value {fill_value!r} is not valid for dtype {dtype}; must be a unicode string"
363343
)
344+
elif dtype.kind in "SV" and isinstance(fill_value, str):
345+
fill_value = base64.standard_b64decode(fill_value)
346+
elif np.issubdtype(dtype, np.datetime64):
347+
if fill_value == "NaT":
348+
fill_value = np.array("NaT", dtype=dtype)[()]
349+
else:
350+
fill_value = np.array(fill_value, dtype=dtype)[()]
351+
elif dtype.kind == "c" and isinstance(fill_value, list) and len(fill_value) == 2:
352+
complex_val = complex(float(fill_value[0]), float(fill_value[1]))
353+
fill_value = np.array(complex_val, dtype=dtype)[()]
354+
elif dtype.kind in "f" and fill_value in {"NaN", "Infinity", "-Infinity"}:
355+
fill_value = np.array(fill_value, dtype=dtype)[()]
364356
else:
365357
try:
366358
if isinstance(fill_value, bytes) and dtype.kind == "V":

0 commit comments

Comments
 (0)