Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xarray integration tests #2559

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ test = [
"mypy",
"hypothesis",
"universal-pathlib",
"xarray",
]

jupyter = [
Expand Down Expand Up @@ -211,6 +212,26 @@ run-mypy = "mypy src"
run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*"
list-env = "pip list"

# Hatch environment for running the test suite against downstream libraries
# (currently xarray), pinned to Python 3.13.
[tool.hatch.envs.downstream]
python = "3.13"
dependencies = [
'xarray @ git+https://github.com/pydata/xarray.git', # TODO: install from main until a release newer than xarray 2024.11.0 is available
'numpy',
'numcodecs',
'typing_extensions',
'donfig',
# test deps
'hypothesis',
'pytest',
'pytest-cov',
'pytest-asyncio',
]

# Commands runnable inside the downstream environment.
# NOTE(review): `run-mypy` invokes mypy, but mypy is not listed in the
# dependencies above -- confirm it is provided some other way.
[tool.hatch.envs.downstream.scripts]
run = "pytest --verbose"
run-mypy = "mypy src"
list-env = "pip list"

[tool.hatch.envs.min_deps]
description = """Test environment for minimum supported dependencies

Expand Down
4 changes: 2 additions & 2 deletions tests/test_store/test_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from typing import TYPE_CHECKING

import pytest
from botocore.session import Session

import zarr.api.asynchronous
from zarr.core.buffer import Buffer, cpu, default_buffer_prototype
Expand All @@ -24,6 +23,7 @@
requests = pytest.importorskip("requests")
moto_server = pytest.importorskip("moto.moto_server.threaded_moto_server")
moto = pytest.importorskip("moto")
botocore_session = pytest.importorskip("botocore.session")

# ### amended from s3fs ### #
test_bucket_name = "test"
Expand All @@ -50,7 +50,7 @@ def s3_base() -> Generator[None, None, None]:

def get_boto3_client() -> botocore.client.BaseClient:
    """Return a botocore S3 client that talks to ``endpoint_url``.

    The session is created through ``botocore_session`` (the module obtained
    via ``pytest.importorskip``) so this helper never relies on a direct
    ``botocore`` import.
    """
    # NB: we use the sync botocore client for setup
    session = botocore_session.Session()
    return session.create_client("s3", endpoint_url=endpoint_url)


Expand Down
78 changes: 78 additions & 0 deletions tests/test_xarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import string

import numpy as np
import pandas as pd
import pytest
import xarray as xr
from numcodecs import LZ4, Blosc, Zlib, Zstd
from numcodecs.abc import Codec

import zarr

# Default sizes of (dim1, dim2, dim3) used by the `dataset` fixture below.
_DEFAULT_TEST_DIM_SIZES = (8, 9, 10)


@pytest.fixture
def store() -> zarr.abc.store.Store:
    """Provide a newly constructed ``zarr.storage.MemoryStore``."""
    memory_store = zarr.storage.MemoryStore()
    return memory_store


@pytest.fixture
def dataset(
    seed: int = 12345,
    add_attrs: bool = True,
    dim_sizes: tuple[int, int, int] = _DEFAULT_TEST_DIM_SIZES,
    use_extension_array: bool = False,
) -> xr.Dataset:
    """Build a small synthetic ``xr.Dataset`` for round-trip tests.

    The dataset contains:

    * coordinates ``dim2`` (multiples of 0.5), ``dim3`` (lowercase letters)
      and ``time`` (20 daily timestamps starting 2000-01-01);
    * data variables ``var1``, ``var2`` and ``var3`` filled with normally
      distributed values drawn from a generator seeded with ``seed``;
    * optionally ``var4``, a ``pd.Categorical`` along ``dim1``;
    * a ``numbers`` coordinate along ``dim3``;
    * dataset-level encoding ``{"foo": "bar"}``.

    Parameters
    ----------
    seed
        Seed passed to ``np.random.default_rng``.
    add_attrs
        If true, each of ``var1``..``var3`` gets ``{"foo": "variable"}`` attrs.
    dim_sizes
        Sizes of ``dim1``, ``dim2`` and ``dim3``, in that order.
    use_extension_array
        If true, add the categorical ``var4`` variable.

    Raises
    ------
    RuntimeError
        If the requested ``dim3`` size exceeds the 26 available letters.
    """
    rng = np.random.default_rng(seed)
    _dims = {
        label: dim_sizes[position]
        for position, label in enumerate(("dim1", "dim2", "dim3"))
    }
    # Listed in sorted-name order; this fixes the order of the random draws.
    layout = (
        ("var1", ["dim1", "dim2"]),
        ("var2", ["dim1", "dim2"]),
        ("var3", ["dim3", "dim1"]),
    )

    ds = xr.Dataset()
    ds["dim2"] = ("dim2", 0.5 * np.arange(_dims["dim2"]))
    if _dims["dim3"] > 26:
        raise RuntimeError(f'Not enough letters for filling this dimension size ({_dims["dim3"]})')
    ds["dim3"] = ("dim3", list(string.ascii_lowercase[0 : _dims["dim3"]]))
    ds["time"] = ("time", pd.date_range("2000-01-01", periods=20))

    for name, axes in layout:
        shape = tuple(_dims[axis] for axis in axes)
        ds[name] = (axes, rng.normal(size=shape))
        if add_attrs:
            ds[name].attrs = {"foo": "variable"}

    if use_extension_array:
        # Draw the number of categories first, then the category labels.
        n_categories = rng.integers(1, 5)
        alphabet = list(string.ascii_lowercase[:n_categories])
        labels = rng.choice(alphabet, size=dim_sizes[0])
        ds["var4"] = ("dim1", pd.Categorical(labels))

    # The default sizes get a fixed, hand-written coordinate; any other
    # sizes get random integers of matching length.
    numbers = (
        np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3], dtype="int64")
        if dim_sizes == _DEFAULT_TEST_DIM_SIZES
        else rng.integers(0, 3, _dims["dim3"], dtype="int64")
    )
    ds.coords["numbers"] = ("dim3", numbers)
    ds.encoding = {"foo": "bar"}
    return ds


@pytest.mark.parametrize("compressor", [Zstd(level=1), LZ4(), Blosc(), Zlib()])
def test_roundtrip_v2(store: zarr.abc.store.Store, dataset: xr.Dataset, compressor: Codec) -> None:
    """Write ``dataset`` as Zarr format 2 and check that it reads back identically.

    ``var1`` is written with the parametrized ``compressor``. The store is then
    reopened through both ``xr.open_dataset(engine="zarr")`` and
    ``xr.open_zarr``; each result must be identical to the input, and the
    reopened ``var1`` must report a compressor of the same class.
    """
    dataset.to_zarr(
        store,
        encoding={"var1": {"compressor": compressor}},
        zarr_format=2,
    )

    via_open_dataset = xr.open_dataset(store, engine="zarr")
    assert dataset.identical(via_open_dataset)
    restored_codec = via_open_dataset.var1.encoding["compressor"]
    assert isinstance(restored_codec, type(compressor))

    via_open_zarr = xr.open_zarr(store)
    assert dataset.identical(via_open_zarr)
Loading