Skip to content

Support async FSMap objects in zarr.open #2774

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 51 commits into from
Jun 16, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
656a565
WIP: Support fsspec mutable mapping objects in zarr.open
maxrjones Jan 28, 2025
877eb80
Simplify library availability checking
maxrjones Feb 6, 2025
90cc08d
Merge branch 'main' into support-FSMap
maxrjones Feb 6, 2025
f04145c
Improve test coverage
maxrjones Feb 13, 2025
825cd6e
Merge branch 'main' into support-FSMap
maxrjones Feb 13, 2025
c4bfb06
Improve error messages
maxrjones Feb 13, 2025
06f35f2
Consolidate code
maxrjones Feb 13, 2025
e792e01
Make test more readable
maxrjones Feb 13, 2025
ed11018
Make async instances from sync fsmap objects
maxrjones Feb 13, 2025
e586001
Move test to fsspec store
maxrjones Feb 13, 2025
3f9a34c
Re-add type ignore
maxrjones Feb 13, 2025
4d1bd26
"Update docstring"
maxrjones Feb 13, 2025
4b7a5eb
Merge branch 'main' into support-FSMap
maxrjones Feb 13, 2025
abc5fdf
Add another test
maxrjones Feb 13, 2025
5d8e8ca
Merge branch 'main' into support-FSMap
d-v-b Feb 14, 2025
cb2db7d
Require auto_mkdir for LocalFileSystem
maxrjones Feb 14, 2025
ed9639f
Merge branch 'main' into support-FSMap
maxrjones Feb 14, 2025
46e8bff
Update test location
maxrjones Feb 14, 2025
a126d4b
Merge branch 'main' into support-FSMap
dcherian Feb 14, 2025
50c18f0
Merge branch 'main' into support-FSMap
maxrjones Feb 27, 2025
7517f72
Convert older filesystems to async
maxrjones Feb 27, 2025
3ae719b
Use if on fsspec versions rather than try; else
maxrjones Feb 27, 2025
d4d2256
Always use asynchronous=True in _make_async
maxrjones Feb 27, 2025
b4a2bd1
Merge branch 'main' into support-FSMap
maxrjones May 30, 2025
28f8420
Improve tests
maxrjones May 30, 2025
b782704
Apply suggestions from code review
maxrjones May 30, 2025
466bdd2
Apply more code suggestions
maxrjones May 30, 2025
b89a1a5
Fix typing error
maxrjones May 30, 2025
95ddf7c
Merge branch 'main' into support-FSMap
maxrjones May 30, 2025
9dcb558
Test remote stores in min_deps env
maxrjones May 30, 2025
f2b076a
Remove redundant import
maxrjones May 30, 2025
7032ca1
Merge branch 'main' into support-FSMap
maxrjones Jun 2, 2025
ac4c64c
Test warning
maxrjones Jun 2, 2025
27441e9
Lint
maxrjones Jun 2, 2025
696761e
Add pytest pin
dstansby Jun 3, 2025
9326025
Merge branch 'pin-pytest' into support-FSMap
maxrjones Jun 3, 2025
7441486
Add release note
maxrjones Jun 3, 2025
18ee24d
Generate coverage on min_deps and upstream jobs
maxrjones Jun 3, 2025
c1acdcf
Update src/zarr/storage/_fsspec.py
maxrjones Jun 3, 2025
7f48751
More useful error messages
maxrjones Jun 3, 2025
a02b259
Add TypeAlias
maxrjones Jun 3, 2025
7e0f2d6
Fix typing for no fsspec installation
maxrjones Jun 3, 2025
c05bd1f
Merge branch 'main' into support-FSMap
maxrjones Jun 5, 2025
ec38155
Merge branch 'main' into support-FSMap
d-v-b Jun 6, 2025
9ca4781
Merge branch 'main' into support-FSMap
d-v-b Jun 7, 2025
21e6493
Merge branch 'main' into support-FSMap
d-v-b Jun 7, 2025
d0a753d
Merge branch 'main' into support-FSMap
maxrjones Jun 9, 2025
3b06c53
Move imports
maxrjones Jun 9, 2025
98f00dd
Don't mutate FSMap object
maxrjones Jun 9, 2025
9039c65
Merge branch 'main' into support-FSMap
maxrjones Jun 12, 2025
86aa1e4
Merge branch 'main' into support-FSMap
maxrjones Jun 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions src/zarr/storage/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,27 @@
# By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate.
store = await MemoryStore.open(store_dict=store_like, read_only=_read_only)
else:
msg = f"Unsupported type for store_like: '{type(store_like).__name__}'" # type: ignore[unreachable]
raise TypeError(msg)
try: # type: ignore[unreachable]
import fsspec

if isinstance(store_like, fsspec.mapping.FSMap):
if path:
raise TypeError(

Check warning on line 320 in src/zarr/storage/_common.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/storage/_common.py#L320

Added line #L320 was not covered by tests
"'path' was provided but is not used for FSMap store_like objects"
)
if storage_options:
raise TypeError(

Check warning on line 324 in src/zarr/storage/_common.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/storage/_common.py#L324

Added line #L324 was not covered by tests
"'storage_options was provided but is not used for FSMap store_like objects"
)
store = FsspecStore.from_mapper(store_like, read_only=_read_only)
else:
raise (
TypeError(f"Unsupported type for store_like: '{type(store_like).__name__}'")
)
except ImportError:
raise (

Check warning on line 333 in src/zarr/storage/_common.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/storage/_common.py#L333

Added line #L333 was not covered by tests
TypeError(f"Unsupported type for store_like: '{type(store_like).__name__}'")
) from None

result = await StorePath.open(store, path=path_normalized, mode=mode)

Expand Down
58 changes: 48 additions & 10 deletions src/zarr/storage/_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
if TYPE_CHECKING:
from collections.abc import AsyncIterator, Iterable

from fsspec import AbstractFileSystem
from fsspec.asyn import AsyncFileSystem
from fsspec.mapping import FSMap

from zarr.core.buffer import BufferPrototype
from zarr.core.common import BytesLike
Expand All @@ -29,6 +31,20 @@
)


def _make_async(fs: AbstractFileSystem) -> AsyncFileSystem:
try:
from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper

fs = AsyncFileSystemWrapper(fs)
except ImportError as e:
raise ImportError(

Check warning on line 40 in src/zarr/storage/_fsspec.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/storage/_fsspec.py#L39-L40

Added lines #L39 - L40 were not covered by tests
f"The filesystem '{fs}' is synchronous, and the required "
"AsyncFileSystemWrapper is not available. Upgrade fsspec to version "
"2024.12.0 or later to enable this functionality."
) from e
return fs


class FsspecStore(Store):
"""
A remote Store based on FSSpec
Expand Down Expand Up @@ -136,6 +152,37 @@
allowed_exceptions=allowed_exceptions,
)

@classmethod
def from_mapper(
cls,
fs_map: FSMap,
read_only: bool = False,
allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS,
) -> FsspecStore:
"""
Create a FsspecStore from an upath object.

Parameters
----------
read_only : bool
Whether the store is read-only, defaults to False.
allowed_exceptions : tuple, optional
The exceptions that are allowed to be raised when accessing the
store. Defaults to ALLOWED_EXCEPTIONS.

Returns
-------
FsspecStore
"""
if not fs_map.fs.async_impl or not fs_map.fs.asynchronous:
raise TypeError("Filesystem needs to support async operations.")
return cls(
fs=fs_map.fs,
path=fs_map.root,
read_only=read_only,
allowed_exceptions=allowed_exceptions,
)

@classmethod
def from_url(
cls,
Expand Down Expand Up @@ -174,16 +221,7 @@

fs, path = url_to_fs(url, **opts)
if not fs.async_impl:
try:
from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper

fs = AsyncFileSystemWrapper(fs)
except ImportError as e:
raise ImportError(
f"The filesystem for URL '{url}' is synchronous, and the required "
"AsyncFileSystemWrapper is not available. Upgrade fsspec to version "
"2024.12.0 or later to enable this functionality."
) from e
fs = _make_async(fs)

# fsspec is not consistent about removing the scheme from the path, so check and strip it here
# https://github.com/fsspec/filesystem_spec/issues/1722
Expand Down
17 changes: 17 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,23 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None:
zarr.open(store=tmp_path, mode="w-")


@pytest.mark.xfail(
reason="Automatic sync -> async filesystems not implemented yet for FSMap objects."
)
def test_open_fsmap_file(tmp_path: pathlib.Path) -> None:
fsspec = pytest.importorskip("fsspec")
fs = fsspec.filesystem("file")
mapper = fs.get_mapper(tmp_path)
arr = zarr.open(store=mapper, mode="w", shape=(3, 3))
assert isinstance(arr, Array)

arr[...] = 3
z2 = zarr.open(store=mapper, mode="w", shape=(3, 3))
assert isinstance(z2, Array)
assert not (z2[:] == 3).all()
z2[:] = 3


@pytest.mark.parametrize("zarr_format", [2, 3])
def test_array_order(zarr_format: ZarrFormat) -> None:
arr = zarr.ones(shape=(2, 2), order=None, zarr_format=zarr_format)
Expand Down
14 changes: 14 additions & 0 deletions tests/test_store/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from packaging.version import parse as parse_version

import zarr.api.asynchronous
from zarr import Array
from zarr.abc.store import OffsetByteRequest
from zarr.core.buffer import Buffer, cpu, default_buffer_prototype
from zarr.core.sync import _collect_aiterator, sync
Expand Down Expand Up @@ -104,6 +105,19 @@ async def test_basic() -> None:
assert out[0].to_bytes() == data[1:]


def test_open_s3map() -> None:
s3_filesystem = s3fs.S3FileSystem(asynchronous=True, endpoint_url=endpoint_url, anon=False)
mapper = s3_filesystem.get_mapper(f"s3://{test_bucket_name}/map/foo/")
arr = zarr.open(store=mapper, mode="w", shape=(3, 3))
assert isinstance(arr, Array)

arr[...] = 3
z2 = zarr.open(store=mapper, mode="w", shape=(3, 3))
assert isinstance(z2, Array)
assert not (z2[:] == 3).all()
z2[:] = 3


class TestFsspecStoreS3(StoreTests[FsspecStore, cpu.Buffer]):
store_cls = FsspecStore
buffer_cls = cpu.Buffer
Expand Down
Loading