Skip to content

Commit 77fb91a

Browse files
committed
fixups
1 parent 3187229 commit 77fb91a

File tree

7 files changed

+487
-35
lines changed

7 files changed

+487
-35
lines changed

src/zarr/core/array.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,12 +1262,24 @@ def _chunk_grid_shape(self) -> tuple[int, ...]:
12621262
"""
12631263
The shape of the chunk grid for this array.
12641264
1265+
For arrays with sharding, this returns the grid of inner chunks, not shards.
1266+
For arrays with RectilinearChunkGrid, the shape is obtained from the chunk grid's ``get_chunk_grid_shape`` method.
1267+
12651268
Returns
12661269
-------
12671270
tuple[int, ...]
12681271
The shape of the chunk grid for this array.
12691272
"""
1270-
return tuple(starmap(ceildiv, zip(self.shape, self.chunks, strict=True)))
1273+
chunks = self.chunks
1274+
# Handle 0-dimensional arrays
1275+
if len(chunks) == 0:
1276+
return ()
1277+
# For RegularChunkGrid (or sharded with RegularChunkGrid), chunks is tuple[int, ...]
1278+
if isinstance(chunks[0], int):
1279+
return tuple(starmap(ceildiv, zip(self.shape, chunks, strict=True)))
1280+
# For RectilinearChunkGrid, chunks is tuple[tuple[int, ...], ...]
1281+
# Use the chunk_grid method
1282+
return self.metadata.chunk_grid.get_chunk_grid_shape(self.shape)
12711283

12721284
@property
12731285
def _shard_grid_shape(self) -> tuple[int, ...]:
@@ -5612,7 +5624,7 @@ def _iter_shard_regions(
56125624
If the array uses RectilinearChunkGrid (variable-sized chunks).
56135625
"""
56145626
chunks = array.chunks
5615-
if not isinstance(chunks[0], int):
5627+
if chunks and not isinstance(chunks[0], int):
56165628
raise NotImplementedError(
56175629
"_iter_shard_regions is not supported for arrays with variable-sized chunks "
56185630
"(RectilinearChunkGrid). Use the chunk_grid API directly for variable chunk access."
@@ -5661,7 +5673,7 @@ def _iter_chunk_regions(
56615673
If the array uses RectilinearChunkGrid (variable-sized chunks).
56625674
"""
56635675
chunks = array.chunks
5664-
if not isinstance(chunks[0], int):
5676+
if chunks and not isinstance(chunks[0], int):
56655677
raise NotImplementedError(
56665678
"_iter_chunk_regions is not supported for arrays with variable-sized chunks "
56675679
"(RectilinearChunkGrid). Use the chunk_grid API directly for variable chunk access."

src/zarr/core/chunk_grids.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,8 @@ def _normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tup
300300
chunks = tuple(int(chunks) for _ in shape)
301301

302302
# handle dask-style chunks (iterable of iterables)
303-
# TODO
303+
# Note: Only regular chunks are supported here. Irregular chunks will trigger a warning
304+
# and use only the first chunk size per dimension. For true variable chunks, use RectilinearChunkGrid.
304305
if all(isinstance(c, (tuple | list)) for c in chunks):
305306
# Check for irregular chunks and warn user
306307
for dim_idx, c in enumerate(chunks):
@@ -346,7 +347,7 @@ def from_dict(cls, data: dict[str, JSON] | ChunkGrid | NamedConfig[str, Any]) ->
346347

347348
# After isinstance check, data must be dict[str, JSON]
348349
# Cast needed for older mypy versions that don't narrow types properly
349-
data_dict = cast(dict[str, JSON], data) # type: ignore[redundant-cast]
350+
data_dict = cast(dict[str, JSON], data)
350351
name_parsed, _ = parse_named_configuration(data_dict)
351352
if name_parsed == "regular":
352353
return RegularChunkGrid._from_dict(data_dict)
@@ -662,7 +663,46 @@ def to_dict(self) -> dict[str, JSON]:
662663
}
663664

664665
def update_shape(self, new_shape: tuple[int, ...]) -> Self:
665-
"""TODO - write docstring"""
666+
"""
667+
Update the RectilinearChunkGrid to accommodate a new array shape.
668+
669+
When resizing an array, this method adjusts the chunk grid to match the new shape.
670+
For dimensions that grow, a new chunk is added with size equal to the size difference.
671+
For dimensions that shrink, chunks are truncated or removed to fit the new shape.
672+
673+
Parameters
674+
----------
675+
new_shape : tuple[int, ...]
676+
The new shape of the array. Must have the same number of dimensions as the chunk grid.
677+
678+
Returns
679+
-------
680+
Self
681+
A new RectilinearChunkGrid instance with updated chunk shapes
682+
683+
Raises
684+
------
685+
ValueError
686+
If the number of dimensions in new_shape doesn't match the number of dimensions
687+
in the chunk grid
688+
689+
Examples
690+
--------
691+
>>> grid = RectilinearChunkGrid(chunk_shapes=[[10, 20], [15, 15]])
692+
>>> grid.update_shape((50, 40)) # Grow both dimensions
693+
RectilinearChunkGrid(chunk_shapes=((10, 20, 20), (15, 15, 10)))
694+
695+
>>> grid = RectilinearChunkGrid(chunk_shapes=[[10, 20, 30], [25, 25]])
696+
>>> grid.update_shape((25, 30)) # Shrink both dimensions (60 -> 25, 50 -> 30)
697+
RectilinearChunkGrid(chunk_shapes=((10, 20), (25, 25)))
698+
699+
Notes
700+
-----
701+
This method is automatically called when an array is resized. The chunk size
702+
strategy for growing dimensions adds a single new chunk with size equal to
703+
the growth amount. This may not be optimal for all use cases, and users may
704+
want to manually adjust chunk shapes after resizing.
705+
"""
666706

667707
if len(new_shape) != len(self.chunk_shapes):
668708
raise ValueError(

src/zarr/core/metadata/v3.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ def validate_codecs(codecs: tuple[Codec, ...], dtype: ZDType[TBaseDType, TBaseSc
104104
# we need to have special codecs if we are decoding vlen strings or bytestrings
105105
# TODO: use codec ID instead of class name
106106
codec_class_name = abc.__class__.__name__
107-
# TODO: Fix typing here
108-
if isinstance(dtype, VariableLengthUTF8) and not codec_class_name == "VLenUTF8Codec":
107+
# TODO: Fix typing here - mypy cannot express the relationship between ZDType and VariableLengthUTF8
108+
if isinstance(dtype, VariableLengthUTF8) and not codec_class_name == "VLenUTF8Codec": # type: ignore[unreachable]
109109
raise ValueError(
110110
f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_class_name}`."
111111
)

src/zarr/testing/strategies.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -225,12 +225,17 @@ def rectilinear_chunks(draw: st.DrawFn, *, shape: tuple[int, ...]) -> list[list[
225225
226226
For each dimension, generate a list of chunk sizes that sum to the dimension size.
227227
Sometimes uses uniform chunks, sometimes uses variable-sized chunks.
228+
229+
Note: We limit the number of chunks to max 20 per dimension to avoid performance issues
230+
in property tests. With higher dimensions, the total chunk count grows multiplicatively.
228231
"""
229232
chunk_shapes: list[list[int]] = []
230233
for size in shape:
231234
assert size > 0
232235
if size > 1:
233-
nchunks = draw(st.integers(min_value=1, max_value=size - 1))
236+
# Limit max chunks to 20 to avoid performance issues with large chunk grids
237+
max_chunks = min(size - 1, 20)
238+
nchunks = draw(st.integers(min_value=1, max_value=max_chunks))
234239
dividers = sorted(
235240
draw(
236241
st.lists(
@@ -486,10 +491,8 @@ def basic_indices(
486491
allow_ellipsis=allow_ellipsis,
487492
).filter(
488493
lambda idxr: (
489-
not (
490-
is_negative_slice(idxr)
491-
or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr))
492-
)
494+
not is_negative_slice(idxr)
495+
and not (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr)) # type: ignore[redundant-expr]
493496
)
494497
)
495498
if math.prod(shape) >= 3:
@@ -575,27 +578,26 @@ def chunk_paths(draw: st.DrawFn, ndim: int, numblocks: tuple[int, ...], subset:
575578
@st.composite
576579
def complex_chunk_grids(draw: st.DrawFn) -> RectilinearChunkGrid:
577580
ndim = draw(st.integers(min_value=1, max_value=3))
578-
nchunks = draw(st.integers(min_value=10, max_value=100))
579-
# Don't require unique chunk sizes - rectilinear grids can have repeated sizes
580-
dim_chunks = st.lists(
581-
st.integers(min_value=1, max_value=10), min_size=nchunks, max_size=nchunks
582-
)
581+
# Limit to 5-10 chunks per dimension with small sizes to keep array size reasonable
582+
# Max array size: 10 chunks * 5 size = 50 elements per dim, 50^3 = 125k elements max
583+
nchunks = draw(st.integers(min_value=5, max_value=10))
584+
# Keep chunk sizes small (1-5) to avoid creating huge arrays
585+
dim_chunks = st.lists(st.integers(min_value=1, max_value=5), min_size=nchunks, max_size=nchunks)
583586
if draw(st.booleans()):
584587
event("using RectilinearChunkGrid")
585588
chunk_shapes = draw(st.lists(dim_chunks, min_size=ndim, max_size=ndim))
586589
return RectilinearChunkGrid(chunk_shapes=chunk_shapes)
587590

588591
else:
589592
event("using RectilinearChunkGrid (run length encoded)")
590-
# For RLE, we need to carefully control the total expanded chunks
591-
# to avoid creating arrays that are too large
592-
# Use a small number of RLE entries with small repeat counts
593-
num_rle_entries = draw(st.integers(min_value=5, max_value=20))
593+
# For RLE, keep total expanded chunks small: 3-5 entries * 2-3 repeats = 6-15 chunks
594+
# With chunk sizes 1-5, max array size: 15 * 5 = 75 per dim, 75^3 = ~422k elements max
595+
num_rle_entries = draw(st.integers(min_value=3, max_value=5))
594596
chunk_shapes_rle = [
595597
[
596598
[
597-
draw(st.integers(min_value=1, max_value=10)), # chunk size
598-
draw(st.integers(min_value=1, max_value=3)), # repeat count
599+
draw(st.integers(min_value=1, max_value=5)), # chunk size
600+
draw(st.integers(min_value=2, max_value=3)), # repeat count
599601
]
600602
for _ in range(num_rle_entries)
601603
]
@@ -613,7 +615,7 @@ def complex_chunked_arrays(
613615
draw: st.DrawFn,
614616
*,
615617
stores: st.SearchStrategy[StoreLike] = stores,
616-
) -> Array:
618+
) -> AnyArray:
617619
store = draw(stores, label="store")
618620
chunks = draw(complex_chunk_grids(), label="chunk grid")
619621
assert isinstance(chunks, RectilinearChunkGrid)

tests/test_array.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
from zarr.core.group import AsyncGroup
6868
from zarr.core.indexing import BasicIndexer, _iter_grid, _iter_regions
6969
from zarr.core.metadata.v2 import ArrayV2Metadata
70+
from zarr.core.metadata.v3 import ArrayV3Metadata
7071
from zarr.core.sync import sync
7172
from zarr.errors import (
7273
ContainsArrayError,

0 commit comments

Comments
 (0)