Skip to content

ak.concatenate for axis=0 can fail for two files opened using uproot.dask #456

Open
@gordonwatts

Description

@gordonwatts

I am opening two files with uproot.dask and then using ak.concatenate to combine them end-to-end (along axis=0). With recent versions of dask_awkward this fails when the result is computed; in older versions it worked.

Versions

awkward                   2.5.2
awkward-cpp               28
dask-awkward              2024.1.2
dask                      2024.1.0
uproot                    5.2.1

Repro code:

import uproot
from pathlib import Path
import awkward as ak
import shutil


root_path_1 = Path(r"./file1.root")
root_path_2 = Path(r"./file2.root")
# If you use the same file, without copying, then everything works!
shutil.copy(root_path_1, root_path_2)



f1 = uproot.dask({root_path_1: "atlas_xaod_tree"})
f2 = uproot.dask({root_path_2: "atlas_xaod_tree"})

combined = ak.concatenate([f1, f2], axis=0)

combined.run_number.compute()

And file1 (a ROOT file, zipped up for upload):
file1.zip

The stack dump

This is from a Jupyter notebook; I also tested it from a command prompt.

---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\awkward\_dispatch.py:62](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_dispatch.py:62), in named_high_level_function.<locals>.dispatch(*args, **kwargs)
     [61](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_dispatch.py:61) try:
---> [62](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_dispatch.py:62)     next(gen_or_result)
     [63](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_dispatch.py:63) except StopIteration as err:

File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\awkward\operations\ak_concatenate.py:66](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/operations/ak_concatenate.py:66), in concatenate(arrays, axis, mergebool, highlevel, behavior, attrs)
     [65](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/operations/ak_concatenate.py:65) # Implementation
---> [66](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/operations/ak_concatenate.py:66) return _impl(arrays, axis, mergebool, highlevel, behavior, attrs)

File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\awkward\operations\ak_concatenate.py:162](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/operations/ak_concatenate.py:162), in _impl(arrays, axis, mergebool, highlevel, behavior, attrs)
    [160](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/operations/ak_concatenate.py:160)         batches.append([x])
--> [162](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/operations/ak_concatenate.py:162) contents = [ak._do.mergemany(b) for b in batches]
    [163](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/operations/ak_concatenate.py:163) if len(contents) > 1:

File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\awkward\operations\ak_concatenate.py:162](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/operations/ak_concatenate.py:162), in <listcomp>(.0)
    [160](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/operations/ak_concatenate.py:160)         batches.append([x])
--> [162](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/operations/ak_concatenate.py:162) contents = [ak._do.mergemany(b) for b in batches]
    [163](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/operations/ak_concatenate.py:163) if len(contents) > 1:

File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\awkward\_do.py:255](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_do.py:255), in mergemany(contents)
    [254](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_do.py:254) assert len(contents) != 0
--> [255](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_do.py:255) return contents[0]._mergemany(contents[1:])

File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\awkward\contents\numpyarray.py:526](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/contents/numpyarray.py:526), in NumpyArray._mergemany(self, others)
    [519](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/contents/numpyarray.py:519)         raise AssertionError(
    [520](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/contents/numpyarray.py:520)             "cannot merge "
    [521](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/contents/numpyarray.py:521)             + type(self).__name__
    [522](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/contents/numpyarray.py:522)             + " with "
    [523](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/contents/numpyarray.py:523)             + type(array).__name__
    [524](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/contents/numpyarray.py:524)         )
--> [526](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/contents/numpyarray.py:526) contiguous_arrays = self._backend.nplike.concat(contiguous_arrays)
    [528](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/contents/numpyarray.py:528) next = NumpyArray(
    [529](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/contents/numpyarray.py:529)     contiguous_arrays, parameters=parameters, backend=self._backend
    [530](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/contents/numpyarray.py:530) )

File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\awkward\_nplikes\array_module.py:434](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_nplikes/array_module.py:434), in ArrayModuleNumpyLike.concat(self, arrays, axis)
    [428](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_nplikes/array_module.py:428) def concat(
    [429](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_nplikes/array_module.py:429)     self,
    [430](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_nplikes/array_module.py:430)     arrays: list[ArrayLikeT] | tuple[ArrayLikeT, ...],
    [431](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_nplikes/array_module.py:431)     *,
    [432](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_nplikes/array_module.py:432)     axis: int | None = 0,
    [433](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_nplikes/array_module.py:433) ) -> ArrayLikeT:
--> [434](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_nplikes/array_module.py:434)     assert not any(isinstance(x, PlaceholderArray) for x in arrays)
    [435](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_nplikes/array_module.py:435)     if _nplike_concatenate_has_casting(self._module):

AssertionError: 

The above exception was the direct cause of the following exception:

AssertionError                            Traceback (most recent call last)
Cell In[1], [line 19](vscode-notebook-cell:?execution_count=1&line=19)
     [15](vscode-notebook-cell:?execution_count=1&line=15) f2 = uproot.dask({root_path_2: "atlas_xaod_tree"})
     [17](vscode-notebook-cell:?execution_count=1&line=17) combined = ak.concatenate([f1, f2], axis=0)
---> [19](vscode-notebook-cell:?execution_count=1&line=19) combined.run_number.compute()

File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\dask\base.py:379](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:379), in DaskMethodsMixin.compute(self, **kwargs)
    [355](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:355) def compute(self, **kwargs):
    [356](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:356)     """Compute this dask collection
    [357](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:357) 
    [358](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:358)     This turns a lazy Dask collection into its in-memory equivalent.
   (...)
    [377](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:377)     dask.compute
    [378](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:378)     """
--> [379](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:379)     (result,) = compute(self, traverse=False, **kwargs)
    [380](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:380)     return result

File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\dask\base.py:667](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:667), in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    [664](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:664) with shorten_traceback():
    [665](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:665)     results = schedule(dsk, keys, **kwargs)
--> [667](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:667) return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])

File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\dask\base.py:667](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:667), in <listcomp>(.0)
    [664](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:664) with shorten_traceback():
    [665](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:665)     results = schedule(dsk, keys, **kwargs)
--> [667](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask/base.py:667) return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])

File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\dask_awkward\lib\core.py:826](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask_awkward/lib/core.py:826), in _finalize_array(results)
    [824](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask_awkward/lib/core.py:824) # a sequence of arrays that need to be concatenated.
    [825](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask_awkward/lib/core.py:825) elif any(isinstance(r, ak.Array) for r in results):
--> [826](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask_awkward/lib/core.py:826)     return ak.concatenate(results)
    [828](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask_awkward/lib/core.py:828) # a sequence of scalars that are stored as np.ndarray(N) where N
    [829](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask_awkward/lib/core.py:829) # is a number (i.e. shapeless numpy array)
    [830](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask_awkward/lib/core.py:830) elif any(_is_numpy_or_cupy_like(r) for r in results) and any(
    [831](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask_awkward/lib/core.py:831)     r.shape == () for r in results
    [832](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/dask_awkward/lib/core.py:832) ):

File [c:\Users\gordo\Code\calratio\sx_training_fetch\.venv\lib\site-packages\awkward\_dispatch.py:38](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_dispatch.py:38), in named_high_level_function.<locals>.dispatch(*args, **kwargs)
     [35](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_dispatch.py:35) @wraps(func)
     [36](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_dispatch.py:36) def dispatch(*args, **kwargs):
     [37](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_dispatch.py:37)     # NOTE: this decorator assumes that the operation is exposed under `ak.`
---> [38](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_dispatch.py:38)     with OperationErrorContext(name, args, kwargs):
     [39](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_dispatch.py:39)         gen_or_result = func(*args, **kwargs)
     [40](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_dispatch.py:40)         if isgenerator(gen_or_result):
...
---> [95](file:///C:/Users/gordo/Code/calratio/sx_training_fetch/.venv/lib/site-packages/awkward/_errors.py:95)     raise self.decorate_exception(cls, exception)

AssertionError: 

See if this has been reported at https://github.com/scikit-hep/awkward/issues

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions