From 8299fed7c598fb443c3cbdf39acd0eaf73f07118 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 27 Jan 2025 09:37:44 -0500 Subject: [PATCH 01/11] Import Tree from dask-awkward if not in dask --- src/dask_histogram/layers.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/dask_histogram/layers.py b/src/dask_histogram/layers.py index 0b882f2..c9e21ac 100644 --- a/src/dask_histogram/layers.py +++ b/src/dask_histogram/layers.py @@ -1,4 +1,14 @@ -from dask.layers import DataFrameTreeReduction +try: + from dask.layers import DataFrameTreeReduction +except ImportError: + try: + from dask_awkward.layers import AwkwardTreeReductionLayer as DataFrameTreeReduction + except ImportError: + DataFrameTreeReduction = None + +if DataFrameTreeReduction is None: + raise ImportError("DataFrameReduction is unimportable - either downgrade dask to <2025" + "or install dask-awkward >=2025.") class MockableDataFrameTreeReduction(DataFrameTreeReduction): From 19e0cd48179462bdc2986b11c5f4604218f8ef08 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 14:38:04 +0000 Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/dask_histogram/layers.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/dask_histogram/layers.py b/src/dask_histogram/layers.py index c9e21ac..4a4e182 100644 --- a/src/dask_histogram/layers.py +++ b/src/dask_histogram/layers.py @@ -2,13 +2,17 @@ from dask.layers import DataFrameTreeReduction except ImportError: try: - from dask_awkward.layers import AwkwardTreeReductionLayer as DataFrameTreeReduction + from dask_awkward.layers import ( + AwkwardTreeReductionLayer as DataFrameTreeReduction, + ) except ImportError: DataFrameTreeReduction = None if DataFrameTreeReduction is None: - raise ImportError("DataFrameReduction is unimportable - either downgrade 
dask to <2025" - "or install dask-awkward >=2025.") + raise ImportError( + "DataFrameTreeReduction is unimportable - either downgrade dask to <2025 " + "or install dask-awkward >=2025." + ) class MockableDataFrameTreeReduction(DataFrameTreeReduction): From 298994ec46febf4839fd0eceb8f0b786fab7634b Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 27 Jan 2025 09:41:49 -0500 Subject: [PATCH 03/11] TEMP: install dask-awkward from main --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 634cf7b..3520faa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,7 @@ jobs: run: | python3 -m pip install pip wheel python3 -m pip install -q --no-cache-dir -e .[complete] + python3 -m pip install git+https://github.com/dask-contrib/dask-awkward python3 -m pip list - name: test env: {"DASK_DATAFRAME__QUERY_PLANNING": "False"} From 7d728fd8be4d29a1c56ed12075a3bdd747cc85ec Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 27 Jan 2025 14:49:16 -0500 Subject: [PATCH 04/11] rewrite is_dataframe/series_like --- src/dask_histogram/core.py | 4 ++-- src/dask_histogram/typing.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dask_histogram/core.py b/src/dask_histogram/core.py index 1ea0fcb..f510401 100644 --- a/src/dask_histogram/core.py +++ b/src/dask_histogram/core.py @@ -920,14 +920,14 @@ def _weight_sample_check( def _is_dask_dataframe(obj): return ( - obj.__class__.__module__ == "dask.dataframe.core" + type(obj).__module__.startswith("dask.dataframe") and obj.__class__.__name__ == "DataFrame" ) def _is_dask_series(obj): return ( - obj.__class__.__module__ == "dask.dataframe.core" + type(obj).__module__.startswith("dask.dataframe") and obj.__class__.__name__ == "Series" ) diff --git a/src/dask_histogram/typing.py b/src/dask_histogram/typing.py index 019156e..7515479 100644 --- a/src/dask_histogram/typing.py +++ b/src/dask_histogram/typing.py @@ 
-3,7 +3,7 @@ from typing import Optional, Sequence, Tuple, Union from dask.array.core import Array -from dask.dataframe.core import DataFrame, Series +from dask.dataframe import DataFrame, Series from numpy.typing import ArrayLike BinType = Union[int, ArrayLike] From 4d0007be2c377b3afe0d85254ae7529f94a4e39f Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 8 Feb 2025 12:46:12 -0600 Subject: [PATCH 05/11] xfail dataframe tests for dask >=2025 --- tests/test_boost.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_boost.py b/tests/test_boost.py index de7df9e..8a76a0c 100644 --- a/tests/test_boost.py +++ b/tests/test_boost.py @@ -2,6 +2,7 @@ import boost_histogram.numpy as bhnp import dask.array as da import numpy as np +from packaging.version import parse as parse_version import pytest import dask_histogram.boost as dhb @@ -247,6 +248,10 @@ def test_histogramdd_multicolumn_input(): np.testing.assert_array_almost_equal(h1.view(), h2.view()) +@pytest.mark.xfail( + parse_version(dask.__version__) >= parse_version("2025"), + reason="to_dataframe is broken with dask 2025.1.0", +) def test_histogramdd_series(): pytest.importorskip("pandas") @@ -276,6 +281,10 @@ def test_histogramdd_series(): np.testing.assert_array_almost_equal(h1.view()["variance"], h2.view()["variance"]) +@pytest.mark.xfail( + parse_version(dask.__version__) >= parse_version("2025"), + reason="to_dataframe is broken with dask 2025.1.0", +) def test_histogramdd_arrays_and_series(): pytest.importorskip("pandas") @@ -305,6 +314,10 @@ def test_histogramdd_arrays_and_series(): np.testing.assert_array_almost_equal(h1.view()["variance"], h2.view()["variance"]) +@pytest.mark.xfail( + parse_version(dask.__version__) >= parse_version("2025"), + reason="to_dataframe is broken with dask 2025.1.0", +) def test_histogramdd_dataframe(): pytest.importorskip("pandas") x = da.random.standard_normal(size=(1000, 3), chunks=(200, 3)) From dc87ab75cdc1cec90b1de7a55050a4fbf6708a41 Mon Sep 17 
00:00:00 2001 From: Lindsey Gray Date: Sat, 8 Feb 2025 12:48:04 -0600 Subject: [PATCH 06/11] more xfails --- tests/test_core.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index e76061c..0869a3f 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -6,6 +6,7 @@ import numpy as np import pytest from dask.delayed import delayed +from packaging.version import parse as parse_version import dask_histogram.core as dhc @@ -124,6 +125,10 @@ def test_nd_array(weights): np.testing.assert_allclose(h.counts(flow=True), dh.compute().counts(flow=True)) +@pytest.mark.xfail( + parse_version(dask.__version__) >= parse_version("2025"), + reason="dask dataframe changed substantially in 2025.1.0", +) @pytest.mark.parametrize("weights", [True, None]) def test_df_input(weights): pytest.importorskip("pandas") From 4dfccd16f5a7696ccfd7a0064bc2dc470baf0a66 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 8 Feb 2025 12:48:46 -0600 Subject: [PATCH 07/11] import dask in test --- tests/test_boost.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_boost.py b/tests/test_boost.py index 8a76a0c..3bb579b 100644 --- a/tests/test_boost.py +++ b/tests/test_boost.py @@ -1,5 +1,6 @@ import boost_histogram as bh import boost_histogram.numpy as bhnp +import dask import dask.array as da import numpy as np from packaging.version import parse as parse_version From e585806668cea32749c95916e26538b8feb6d805 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 8 Feb 2025 12:49:07 -0600 Subject: [PATCH 08/11] import dask in test --- tests/test_core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_core.py b/tests/test_core.py index 0869a3f..6593aef 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,6 +1,7 @@ from __future__ import annotations import boost_histogram as bh +import dask import dask.array as da import dask.array.utils as dau import numpy as np From 5551e288ab1946b480ae4b115b7fc193733e9515 Mon Sep 
17 00:00:00 2001 From: Lindsey Gray Date: Sat, 8 Feb 2025 12:50:11 -0600 Subject: [PATCH 09/11] bump ruff, let it fix things --- .pre-commit-config.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f87b555..ea1b525 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.1 + rev: v0.9.3 hooks: - id: ruff + args: [--fix, --show-fixes] - id: ruff-format From 982d2a1a9f717cb132d03b8ef598e6b03f0343fb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 8 Feb 2025 18:50:16 +0000 Subject: [PATCH 10/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_boost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_boost.py b/tests/test_boost.py index 3bb579b..d422c51 100644 --- a/tests/test_boost.py +++ b/tests/test_boost.py @@ -3,8 +3,8 @@ import dask import dask.array as da import numpy as np -from packaging.version import parse as parse_version import pytest +from packaging.version import parse as parse_version import dask_histogram.boost as dhb import dask_histogram.core as dhc From 4184ef985fa225d162987dcf8fc959e0f2ac0584 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 8 Feb 2025 12:53:15 -0600 Subject: [PATCH 11/11] add dask-awkward dependency in pyproject.toml --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0698256..5e60a38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ classifiers = [ dependencies = [ "boost-histogram>=1.3.2", "dask>=2021.03.0", + "dask-awkward >=2025", ] dynamic = ["version"] @@ -39,7 +40,6 @@ complete = [ docs = [ "dask-sphinx-theme >=3.0.2", "dask[array,dataframe]", - "dask-awkward >=2023.10.0", # FIXME: 
`sphinxcontrib-*` pins are a workaround until we have sphinx>=5. # See https://github.com/dask/dask-sphinx-theme/issues/68. "sphinx >=4.0.0", @@ -51,7 +51,6 @@ docs = [ ] test = [ "dask[array,dataframe]", - "dask-awkward >=2023.10.0", "hist", "pytest", ]