Skip to content

Commit 002125b

Browse files
authored
FIX-#7277: Remove Cudf storage format as unmaintained (#7290)
Signed-off-by: Anatoly Myachev <[email protected]>
1 parent 2006292 commit 002125b

File tree

31 files changed

+3
-1931
lines changed

31 files changed

+3
-1931
lines changed

docs/conf.py

-6
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,6 @@ def noop_decorator(*args, **kwargs):
2727

2828
# fake modules if they're missing
2929
for mod_name in (
30-
"cudf",
31-
"cupy",
3230
"xgboost",
3331
"unidist",
3432
"unidist.config",
@@ -39,10 +37,6 @@ def noop_decorator(*args, **kwargs):
3937
sys.modules[mod_name] = types.ModuleType(
4038
mod_name, f"fake {mod_name} for building docs"
4139
)
42-
if not hasattr(sys.modules["cudf"], "DataFrame"):
43-
sys.modules["cudf"].DataFrame = type("DataFrame", (object,), {})
44-
if not hasattr(sys.modules["cupy"], "ndarray"):
45-
sys.modules["cupy"].ndarray = type("ndarray", (object,), {})
4640
if not hasattr(sys.modules["xgboost"], "Booster"):
4741
sys.modules["xgboost"].Booster = type("Booster", (object,), {})
4842
if not hasattr(sys.modules["unidist"], "remote"):

docs/development/architecture.rst

-6
Original file line number | Diff line number | Diff line change
@@ -223,10 +223,6 @@ documentation page on :doc:`contributing </development/contributing>`.
223223
- Uses native python execution - mainly used for debugging.
224224
- The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.
225225
- For more information on the execution path, see the :doc:`pandas on Python </flow/modin/core/execution/python/implementations/pandas_on_python/index>` page.
226-
- cuDF on Ray (experimental)
227-
- Uses the Ray_ execution framework.
228-
- The storage format is `cudf` and the in-memory partition type is a cuDF DataFrame.
229-
- For more information on the execution path, see the :doc:`cuDF on Ray </flow/modin/core/execution/ray/implementations/cudf_on_ray/index>` page.
230226

231227
.. _directory-tree:
232228

@@ -300,7 +296,6 @@ details. The documentation covers most modules, with more docs being added every
300296
│ │ │ │ ├───common
301297
│ │ │ │ ├─── :doc:`generic </flow/modin/core/execution/ray/generic>`
302298
│ │ │ │ └───implementations
303-
│ │ │ │ ├─── :doc:`cudf_on_ray </flow/modin/core/execution/ray/implementations/cudf_on_ray/index>`
304299
│ │ │ │ └─── :doc:`pandas_on_ray </flow/modin/core/execution/ray/implementations/pandas_on_ray/index>`
305300
│ │ │ └───unidist
306301
│ │ │ ├───common
@@ -310,7 +305,6 @@ details. The documentation covers most modules, with more docs being added every
310305
│ │ ├─── :doc:`io </flow/modin/core/io/index>`
311306
│ │ └─── :doc:`storage_formats </flow/modin/core/storage_formats/index>`
312307
│ │ ├─── :doc:`base </flow/modin/core/storage_formats/base/query_compiler>`
313-
│ │ ├───cudf
314308
│ │ └─── :doc:`pandas </flow/modin/core/storage_formats/pandas/index>`
315309
│ ├───distributed
316310
│ │ ├───dataframe

docs/flow/modin/core/dataframe/index.rst

-1
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@ Engine specific:
2929
Execution system specific:
3030

3131
* :doc:`Modin PandasOnRayDataframe </flow/modin/core/execution/ray/implementations/pandas_on_ray/index>` is a specialization of the Core Modin Dataframe for ``PandasOnRay`` execution.
32-
* :doc:`Modin cuDFOnRayDataframe </flow/modin/core/execution/ray/implementations/cudf_on_ray/index>` is a specialization of the Core Modin Dataframe for ``cuDFOnRay`` execution.
3332
* :doc:`Modin PandasOnDaskDataframe </flow/modin/core/execution/dask/implementations/pandas_on_dask/index>` is specialization of the Core Modin Dataframe for ``PandasOnDask`` execution.
3433
* :doc:`Modin PandasOnPythonDataframe </flow/modin/core/execution/python/implementations/pandas_on_python/index>` is a specialization of the Core Modin Dataframe for ``PandasOnPython`` execution.
3534
* :doc:`Modin PandasOnUnidistDataframe </flow/modin/core/execution/unidist/implementations/pandas_on_unidist/index>` is a specialization of the Core Modin Dataframe for ``PandasOnUnidist`` execution.

docs/flow/modin/core/execution/ray/implementations/cudf_on_ray/dataframe.rst

-13
This file was deleted.

docs/flow/modin/core/execution/ray/implementations/cudf_on_ray/index.rst

-22
This file was deleted.

docs/flow/modin/core/execution/ray/implementations/cudf_on_ray/io.rst

-29
This file was deleted.

docs/flow/modin/core/execution/ray/implementations/cudf_on_ray/partitioning/axis_partition.rst

-30
This file was deleted.

docs/flow/modin/core/execution/ray/implementations/cudf_on_ray/partitioning/gpu_manager.rst

-10
This file was deleted.

docs/flow/modin/core/execution/ray/implementations/cudf_on_ray/partitioning/partition.rst

-21
This file was deleted.

docs/flow/modin/core/execution/ray/implementations/cudf_on_ray/partitioning/partition_manager.rst

-14
This file was deleted.

docs/flow/modin/core/storage_formats/index.rst

-4
Original file line number | Diff line number | Diff line change
@@ -64,9 +64,5 @@ High-level module overview
6464

6565
This module houses submodules of all of the stable storage formats:
6666

67-
..
68-
TODO: Insert a link to <cuDF module> when it is added (issue #3323)
69-
7067
- :doc:`Base module <base/query_compiler>` contains an abstract query compiler class which defines common API.
7168
- :doc:`Pandas module <pandas/index>` contains query compiler and text parsers for pandas storage format.
72-
- cuDF module contains query compiler and text parsers for cuDF storage format.

modin/config/envvars.py

+2 -5
Original file line number | Diff line number | Diff line change
@@ -257,7 +257,7 @@ class StorageFormat(EnvironmentVariable, type=str):
257257

258258
varname = "MODIN_STORAGE_FORMAT"
259259
default = "Pandas"
260-
choices = ("Pandas", "Cudf")
260+
choices = ("Pandas",)
261261

262262

263263
class IsExperimental(EnvironmentVariable, type=bool):
@@ -412,10 +412,7 @@ def _get_default(cls) -> int:
412412
-------
413413
int
414414
"""
415-
if StorageFormat.get() == "Cudf":
416-
return GpuCount.get()
417-
else:
418-
return CpuCount.get()
415+
return CpuCount.get()
419416

420417
@classmethod
421418
def get(cls) -> int:

modin/core/execution/dispatching/factories/factories.py

-17
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@
2626
import pandas
2727
from pandas.util._decorators import doc
2828

29-
from modin.config import IsExperimental
3029
from modin.core.io import BaseIO
3130
from modin.utils import get_current_execution
3231

@@ -778,19 +777,3 @@ def prepare(cls):
778777
)
779778

780779
cls.io_cls = PandasOnUnidistIO
781-
782-
783-
# EXPERIMENTAL FACTORIES
784-
# Factories that operate only in experimental mode. They provide access to executions
785-
# that have little coverage of implemented functionality or are not stable enough.
786-
@doc(_doc_factory_class, execution_name="cuDFOnRay")
787-
class ExperimentalCudfOnRayFactory(BaseFactory):
788-
@classmethod
789-
@doc(_doc_factory_prepare_method, io_module_name="``cuDFOnRayIO``")
790-
def prepare(cls):
791-
from modin.core.execution.ray.implementations.cudf_on_ray.io import cuDFOnRayIO
792-
793-
if not IsExperimental.get():
794-
raise ValueError("'CudfOnRay' only works in experimental mode.")
795-
796-
cls.io_cls = cuDFOnRayIO

modin/core/execution/ray/common/utils.py

+1 -17
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@
3434
RayInitCustomResources,
3535
RayRedisAddress,
3636
RayRedisPassword,
37-
StorageFormat,
3837
ValueSource,
3938
)
4039
from modin.core.execution.utils import set_env
@@ -139,17 +138,6 @@ def initialize_ray(
139138
with set_env(**env_vars):
140139
ray.init(**ray_init_kwargs)
141140

142-
if StorageFormat.get() == "Cudf":
143-
from modin.core.execution.ray.implementations.cudf_on_ray.partitioning import (
144-
GPU_MANAGERS,
145-
GPUManager,
146-
)
147-
148-
# Check that GPU_MANAGERS is empty because _update_engine can be called multiple times
149-
if not GPU_MANAGERS:
150-
for i in range(GpuCount.get()):
151-
GPU_MANAGERS.append(GPUManager.remote(i))
152-
153141
# Now ray is initialized, check runtime env config - especially useful if we join
154142
# an externally pre-configured cluster
155143
runtime_env_vars = ray.get_runtime_context().runtime_env.get("env_vars", {})
@@ -162,11 +150,7 @@ def initialize_ray(
162150
)
163151

164152
num_cpus = int(ray.cluster_resources()["CPU"])
165-
num_gpus = int(ray.cluster_resources().get("GPU", 0))
166-
if StorageFormat.get() == "Cudf":
167-
NPartitions._put(num_gpus)
168-
else:
169-
NPartitions._put(num_cpus)
153+
NPartitions._put(num_cpus)
170154

171155
# TODO(https://github.com/ray-project/ray/issues/28216): remove this
172156
# workaround once Ray gives a better way to suppress task errors.

modin/core/execution/ray/implementations/cudf_on_ray/__init__.py

-14
This file was deleted.

modin/core/execution/ray/implementations/cudf_on_ray/dataframe/__init__.py

-20
This file was deleted.

0 commit comments

Comments
 (0)