Refuse to load NeMo Curator when dask query planning is explicitly enabled.

Both nemo_curator/__init__.py and tests/__init__.py read the
"dataframe.query-planning" dask config value up front: an explicit True raises
NotImplementedError, any other value is overridden with False. In
nemo_curator/__init__.py the check runs before the `.modules` import, and the
trailing dask.config.set call is reduced to the "dataframe.convert-string" key.
The lookup passes a None default so that a dask build which does not define
the key falls through to the disable branch instead of raising KeyError.

diff --git a/nemo_curator/__init__.py b/nemo_curator/__init__.py
index 3c1787641..c9e79ff74 100644
--- a/nemo_curator/__init__.py
+++ b/nemo_curator/__init__.py
@@ -14,6 +14,18 @@
 
 import dask
 
+# Disable query planning if possible
+# https://github.com/NVIDIA/NeMo-Curator/issues/73
+if dask.config.get("dataframe.query-planning", None) is True:
+    raise NotImplementedError(
+        "NeMo Curator does not support query planning yet. "
+        "Please disable query planning before importing "
+        "`nemo_curator`, `dask.dataframe` or `dask_cudf`."
+    )
+else:
+    dask.config.set({"dataframe.query-planning": False})
+
+
 from .modules import *
 from .utils.distributed_utils import get_client
 
@@ -21,12 +33,4 @@
 # to a string without this option.
 # See https://github.com/NVIDIA/NeMo-Curator/issues/33
 # This also happens when reading and writing to files
-
-# Disable query planning
-# https://github.com/NVIDIA/NeMo-Curator/issues/73
-dask.config.set(
-    {
-        "dataframe.convert-string": False,
-        "dataframe.query-planning": False,
-    }
-)
+dask.config.set({"dataframe.convert-string": False})
diff --git a/tests/__init__.py b/tests/__init__.py
index d9155f923..1950868ef 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -11,3 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import dask
+
+# Disable query planning before any tests are loaded
+# https://github.com/NVIDIA/NeMo-Curator/issues/73
+if dask.config.get("dataframe.query-planning", None) is True:
+    raise NotImplementedError(
+        "NeMo Curator does not support query planning yet. "
+        "Please disable query planning before importing "
+        "`nemo_curator`, `dask.dataframe` or `dask_cudf`."
+    )
+else:
+    dask.config.set({"dataframe.query-planning": False})