Skip to content

Commit

Permalink
make sure query-planning is disabled (#97)
Browse files Browse the repository at this point in the history
Signed-off-by: rjzamora <[email protected]>
  • Loading branch information
rjzamora authored Jun 6, 2024
1 parent 4eb8fe0 commit 4f851da
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 9 deletions.
22 changes: 13 additions & 9 deletions nemo_curator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,23 @@

import dask

# NeMo Curator does not support Dask's query planning yet, so make sure it is
# switched off at import time.
# https://github.com/NVIDIA/NeMo-Curator/issues/73
#
# An explicit `True` means query planning was requested; raise instead of
# overriding that setting. `None` is passed as the default to `get` so that a
# Dask config without a "dataframe.query-planning" entry is treated as
# "not explicitly enabled" rather than raising `KeyError` on import.
if dask.config.get("dataframe.query-planning", None) is True:
    raise NotImplementedError(
        "NeMo Curator does not support query planning yet. "
        "Please disable query planning before importing "
        "`nemo_curator`, `dask.dataframe` or `dask_cudf`."
    )

# Not explicitly enabled (False, None, or missing): pin it to False.
dask.config.set({"dataframe.query-planning": False})


from .modules import *
from .utils.distributed_utils import get_client

# Two Dask DataFrame options are forced off here:
#   * "dataframe.convert-string": without this, Dask automatically converts
#     the list score type to a string (this also happens when reading and
#     writing to files).
#     See https://github.com/NVIDIA/NeMo-Curator/issues/33
#   * "dataframe.query-planning": query planning is disabled.
#     See https://github.com/NVIDIA/NeMo-Curator/issues/73
dask.config.set(
    {
        "dataframe.query-planning": False,
        "dataframe.convert-string": False,
    }
)
13 changes: 13 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dask

# Disable query planning before any tests are loaded.
# https://github.com/NVIDIA/NeMo-Curator/issues/73
#
# If query planning has been explicitly enabled, raise instead of overriding
# that setting. `None` is passed as the default to `get` so that a Dask config
# without a "dataframe.query-planning" entry does not raise `KeyError` here.
if dask.config.get("dataframe.query-planning", None) is True:
    raise NotImplementedError(
        "NeMo Curator does not support query planning yet. "
        "Please disable query planning before importing "
        "`nemo_curator`, `dask.dataframe` or `dask_cudf`."
    )

# Not explicitly enabled (False, None, or missing): pin it to False.
dask.config.set({"dataframe.query-planning": False})

0 comments on commit 4f851da

Please sign in to comment.