Skip to content

Commit

Permalink
adjust name
Browse files (browse the repository at this point in the history)
Signed-off-by: rjzamora <[email protected]>
  • Loading branch information
rjzamora committed May 31, 2024
1 parent c7a2e0a commit 0f80085
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions nemo_curator/utils/distributed_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -266,13 +266,13 @@ def read_single_partition(
elif filetype == "jsonl":
fs = fsspec.core.get_fs_token_paths(files[0])[0]
token = tokenize(files)
chunk_name = f"read-chunk-{token}"
dsk = {(chunk_name, i): (fs.cat_file, path) for i, path in enumerate(files)}
dsk[chunk_name] = (
name = f"get_bytes-{token}"
dsk = {(name, i): (fs.cat_file, path) for i, path in enumerate(files)}
dsk[name] = (
lambda x: x if backend == "cudf" else b"".join,
list(dsk.keys()),
)
df = read_f(dask.threaded.get(dsk, chunk_name), **read_kwargs)
df = read_f(dask.threaded.get(dsk, name), **read_kwargs)
else:
df = read_f(files, **read_kwargs)
df = df[sorted(df.columns)]
Expand Down

0 comments on commit 0f80085

Please sign in to comment.