Skip to content

Commit 75e6b74

Browse files
authored
Drop Python 3.6 support (huggingface#4460)
* Remove python 3.6 code * Update requirements * Style * Update audio gh action * Benchmarks fix attempt huggingface#1 * Benchmarks fix attempt no.2 * Use newer image * Remove backticks * Add suggested command to benchmark action * Avoid some FutureWarnings and DeprecationWarnings * Disable test * Remove 3.6 pickling test * CI test * Use python 3.7 in ubuntu-latest * Disable s3 test on Linux * Remove weird json file * Remove cloudpickle stuff * Use lower torchaudio version * Try to fix s3 errors * Another attempt * Disable test
1 parent 10b1355 commit 75e6b74

File tree

15 files changed

+55
-168
lines changed

15 files changed

+55
-168
lines changed

.github/hub/update_hub_repositories.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import base64
21
import distutils.dir_util
32
import logging
43
import os

.github/workflows/benchmarks.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,16 @@ on: [push]
33
jobs:
44
run:
55
runs-on: [ubuntu-latest]
6-
container: docker://dvcorg/cml-py3:latest
6+
container: docker://dvcorg/cml:latest
77
steps:
88
- uses: actions/checkout@v2
99
- name: cml_run
1010
env:
1111
repo_token: ${{ secrets.GITHUB_TOKEN }}
1212
run: |
13+
# See https://github.com/actions/checkout/issues/760
14+
git config --global --add safe.directory /__w/datasets/datasets
15+
1316
# Your ML workflow goes here
1417
1518
pip install --upgrade pip

.github/workflows/ci.yml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
- name: Set up Python
2222
uses: actions/setup-python@v4
2323
with:
24-
python-version: "3.6"
24+
python-version: "3.7"
2525
- name: Install dependencies
2626
run: |
2727
python -m pip install --upgrade pip
@@ -49,21 +49,15 @@ jobs:
4949
- uses: actions/checkout@v3
5050
with:
5151
fetch-depth: 0
52-
- name: Set up Python 3.6
53-
if: ${{ matrix.os == 'ubuntu-latest' }}
54-
uses: actions/setup-python@v4
55-
with:
56-
python-version: 3.6
5752
- name: Set up Python 3.7
58-
if: ${{ matrix.os == 'windows-latest' }}
5953
uses: actions/setup-python@v4
6054
with:
6155
python-version: 3.7
6256
- name: Upgrade pip
6357
run: python -m pip install --upgrade pip
6458
- name: Pin setuptools-scm
6559
if: ${{ matrix.os == 'ubuntu-latest' }}
66-
run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.6" && pip install "setuptools-scm==6.4.2"
60+
run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.7" && pip install "setuptools-scm==6.4.2"
6761
- name: Install dependencies
6862
run: |
6963
pip install .[tests]

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
# Check that source code meets quality standards
44

55
quality:
6-
black --check --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py metrics
6+
black --check --line-length 119 --target-version py37 tests src benchmarks datasets/**/*.py metrics
77
isort --check-only tests src benchmarks datasets/**/*.py metrics
88
flake8 tests src benchmarks datasets/**/*.py metrics
99

1010
# Format source code automatically
1111

1212
style:
13-
black --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py metrics
13+
black --line-length 119 --target-version py37 tests src benchmarks datasets/**/*.py metrics
1414
isort tests src benchmarks datasets/**/*.py metrics
1515

1616
# Run tests for the library

additional-tests-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
unbabel-comet>=1.0.0;python_version>'3.6'
1+
unbabel-comet>=1.0.0
22
git+https://github.com/google-research/bleurt.git
33
git+https://github.com/ns-moosavi/coval.git
44
git+https://github.com/hendrycks/math.git

docs/source/installation.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Installation
22

3-
Before you start, you'll need to setup your environment and install the appropriate packages. 🤗 Datasets is tested on **Python 3.6+**.
3+
Before you start, you'll need to set up your environment and install the appropriate packages. 🤗 Datasets is tested on **Python 3.7+**.
44

55
<Tip>
66

setup.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@
5555
Then push the change with a message 'set dev version'
5656
"""
5757

58-
import os
5958

6059
from setuptools import find_packages, setup
6160

@@ -74,8 +73,6 @@
7473
"requests>=2.19.0",
7574
# progress bars in download and scripts
7675
"tqdm>=4.62.1",
77-
# dataclasses for Python versions that don't have it
78-
"dataclasses;python_version<'3.7'",
7976
# for fast hashing
8077
"xxhash",
8178
# for better multiprocessing
@@ -105,7 +102,7 @@
105102
BENCHMARKS_REQUIRE = [
106103
"numpy==1.18.5",
107104
"tensorflow==2.3.0",
108-
"torch==1.6.0",
105+
"torch==1.7.1",
109106
"transformers==3.0.2",
110107
]
111108

@@ -128,7 +125,7 @@
128125
"s3fs>=2021.11.1", # aligned with fsspec[http]>=2021.11.1
129126
"tensorflow>=2.3,!=2.6.0,!=2.6.1",
130127
"torch",
131-
"torchaudio",
128+
"torchaudio<0.12.0",
132129
"soundfile",
133130
"transformers",
134131
# datasets dependencies
@@ -165,8 +162,6 @@
165162
"texttable>=1.6.3",
166163
"Werkzeug>=1.0.1",
167164
"six~=1.15.0",
168-
# metadata validation
169-
"importlib_resources;python_version<'3.7'",
170165
]
171166

172167
TESTS_REQUIRE.extend(VISION_REQURE)
@@ -214,6 +209,7 @@
214209
packages=find_packages("src"),
215210
package_data={"datasets": ["py.typed", "scripts/templates/*"], "datasets.utils.resources": ["*.json", "*.yaml", "*.tsv"]},
216211
entry_points={"console_scripts": ["datasets-cli=datasets.commands.datasets_cli:main"]},
212+
python_requires=">=3.7.0",
217213
install_requires=REQUIRED_PKGS,
218214
extras_require=EXTRAS_REQUIRE,
219215
classifiers=[
@@ -224,7 +220,6 @@
224220
"License :: OSI Approved :: Apache Software License",
225221
"Operating System :: OS Independent",
226222
"Programming Language :: Python :: 3",
227-
"Programming Language :: Python :: 3.6",
228223
"Programming Language :: Python :: 3.7",
229224
"Programming Language :: Python :: 3.8",
230225
"Programming Language :: Python :: 3.9",

src/datasets/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,17 @@
1919

2020
__version__ = "2.4.1.dev0"
2121

22+
import platform
23+
2224
import pyarrow
2325
from packaging import version
2426

2527

28+
if version.parse(platform.python_version()) < version.parse("3.7"):
29+
raise ImportWarning(
30+
"To use `datasets`, Python>=3.7 is required, and the current version of Python doesn't match this condition."
31+
)
32+
2633
if version.parse(pyarrow.__version__).major < 6:
2734
raise ImportWarning(
2835
"To use `datasets`, the module `pyarrow>=6.0.0` is required, and the current version of `pyarrow` doesn't match this condition.\n"
@@ -31,6 +38,7 @@
3138

3239
SCRIPTS_VERSION = "main" if version.parse(__version__).is_devrelease else __version__
3340

41+
del platform
3442
del pyarrow
3543
del version
3644

src/datasets/features/features.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -824,7 +824,7 @@ def __getitem__(self, item: Union[int, slice, np.ndarray]) -> Union[np.ndarray,
824824
def take(
825825
self, indices: Sequence_[int], allow_fill: bool = False, fill_value: bool = None
826826
) -> "PandasArrayExtensionArray":
827-
indices: np.ndarray = np.asarray(indices, dtype=np.int)
827+
indices: np.ndarray = np.asarray(indices, dtype=int)
828828
if allow_fill:
829829
fill_value = (
830830
self.dtype.na_value if fill_value is None else np.asarray(fill_value, dtype=self.dtype.value_type)

src/datasets/utils/py_utils.py

Lines changed: 1 addition & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,15 @@
2222
import functools
2323
import itertools
2424
import os
25-
import pickle
2625
import re
27-
import sys
2826
import types
2927
from contextlib import contextmanager
3028
from dataclasses import fields, is_dataclass
3129
from io import BytesIO as StringIO
3230
from multiprocessing import Pool, RLock
3331
from shutil import disk_usage
3432
from types import CodeType, FunctionType
35-
from typing import Callable, ClassVar, Dict, Generic, List, Optional, Tuple, Union
33+
from typing import Dict, List, Optional, Tuple, Union
3634
from urllib.parse import urlparse
3735

3836
import dill
@@ -552,19 +550,6 @@ class Pickler(dill.Pickler):
552550

553551
dispatch = dill._dill.MetaCatchingDict(dill.Pickler.dispatch.copy())
554552

555-
def save_global(self, obj, name=None):
556-
if sys.version_info[:2] < (3, 7) and _CloudPickleTypeHintFix._is_parametrized_type_hint(
557-
obj
558-
): # noqa # pragma: no branch
559-
# Parametrized typing constructs in Python < 3.7 are not compatible
560-
# with type checks and ``isinstance`` semantics. For this reason,
561-
# it is easier to detect them using a duck-typing-based check
562-
# (``_is_parametrized_type_hint``) than to populate the Pickler's
563-
# dispatch with type-specific savers.
564-
_CloudPickleTypeHintFix._save_parametrized_type_hint(self, obj)
565-
else:
566-
dill.Pickler.save_global(self, obj, name=name)
567-
568553
def memoize(self, obj):
569554
# don't memoize strings since two identical strings can have different python ids
570555
if type(obj) != str:
@@ -610,47 +595,6 @@ def proxy(func):
610595
return proxy
611596

612597

613-
class _CloudPickleTypeHintFix:
614-
"""
615-
Type hints can't be properly pickled in python < 3.7
616-
CloudPickle provided a way to make it work in older versions.
617-
This class provide utilities to fix pickling of type hints in older versions.
618-
from https://github.com/cloudpipe/cloudpickle/pull/318/files
619-
"""
620-
621-
def _is_parametrized_type_hint(obj):
622-
# This is very cheap but might generate false positives.
623-
origin = getattr(obj, "__origin__", None) # typing Constructs
624-
values = getattr(obj, "__values__", None) # typing_extensions.Literal
625-
type_ = getattr(obj, "__type__", None) # typing_extensions.Final
626-
return origin is not None or values is not None or type_ is not None
627-
628-
def _create_parametrized_type_hint(origin, args):
629-
return origin[args]
630-
631-
def _save_parametrized_type_hint(pickler, obj):
632-
# The distorted type check sematic for typing construct becomes:
633-
# ``type(obj) is type(TypeHint)``, which means "obj is a
634-
# parametrized TypeHint"
635-
if type(obj) is type(Literal): # pragma: no branch
636-
initargs = (Literal, obj.__values__)
637-
elif type(obj) is type(Final): # pragma: no branch
638-
initargs = (Final, obj.__type__)
639-
elif type(obj) is type(ClassVar):
640-
initargs = (ClassVar, obj.__type__)
641-
elif type(obj) in [type(Union), type(Tuple), type(Generic)]:
642-
initargs = (obj.__origin__, obj.__args__)
643-
elif type(obj) is type(Callable):
644-
args = obj.__args__
645-
if args[0] is Ellipsis:
646-
initargs = (obj.__origin__, args)
647-
else:
648-
initargs = (obj.__origin__, (list(args[:-1]), args[-1]))
649-
else: # pragma: no cover
650-
raise pickle.PicklingError(f"Datasets pickle Error: Unknown type {type(obj)}")
651-
pickler.save_reduce(_CloudPickleTypeHintFix._create_parametrized_type_hint, initargs, obj=obj)
652-
653-
654598
@pklregister(CodeType)
655599
def _save_code(pickler, obj):
656600
"""

0 commit comments

Comments
 (0)