
Commit 06ae3f6

Format code with ruff (#5519)
* Update config files
* Format code
* Some manual fixes
* Fix
1 parent 819bc6e commit 06ae3f6


51 files changed (+52, -105 lines)

.github/workflows/ci.yml (+1, -2)

@@ -28,8 +28,7 @@ jobs:
       - name: Check quality
         run: |
           black --check tests src benchmarks metrics
-          isort --check-only tests src benchmarks metrics
-          flake8 tests src benchmarks metrics
+          ruff tests src benchmarks metrics

   test:
     needs: check_code_quality

.gitignore (+4, -1)

@@ -61,4 +61,7 @@ docs/source/_build/

 # Benchmark results
 report.json
-report.md
+report.md
+
+# Ruff
+.ruff_cache

CONTRIBUTING.md (+1, -1)

@@ -57,7 +57,7 @@ If you want to add a dataset see specific instructions in the section [*How to a

 5. Develop the features on your branch.

-6. Format your code. Run black and isort so that your newly added files look nice with the following command:
+6. Format your code. Run black and ruff so that your newly added files look nice with the following command:

 ```bash
 make style

Makefile (+5, -4)

@@ -1,17 +1,18 @@
 .PHONY: quality style test

+check_dirs := tests src benchmarks metrics
+
 # Check that source code meets quality standards

 quality:
-    black --check tests src benchmarks metrics
-    isort --check-only tests src benchmarks metrics
-    flake8 tests src benchmarks metrics
+    black --check $(check_dirs)
+    ruff $(check_dirs)

 # Format source code automatically

 style:
     black tests src benchmarks metrics
-    isort tests src benchmarks metrics
+    ruff $(check_dirs) --fix

 # Run tests for the library


benchmarks/format.py (-1)

@@ -9,7 +9,6 @@ def format_json_to_md(input_json_file, output_md_file):
     output_md = ["<details>", "<summary>Show updated benchmarks!</summary>", " "]

     for benchmark_name in sorted(results):
-
         benchmark_res = results[benchmark_name]

         benchmark_file_name = benchmark_name.split("/")[-1]
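
Most of the per-file edits that follow share one mechanical pattern, visible in the hunk above: a stray blank line sitting directly under a `def` (or `with`/`for`) header is deleted. This most likely comes from the black 22 → 23 bump this commit also makes in `setup.py` (black's 2023 stable style removes empty lines at the start of a block), rather than from ruff itself. A minimal before/after sketch with a made-up function:

```python
# Before: a blank line separates the signature from the body.
def mean(values):

    return sum(values) / len(values)


# After formatting: the body starts on the first line under the signature.
def mean(values):
    return sum(values) / len(values)
```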

metrics/bleurt/bleurt.py (-2)

@@ -78,7 +78,6 @@
 @datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class BLEURT(datasets.Metric):
     def _info(self):
-
         return datasets.MetricInfo(
             description=_DESCRIPTION,
             citation=_CITATION,
@@ -95,7 +94,6 @@ def _info(self):
         )

     def _download_and_prepare(self, dl_manager):
-
         # check that config name specifies a valid BLEURT model
         if self.config_name == "default":
             logger.warning(

metrics/code_eval/execute.py (-2)

@@ -54,9 +54,7 @@ def check_correctness(check_program, timeout, task_id, completion_id):


 def unsafe_execute(check_program, result, timeout):
-
     with create_tempdir():
-
         # These system calls are needed when cleaning up tempdir.
         import os
         import shutil

metrics/comet/comet.py (-1)

@@ -108,7 +108,6 @@
 @datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class COMET(datasets.Metric):
     def _info(self):
-
         return datasets.MetricInfo(
             description=_DESCRIPTION,
             citation=_CITATION,

metrics/coval/coval.py (+1, -2)

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ CoVal metric. """
-import coval  # From: git+https://github.com/ns-moosavi/coval.git noqa: F401
+import coval  # From: git+https://github.com/ns-moosavi/coval.git # noqa: F401
 from coval.conll import reader, util
 from coval.eval import evaluator

@@ -167,7 +167,6 @@
 def get_coref_infos(
     key_lines, sys_lines, NP_only=False, remove_nested=False, keep_singletons=True, min_span=False, doc="dummy_doc"
 ):
-
     key_doc_lines = {doc: key_lines}
     sys_doc_lines = {doc: sys_lines}

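
The tiny change in the first hunk above matters: linters only honor a suppression written as its own `# noqa` comment marker, so `...coval.git noqa: F401` buried in a prose comment was never recognized, while `... # noqa: F401` is. These inline directives are needed now because the blanket `per-file-ignores = metrics/*:F401` exception is being dropped from `setup.cfg` (see below). A runnable sketch using a stdlib module as a stand-in:

```python
# Not honored: "noqa" sits inside a prose comment with no "#" before it.
import json  # re-exported for callers noqa: F401

# Honored: the directive is its own comment marker after the prose comment.
import json  # re-exported for callers  # noqa: F401
```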

metrics/exact_match/exact_match.py (-1)

@@ -108,7 +108,6 @@ def _compute(
         ignore_punctuation=False,
         ignore_numbers=False,
     ):
-
         if regexes_to_ignore is not None:
             for s in regexes_to_ignore:
                 predictions = np.array([re.sub(s, "", x) for x in predictions])

metrics/indic_glue/indic_glue.py (-1)

@@ -15,7 +15,6 @@

 import numpy as np
 from scipy.spatial.distance import cdist
-from scipy.stats import pearsonr, spearmanr
 from sklearn.metrics import f1_score

 import datasets

metrics/mae/mae.py (-1)

@@ -106,7 +106,6 @@ def _get_feature_types(self):
         }

     def _compute(self, predictions, references, sample_weight=None, multioutput="uniform_average"):
-
         mae_score = mean_absolute_error(references, predictions, sample_weight=sample_weight, multioutput=multioutput)

         return {"mae": mae_score}

metrics/mahalanobis/mahalanobis.py (-1)

@@ -71,7 +71,6 @@ def _info(self):
         )

     def _compute(self, X, reference_distribution):
-
         # convert to numpy arrays
         X = np.array(X)
         reference_distribution = np.array(reference_distribution)

metrics/mauve/mauve.py (+5, -5)

@@ -14,11 +14,11 @@
 # limitations under the License.
 """ MAUVE metric from https://github.com/krishnap25/mauve. """

-import faiss  # Here to have a nice missing dependency error message early on
-import numpy  # Here to have a nice missing dependency error message early on
-import requests  # Here to have a nice missing dependency error message early on
-import sklearn  # Here to have a nice missing dependency error message early on
-import tqdm  # Here to have a nice missing dependency error message early on
+import faiss  # noqa: F401  # Here to have a nice missing dependency error message early on
+import numpy  # noqa: F401  # Here to have a nice missing dependency error message early on
+import requests  # noqa: F401  # Here to have a nice missing dependency error message early on
+import sklearn  # noqa: F401  # Here to have a nice missing dependency error message early on
+import tqdm  # noqa: F401  # Here to have a nice missing dependency error message early on
 from mauve import compute_mauve  # From: mauve-text

 import datasets
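
For context, these imports exist only as an early dependency check: importing each optional package at module load makes a missing one fail immediately with a clear `ModuleNotFoundError`, instead of deep inside a compute call, and `# noqa: F401` keeps ruff's unused-import rule from flagging (or auto-removing) them. A sketch of the pattern, with `json` standing in for a heavy optional dependency:

```python
# Imported solely so a missing dependency fails fast and loudly at load
# time; the noqa keeps F401 (unused import) from flagging or removing it.
import json  # noqa: F401  # Here to have a nice missing dependency error message early on


def compute(texts):
    # The real metric would call into the optional dependency here.
    return len(texts)
```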

metrics/mse/mse.py (-1)

@@ -110,7 +110,6 @@ def _get_feature_types(self):
         }

     def _compute(self, predictions, references, sample_weight=None, multioutput="uniform_average", squared=True):
-
         mse = mean_squared_error(
             references, predictions, sample_weight=sample_weight, multioutput=multioutput, squared=squared
         )

metrics/perplexity/perplexity.py (-1)

@@ -101,7 +101,6 @@ def _info(self):
         )

     def _compute(self, input_texts, model_id, batch_size: int = 16, add_start_token: bool = True, device=None):
-
         if device is not None:
             assert device in ["gpu", "cpu", "cuda"], "device should be either gpu or cpu."
             if device == "gpu":

metrics/rouge/rouge.py (+4, -4)

@@ -14,10 +14,10 @@
 """ ROUGE metric from Google Research github repo. """

 # The dependencies in https://github.com/google-research/google-research/blob/master/rouge/requirements.txt
-import absl  # Here to have a nice missing dependency error message early on
-import nltk  # Here to have a nice missing dependency error message early on
-import numpy  # Here to have a nice missing dependency error message early on
-import six  # Here to have a nice missing dependency error message early on
+import absl  # noqa: F401  # Here to have a nice missing dependency error message early on
+import nltk  # noqa: F401  # Here to have a nice missing dependency error message early on
+import numpy  # noqa: F401  # Here to have a nice missing dependency error message early on
+import six  # noqa: F401  # Here to have a nice missing dependency error message early on
 from rouge_score import rouge_scorer, scoring

 import datasets

metrics/sari/sari.py (-2)

@@ -227,7 +227,6 @@ def SARIsent(ssent, csent, rsents):


 def normalize(sentence, lowercase: bool = True, tokenizer: str = "13a", return_str: bool = True):
-
     # Normalization is requried for the ASSET dataset (one of the primary
     # datasets in sentence simplification) to allow using space
     # to split the sentence. Even though Wiki-Auto and TURK datasets,
@@ -278,7 +277,6 @@ def _info(self):
         )

     def _compute(self, sources, predictions, references):
-
         if not (len(sources) == len(predictions) == len(references)):
             raise ValueError("Sources length must match predictions and references lengths.")
         sari_score = 0

metrics/super_glue/super_glue.py (+1, -1)

@@ -135,7 +135,7 @@ def evaluate_multirc(ids_preds, labels):
         question_preds, question_labels = zip(*preds_labels)
         f1 = f1_score(y_true=question_labels, y_pred=question_preds, average="macro")
         f1s.append(f1)
-        em = int(sum(p == l for p, l in preds_labels) == len(preds_labels))
+        em = int(sum(pred == label for pred, label in preds_labels) == len(preds_labels))
         ems.append(em)
     f1_m = float(sum(f1s) / len(f1s))
     em = sum(ems) / len(ems)
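
The rename above is presumably driven by pycodestyle's E741 (ambiguous variable name), enabled through the selected "E" rules in `pyproject.toml` below; it flags single-letter names like `l` that are easy to confuse with `1`, and the spelled-out names read better anyway. A self-contained illustration with made-up data:

```python
# Three (prediction, label) pairs for one question; E741 would flag
# unpacking them as `p, l`, so spelled-out names are used instead.
preds_labels = [(1, 1), (0, 0), (1, 0)]

em = int(sum(pred == label for pred, label in preds_labels) == len(preds_labels))
print(em)  # 0 -- exact match requires every prediction to equal its label
```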

metrics/wiki_split/wiki_split.py (-2)

@@ -254,7 +254,6 @@ def SARIsent(ssent, csent, rsents):


 def normalize(sentence, lowercase: bool = True, tokenizer: str = "13a", return_str: bool = True):
-
     # Normalization is requried for the ASSET dataset (one of the primary
     # datasets in sentence simplification) to allow using space
     # to split the sentence. Even though Wiki-Auto and TURK datasets,
@@ -284,7 +283,6 @@ def normalize(sentence, lowercase: bool = True, tokenizer: str = "13a", return_s


 def compute_sari(sources, predictions, references):
-
     if not (len(sources) == len(predictions) == len(references)):
         raise ValueError("Sources length must match predictions and references lengths.")
     sari_score = 0

metrics/xtreme_s/xtreme_s.py (-1)

@@ -238,7 +238,6 @@ def _info(self):
         )

     def _compute(self, predictions, references, bleu_kwargs=None, wer_kwargs=None):
-
         bleu_kwargs = bleu_kwargs if bleu_kwargs is not None else {}
         wer_kwargs = wer_kwargs if wer_kwargs is not None else {}


pyproject.toml (+12)

@@ -1,3 +1,15 @@
 [tool.black]
 line-length = 119
 target_version = ['py37']
+
+[tool.ruff]
+# Ignored rules:
+# "E501" -> line length violation
+# "F821" -> undefined named in type annotation (e.g. Literal["something"])
+ignore = ["E501", "F821"]
+select = ["E", "F", "I", "W"]
+line-length = 119
+
+[tool.ruff.isort]
+lines-after-imports = 2
+known-first-party = ["datasets"]
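
The `[tool.ruff.isort]` table drives the import-sorting ("I") rules: `known-first-party = ["datasets"]` puts `datasets` in its own first-party block after the standard-library and third-party blocks (matching the import layout in the metric files above), and `lines-after-imports = 2` requires two blank lines between the last import and the first definition. A small hypothetical module laid out the way this configuration expects:

```python
import os

import numpy as np

import datasets


def load_train(data_dir: str):
    # stdlib block, third-party block, first-party block, two blank lines.
    name = os.path.basename(data_dir)
    ds = datasets.load_dataset(name, split="train")
    return np.asarray(ds["label"])
```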

setup.cfg (-18)

@@ -1,24 +1,6 @@
 [metadata]
 license_file = LICENSE

-[isort]
-ensure_newline_before_comments = True
-force_grid_wrap = 0
-include_trailing_comma = True
-line_length = 119
-lines_after_imports = 2
-multi_line_output = 3
-use_parentheses = True
-
-[flake8]
-ignore = E203, E501, W503
-max-line-length = 119
-exclude =
-    src/datasets/datasets
-    src/datasets/metrics
-per-file-ignores =
-    metrics/*:F401
-
 [tool:pytest]
 markers =
     unit: unit test

setup.py (+1, -1)

@@ -211,7 +211,7 @@
 TESTS_REQUIRE.extend(VISION_REQUIRE)
 TESTS_REQUIRE.extend(AUDIO_REQUIRE)

-QUALITY_REQUIRE = ["black~=22.0", "flake8>=3.8.3", "isort>=5.0.0", "pyyaml>=5.3.1"]
+QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241", "pyyaml>=5.3.1"]

 DOCS_REQUIRE = [
     # Might need to add doc-builder and some specific deps in the future

src/datasets/commands/dummy_data.py (-1)

@@ -394,7 +394,6 @@ def _print_dummy_data_instructions(self, dataset_builder, mock_dl_manager):
         try:
             generator_splits = dataset_builder._split_generators(mock_dl_manager)
         except FileNotFoundError as e:
-
             print(
                 f"Dataset {self._dataset_name} with config {mock_dl_manager.config} seems to already open files in the method `_split_generators(...)`. You might consider to instead only open files in the method `_generate_examples(...)` instead. If this is not possible the dummy data has to be created with less guidance. Make sure you create the file {e.filename}."
             )

src/datasets/features/features.py (+1, -3)

@@ -23,9 +23,8 @@
 from dataclasses import InitVar, dataclass, field, fields
 from functools import reduce, wraps
 from operator import mul
-from typing import Any, ClassVar, Dict, List, Optional
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union
 from typing import Sequence as Sequence_
-from typing import Tuple, Union

 import numpy as np
 import pandas as pd
@@ -1763,7 +1762,6 @@ def unsimplify(feature: dict) -> dict:
         return feature

     def from_yaml_inner(obj: Union[dict, list]) -> Union[dict, list]:
-
         if isinstance(obj, dict):
             if not obj:
                 return {}

src/datasets/fingerprint.py (-2)

@@ -184,7 +184,6 @@ def get_temporary_cache_files_directory() -> str:
     """Return a directory that is deleted when session closes."""
     global _TEMP_DIR_FOR_TEMP_CACHE_FILES
     if _TEMP_DIR_FOR_TEMP_CACHE_FILES is None:
-
         # Avoids a PermissionError on Windows caused by the datasets referencing
         # the files from the cache directory on clean-up
         def cleanup_func():
@@ -466,7 +465,6 @@ def fingerprint_transform(
     fingerprint_names = fingerprint_names if fingerprint_names is not None else ["new_fingerprint"]

     def _fingerprint(func):
-
         if not inplace and not all(name in func.__code__.co_varnames for name in fingerprint_names):
             raise ValueError("function {func} is missing parameters {fingerprint_names} in signature")

src/datasets/formatting/np_formatter.py

-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ def _consolidate(self, column):
4444
return column
4545

4646
def _tensorize(self, value):
47-
4847
if isinstance(value, (str, bytes, type(None))):
4948
return value
5049
elif isinstance(value, (np.character, np.ndarray)) and np.issubdtype(value.dtype, np.character):

src/datasets/io/csv.py (-1)

@@ -74,7 +74,6 @@ def __init__(
         num_proc: Optional[int] = None,
         **to_csv_kwargs,
     ):
-
         if num_proc is not None and num_proc <= 0:
             raise ValueError(f"num_proc {num_proc} must be an integer > 0.")

src/datasets/io/sql.py

-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ def __init__(
6464
num_proc: Optional[int] = None,
6565
**to_sql_kwargs,
6666
):
67-
6867
if num_proc is not None and num_proc <= 0:
6968
raise ValueError(f"num_proc {num_proc} must be an integer > 0.")
7069

src/datasets/naming.py

-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ def filepattern_for_dataset_split(dataset_name, split, data_dir, filetype_suffix
6767

6868

6969
def filenames_for_dataset_split(path, dataset_name, split, filetype_suffix=None, shard_lengths=None):
70-
7170
prefix = filename_prefix_for_split(dataset_name, split)
7271
prefix = os.path.join(path, prefix)
7372

src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def analyze(files_or_archives, downloaded_files_or_dirs, split):
133133

134134
if metadata_files:
135135
# add metadata if `metadata_files` are found and `drop_metadata` is None (default) or False
136-
add_metadata = not (self.config.drop_metadata is True)
136+
add_metadata = not self.config.drop_metadata
137137
# if `metadata_files` are found, add labels only if
138138
# `drop_labels` is set up to False explicitly (not-default behavior)
139139
add_labels = self.config.drop_labels is False
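
This simplification is safe only because `drop_metadata` is tri-state (None, True, or False): for those three values `not (x is True)` and `not x` agree, although the two forms diverge for arbitrary truthy objects. A quick standalone check:

```python
# The rewrite preserves behavior for the three values the option can take.
for flag in (None, True, False):
    assert (not (flag is True)) == (not flag)

# It would NOT be equivalent for arbitrary truthy values:
flag = "yes"
print(not (flag is True), not flag)  # True False
```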

src/datasets/packaged_modules/json/json.py (-1)

@@ -93,7 +93,6 @@ def _cast_table(self, pa_table: pa.Table) -> pa.Table:

     def _generate_tables(self, files):
         for file_idx, file in enumerate(itertools.chain.from_iterable(files)):
-
             # If the file is one json object and if we need to look at the list of items in one specific field
             if self.config.field is not None:
                 with open(file, encoding="utf-8") as f:

src/datasets/utils/file_utils.py (-2)

@@ -100,7 +100,6 @@ def head_hf_s3(


 def hf_github_url(path: str, name: str, dataset=True, revision: Optional[str] = None) -> str:
-
     default_revision = "main" if version.parse(__version__).is_devrelease else __version__
     revision = revision or default_revision
     if dataset:
@@ -547,7 +546,6 @@ def get_from_cache(
     # Prevent parallel downloads of the same file with a lock.
     lock_path = cache_path + ".lock"
     with FileLock(lock_path):
-
         if resume_download:
             incomplete_path = cache_path + ".incomplete"

