diff --git a/tests/deepspeed/test_deepspeed.py b/tests/deepspeed/test_deepspeed.py index 85736a9422ab..11bea3c3aa88 100644 --- a/tests/deepspeed/test_deepspeed.py +++ b/tests/deepspeed/test_deepspeed.py @@ -628,7 +628,7 @@ def model_init(): with CaptureStd() as cs: trainer.hyperparameter_search(direction="maximize", n_trials=n_trials) self.assertIn("DeepSpeed info", cl.out, "expected DeepSpeed logger output but got none") - self.assertIn(f"Trial {n_trials-1} finished with value", cs.err, "expected hyperparameter_search output") + self.assertIn(f"Trial {n_trials - 1} finished with value", cs.err, "expected hyperparameter_search output") self.assertIn("Best is trial", cs.err, "expected hyperparameter_search output") # --- These tests need to run on both zero stages --- # diff --git a/tests/models/bart/test_modeling_tf_bart.py b/tests/models/bart/test_modeling_tf_bart.py index 87c3b895f17b..e93ab4479506 100644 --- a/tests/models/bart/test_modeling_tf_bart.py +++ b/tests/models/bart/test_modeling_tf_bart.py @@ -87,7 +87,7 @@ def prepare_config_and_inputs_for_common(self): clip_value_min=self.eos_token_id + 1, clip_value_max=self.vocab_size + 1, ) - # Explicity add "end of sequence" to the inputs + # Explicitly add "end of sequence" to the inputs eos_tensor = tf.expand_dims(tf.constant([self.eos_token_id] * self.batch_size), 1) input_ids = tf.concat([input_ids, eos_tensor], axis=1) @@ -225,7 +225,7 @@ def test_decoder_model_past_large_inputs(self): self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs) # TODO (Joao): fix me - @unittest.skip("Onnx compliancy broke with TF 2.10") + @unittest.skip("Onnx compliance broke with TF 2.10") def test_onnx_compliancy(self): pass diff --git a/tests/models/bert/test_modeling_tf_bert.py b/tests/models/bert/test_modeling_tf_bert.py index 335a184d2929..a8f8b7347767 100644 --- a/tests/models/bert/test_modeling_tf_bert.py +++ b/tests/models/bert/test_modeling_tf_bert.py @@ -735,7 +735,7 @@ def test_custom_load_tf_weights(self): self.assertTrue(layer.split("_")[0] in ["dropout", "classifier"]) # TODO (Joao): fix me - @unittest.skip("Onnx compliancy broke with TF 2.10") + @unittest.skip("Onnx compliance broke with TF 2.10") def test_onnx_compliancy(self): pass diff --git a/tests/models/blip/test_modeling_blip.py b/tests/models/blip/test_modeling_blip.py index 50cc8baae5b8..3dac349fb4fd 100644 --- a/tests/models/blip/test_modeling_blip.py +++ b/tests/models/blip/test_modeling_blip.py @@ -474,7 +474,7 @@ def test_retain_grad_hidden_states_attentions(self): def test_model_get_set_embeddings(self): pass - # override as the `logit_scale` parameter initilization is different for Blip + # override as the `logit_scale` parameter initialization is different for Blip def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -483,7 +483,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), @@ -988,7 +988,7 @@ def test_training_gradient_checkpointing_use_reentrant(self): def test_training_gradient_checkpointing_use_reentrant_false(self): pass - # override as the `logit_scale` parameter initilization is different for Blip + # override as the `logit_scale` parameter initialization is 
different for Blip def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -997,7 +997,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), @@ -1206,7 +1206,7 @@ def test_training_gradient_checkpointing(self): loss = model(**inputs).loss loss.backward() - # override as the `logit_scale` parameter initilization is different for Blip + # override as the `logit_scale` parameter initialization is different for Blip def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -1215,7 +1215,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), diff --git a/tests/models/blip_2/test_modeling_blip_2.py b/tests/models/blip_2/test_modeling_blip_2.py index 6815b5757115..b55ec4a23c05 100644 --- a/tests/models/blip_2/test_modeling_blip_2.py +++ b/tests/models/blip_2/test_modeling_blip_2.py @@ -521,7 +521,7 @@ def test_save_load_fast_init_to_base(self): def test_sdpa_can_dispatch_composite_models(self): """ Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model. - This tests only by looking at layer names, as usually SDPA layers are calles "SDPAAttention". + This tests only by looking at layer names, as usually SDPA layers are called "SDPAAttention". In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model is loaded, because we manually replicate requested attn implementation on each sub-config when loading. See https://github.com/huggingface/transformers/pull/32238 for more info @@ -970,7 +970,7 @@ def test_cpu_offload(self): def test_sdpa_can_dispatch_composite_models(self): """ Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model. - This tests only by looking at layer names, as usually SDPA layers are calles "SDPAAttention". + This tests only by looking at layer names, as usually SDPA layers are called "SDPAAttention". In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model is loaded, because we manually replicate requested attn implementation on each sub-config when loading. 
See https://github.com/huggingface/transformers/pull/32238 for more info @@ -1647,7 +1647,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), diff --git a/tests/models/bloom/test_tokenization_bloom.py b/tests/models/bloom/test_tokenization_bloom.py index 71318d9dd174..6c2fffef64f1 100644 --- a/tests/models/bloom/test_tokenization_bloom.py +++ b/tests/models/bloom/test_tokenization_bloom.py @@ -135,7 +135,7 @@ def test_encodings_from_xnli_dataset(self): @require_jinja def test_tokenization_for_chat(self): tokenizer = self.get_rust_tokenizer() - tokenizer.chat_template = "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" + tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}" test_chats = [ [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], [ diff --git a/tests/models/canine/test_tokenization_canine.py b/tests/models/canine/test_tokenization_canine.py index e7e19c63ce93..13fcbb049991 100644 --- a/tests/models/canine/test_tokenization_canine.py +++ b/tests/models/canine/test_tokenization_canine.py @@ -64,7 +64,7 @@ def test_prepare_batch_integration(self): @require_torch def test_encoding_keys(self): tokenizer = self.canine_tokenizer - src_text = ["Once there was a man.", "He wrote a test in HuggingFace Tranformers."] + src_text = ["Once there was a man.", "He wrote a test in HuggingFace Transformers."] batch = tokenizer(src_text, padding=True, return_tensors="pt") # check if input_ids, attention_mask and token_type_ids are returned self.assertIn("input_ids", batch) diff --git a/tests/models/chinese_clip/test_modeling_chinese_clip.py b/tests/models/chinese_clip/test_modeling_chinese_clip.py index 4a27e7292ec6..bc14d80524c4 100644 --- a/tests/models/chinese_clip/test_modeling_chinese_clip.py +++ b/tests/models/chinese_clip/test_modeling_chinese_clip.py @@ -596,7 +596,7 @@ def test_retain_grad_hidden_states_attentions(self): def test_model_get_set_embeddings(self): pass - # override as the `logit_scale` parameter initilization is different for CHINESE_CLIP + # override as the `logit_scale` parameter initialization is different for CHINESE_CLIP def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -608,7 +608,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), diff --git a/tests/models/clap/test_modeling_clap.py b/tests/models/clap/test_modeling_clap.py index 559db26206b6..c8250648c670 100644 --- a/tests/models/clap/test_modeling_clap.py +++ b/tests/models/clap/test_modeling_clap.py @@ -543,7 +543,7 @@ def test_retain_grad_hidden_states_attentions(self): def test_model_get_set_embeddings(self): pass - # override as the `logit_scale` parameter initilization is different for CLAP + # override as the `logit_scale` parameter initialization is different for CLAP def 
test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -552,7 +552,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), diff --git a/tests/models/clip/test_modeling_clip.py b/tests/models/clip/test_modeling_clip.py index 6b75769b5396..66f741e6f4b2 100644 --- a/tests/models/clip/test_modeling_clip.py +++ b/tests/models/clip/test_modeling_clip.py @@ -761,7 +761,7 @@ def test_retain_grad_hidden_states_attentions(self): def test_model_get_set_embeddings(self): pass - # override as the `logit_scale` parameter initilization is different for CLIP + # override as the `logit_scale` parameter initialization is different for CLIP def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -770,7 +770,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), diff --git a/tests/models/clipseg/test_modeling_clipseg.py b/tests/models/clipseg/test_modeling_clipseg.py index c2f77e30667a..a116b82f5f4e 100644 --- a/tests/models/clipseg/test_modeling_clipseg.py +++ b/tests/models/clipseg/test_modeling_clipseg.py @@ -519,7 +519,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if "logit_scale" in name: self.assertAlmostEqual( param.data.item(), diff --git a/tests/models/clvp/test_modeling_clvp.py b/tests/models/clvp/test_modeling_clvp.py index 0f59b91871c9..8064d4059ec6 100644 --- a/tests/models/clvp/test_modeling_clvp.py +++ b/tests/models/clvp/test_modeling_clvp.py @@ -500,7 +500,7 @@ def test_inputs_embeds(self): def test_model_get_set_embeddings(self): pass - # override as the `logit_scale` parameter initilization is different for Clvp + # override as the `logit_scale` parameter initialization is different for Clvp def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -509,7 +509,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": expected_value = np.log(1 / 0.07) returned_value = param.data.item() diff --git a/tests/models/conditional_detr/test_modeling_conditional_detr.py b/tests/models/conditional_detr/test_modeling_conditional_detr.py index db5afffe2131..118552fce5fd 100644 --- a/tests/models/conditional_detr/test_modeling_conditional_detr.py +++ b/tests/models/conditional_detr/test_modeling_conditional_detr.py @@ -465,13 +465,13 @@ def test_different_timm_backbone(self): self.model_tester.num_labels, ) 
self.assertEqual(outputs.logits.shape, expected_shape) - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 3) elif model_class.__name__ == "ConditionalDetrForSegmentation": - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.conditional_detr.model.backbone.conv_encoder.intermediate_channel_sizes), 3) else: - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 3) self.assertTrue(outputs) @@ -501,13 +501,13 @@ def test_hf_backbone(self): self.model_tester.num_labels, ) self.assertEqual(outputs.logits.shape, expected_shape) - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 3) elif model_class.__name__ == "ConditionalDetrForSegmentation": - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.conditional_detr.model.backbone.conv_encoder.intermediate_channel_sizes), 3) else: - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 3) self.assertTrue(outputs) diff --git a/tests/models/cpmant/test_modeling_cpmant.py b/tests/models/cpmant/test_modeling_cpmant.py index d3835ec2374b..4a6c28e1eb91 100644 --- a/tests/models/cpmant/test_modeling_cpmant.py +++ b/tests/models/cpmant/test_modeling_cpmant.py @@ -161,7 +161,7 @@ def test_inputs_embeds(self): def test_retain_grad_hidden_states_attentions(self): unittest.skip( "CPMAnt doesn't support retain grad in hidden_states or attentions, because prompt management will peel off the output.hidden_states from graph.\ - So is attentions. We strongly recommand you use loss to tune model." + So is attentions. We strongly recommend you use loss to tune model." 
)(self.test_retain_grad_hidden_states_attentions) def test_cpmant_model(self): diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index 1e27aaabf8d8..e3441e606d23 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -542,10 +542,10 @@ def test_different_timm_backbone(self): self.model_tester.num_labels, ) self.assertEqual(outputs.logits.shape, expected_shape) - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 4) else: - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 4) self.assertTrue(outputs) @@ -574,10 +574,10 @@ def test_hf_backbone(self): self.model_tester.num_labels, ) self.assertEqual(outputs.logits.shape, expected_shape) - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 4) else: - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 4) self.assertTrue(outputs) diff --git a/tests/models/depth_anything/test_modeling_depth_anything.py b/tests/models/depth_anything/test_modeling_depth_anything.py index 95026c1054e9..63d57d671706 100644 --- a/tests/models/depth_anything/test_modeling_depth_anything.py +++ b/tests/models/depth_anything/test_modeling_depth_anything.py @@ -214,7 +214,7 @@ def _validate_backbone_init(): model.to(torch_device) model.eval() - # Confirm out_indices propogated to backbone + # Confirm out_indices propagated to backbone self.assertEqual(len(model.backbone.out_indices), 2) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/depth_pro/test_modeling_depth_pro.py b/tests/models/depth_pro/test_modeling_depth_pro.py index bd0237370720..cb38380dc740 100644 --- a/tests/models/depth_pro/test_modeling_depth_pro.py +++ b/tests/models/depth_pro/test_modeling_depth_pro.py @@ -323,7 +323,7 @@ def test_initialization(self): msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) - # this started when switched from normal initialization to kaiming_normal intialization + # this started when switched from normal initialization to kaiming_normal initialization # maybe because the magnitude of offset values from ViT-encoders increases when followed by many convolution layers def test_batching_equivalence(self, atol=1e-4, rtol=1e-4): super().test_batching_equivalence(atol=atol, rtol=rtol) diff --git a/tests/models/detr/test_modeling_detr.py b/tests/models/detr/test_modeling_detr.py index 381fa1d7cd23..003deceab762 100644 --- a/tests/models/detr/test_modeling_detr.py +++ b/tests/models/detr/test_modeling_detr.py @@ -465,13 +465,13 @@ def test_different_timm_backbone(self): self.model_tester.num_labels + 1, ) self.assertEqual(outputs.logits.shape, expected_shape) - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 3) elif model_class.__name__ == "DetrForSegmentation": - # Confirm 
out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.detr.model.backbone.conv_encoder.intermediate_channel_sizes), 3) else: - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 3) self.assertTrue(outputs) @@ -500,13 +500,13 @@ def test_hf_backbone(self): self.model_tester.num_labels + 1, ) self.assertEqual(outputs.logits.shape, expected_shape) - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 3) elif model_class.__name__ == "DetrForSegmentation": - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.detr.model.backbone.conv_encoder.intermediate_channel_sizes), 3) else: - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 3) self.assertTrue(outputs) diff --git a/tests/models/diffllama/test_modeling_diffllama.py b/tests/models/diffllama/test_modeling_diffllama.py index 81b963cfc47f..9864b713a59d 100644 --- a/tests/models/diffllama/test_modeling_diffllama.py +++ b/tests/models/diffllama/test_modeling_diffllama.py @@ -433,7 +433,9 @@ def test_model_rope_scaling(self): long_input_length = int(config.max_position_embeddings * 1.5) # Inputs - x = torch.randn(1, dtype=torch.float32, device=torch_device) # used exlusively to get the dtype and the device + x = torch.randn( + 1, dtype=torch.float32, device=torch_device + ) # used exclusively to get the dtype and the device position_ids_short = torch.arange(short_input_length, dtype=torch.long, device=torch_device) position_ids_short = position_ids_short.unsqueeze(0) position_ids_long = torch.arange(long_input_length, dtype=torch.long, device=torch_device) @@ -553,7 +555,7 @@ def _reinitialize_config(base_config, new_kwargs): @slow def test_flash_attn_2_generate_padding_right(self): """ - Overwritting the common test as the test is flaky on tiny models + Overwriting the common test as the test is flaky on tiny models """ model = DiffLlamaForCausalLM.from_pretrained( "kajuma/DiffLlama-0.3B-handcut", @@ -617,7 +619,7 @@ def test_use_flash_attention_2_true(self): @slow def test_eager_matches_sdpa_generate(self): """ - Overwritting the common test as the test is flaky on tiny models + Overwriting the common test as the test is flaky on tiny models """ max_new_tokens = 30 diff --git a/tests/models/dpt/test_modeling_dpt.py b/tests/models/dpt/test_modeling_dpt.py index da40466d484b..bdfd480a90ab 100644 --- a/tests/models/dpt/test_modeling_dpt.py +++ b/tests/models/dpt/test_modeling_dpt.py @@ -290,7 +290,7 @@ def _validate_backbone_init(): model.eval() if model.__class__.__name__ == "DPTForDepthEstimation": - # Confirm out_indices propogated to backbone + # Confirm out_indices propagated to backbone self.assertEqual(len(model.backbone.out_indices), 2) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/encodec/test_modeling_encodec.py b/tests/models/encodec/test_modeling_encodec.py index 2d5eca4b83ae..209d02ad19c3 100644 --- a/tests/models/encodec/test_modeling_encodec.py +++ b/tests/models/encodec/test_modeling_encodec.py @@ -492,7 +492,7 @@ def test_integration_24kHz(self): for 
bandwidth, expected_rmse in expected_rmse.items(): with torch.no_grad(): - # use max bandwith for best possible reconstruction + # use max bandwidth for best possible reconstruction encoder_outputs = model.encode(inputs["input_values"], bandwidth=float(bandwidth)) audio_code_sums = [a[0].sum().cpu().item() for a in encoder_outputs[0]] @@ -548,7 +548,7 @@ def test_integration_48kHz(self): for bandwidth, expected_rmse in expected_rmse.items(): with torch.no_grad(): - # use max bandwith for best possible reconstruction + # use max bandwidth for best possible reconstruction encoder_outputs = model.encode( inputs["input_values"], inputs["padding_mask"], bandwidth=float(bandwidth), return_dict=False ) @@ -608,7 +608,7 @@ def test_batch_48kHz(self): input_values = inputs["input_values"].to(torch_device) for bandwidth, expected_rmse in expected_rmse.items(): with torch.no_grad(): - # use max bandwith for best possible reconstruction + # use max bandwidth for best possible reconstruction encoder_outputs = model.encode(input_values, bandwidth=float(bandwidth), return_dict=False) audio_code_sums_0 = [a[0][0].sum().cpu().item() for a in encoder_outputs[0]] audio_code_sums_1 = [a[0][1].sum().cpu().item() for a in encoder_outputs[0]] diff --git a/tests/models/encoder_decoder/test_modeling_encoder_decoder.py b/tests/models/encoder_decoder/test_modeling_encoder_decoder.py index 1c4051f2e264..4f1cb32348d8 100644 --- a/tests/models/encoder_decoder/test_modeling_encoder_decoder.py +++ b/tests/models/encoder_decoder/test_modeling_encoder_decoder.py @@ -179,7 +179,10 @@ def check_encoder_decoder_model_from_pretrained_using_model_paths( **kwargs, ): encoder_model, decoder_model = self.get_encoder_decoder_model(config, decoder_config) - with tempfile.TemporaryDirectory() as encoder_tmp_dirname, tempfile.TemporaryDirectory() as decoder_tmp_dirname: + with ( + tempfile.TemporaryDirectory() as encoder_tmp_dirname, + tempfile.TemporaryDirectory() as decoder_tmp_dirname, + ): encoder_model.save_pretrained(encoder_tmp_dirname) decoder_model.save_pretrained(decoder_tmp_dirname) model_kwargs = {"encoder_hidden_dropout_prob": 0.0} @@ -306,7 +309,10 @@ def check_save_and_load_encoder_decoder_model( out_2 = outputs[0].cpu().numpy() out_2[np.isnan(out_2)] = 0 - with tempfile.TemporaryDirectory() as encoder_tmp_dirname, tempfile.TemporaryDirectory() as decoder_tmp_dirname: + with ( + tempfile.TemporaryDirectory() as encoder_tmp_dirname, + tempfile.TemporaryDirectory() as decoder_tmp_dirname, + ): enc_dec_model.encoder.save_pretrained(encoder_tmp_dirname) enc_dec_model.decoder.save_pretrained(decoder_tmp_dirname) enc_dec_model = EncoderDecoderModel.from_encoder_decoder_pretrained( diff --git a/tests/models/falcon/test_modeling_falcon.py b/tests/models/falcon/test_modeling_falcon.py index a024b801e8bc..aec59eca3356 100644 --- a/tests/models/falcon/test_modeling_falcon.py +++ b/tests/models/falcon/test_modeling_falcon.py @@ -460,7 +460,9 @@ def test_model_rope_scaling(self): long_input_length = int(config.max_position_embeddings * 1.5) # Inputs - x = torch.randn(1, dtype=torch.float32, device=torch_device) # used exlusively to get the dtype and the device + x = torch.randn( + 1, dtype=torch.float32, device=torch_device + ) # used exclusively to get the dtype and the device position_ids_short = torch.arange(short_input_length, dtype=torch.long, device=torch_device) position_ids_short = position_ids_short.unsqueeze(0) position_ids_long = torch.arange(long_input_length, dtype=torch.long, device=torch_device) diff --git 
a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py index 971468026d71..a75238265cd4 100644 --- a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py +++ b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py @@ -211,7 +211,7 @@ def create_and_check_state_equivalency(self, config, input_ids, *args): output_two = outputs.last_hidden_state self.parent.assertTrue(torch.allclose(torch.cat([output_one, output_two], dim=1), output_whole, atol=1e-5)) - # TODO the orignal mamba does not support decoding more than 1 token neither do we + # TODO the original mamba does not support decoding more than 1 token neither do we def create_and_check_falcon_mamba_cached_slow_forward_and_backwards( self, config, input_ids, *args, gradient_checkpointing=False diff --git a/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py b/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py index cc413b94a63e..0480335f05aa 100644 --- a/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py +++ b/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py @@ -104,7 +104,7 @@ def create_and_check_model(self, config, input_ids, *args): # check batch sizes match for value in result.values(): self.parent.assertEqual(value.size(0), self.batch_size) - # check duration, pitch, and energy have the appopriate shapes + # check duration, pitch, and energy have the appropriate shapes # duration: (batch_size, max_text_length), pitch and energy: (batch_size, max_text_length, 1) self.parent.assertEqual(result["duration_outputs"].shape + (1,), result["pitch_outputs"].shape) self.parent.assertEqual(result["pitch_outputs"].shape, result["energy_outputs"].shape) @@ -527,7 +527,7 @@ def create_and_check_model(self, config, input_ids, *args): # check batch sizes match for value in result.values(): self.parent.assertEqual(value.size(0), self.batch_size) - # check duration, pitch, and energy have the appopriate shapes + # check duration, pitch, and energy have the appropriate shapes # duration: (batch_size, max_text_length), pitch and energy: (batch_size, max_text_length, 1) self.parent.assertEqual(result["duration_outputs"].shape + (1,), result["pitch_outputs"].shape) self.parent.assertEqual(result["pitch_outputs"].shape, result["energy_outputs"].shape) diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index 9fdb7f240f2d..2aa35863697b 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -326,7 +326,7 @@ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): ) return inputs_dict - # Overriden Tests + # Overridden Tests @unittest.skip def test_attention_outputs(self): pass diff --git a/tests/models/fnet/test_tokenization_fnet.py b/tests/models/fnet/test_tokenization_fnet.py index 16f2e4950ef0..a55c142b25d8 100644 --- a/tests/models/fnet/test_tokenization_fnet.py +++ b/tests/models/fnet/test_tokenization_fnet.py @@ -141,7 +141,7 @@ def test_sequence_builders(self): tokenizer.sep_token_id ] - # Overriden Tests - loading the fast tokenizer from slow just takes too long + # Overridden Tests - loading the fast tokenizer from slow just takes too long def test_special_tokens_initialization(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): @@ -190,7 +190,7 @@ def 
test_special_tokens_initialization_from_slow(self): self.assertTrue(special_token_id in p_output) self.assertTrue(special_token_id in cr_output) - # Overriden Tests + # Overridden Tests def test_padding(self, max_length=50): if not self.test_slow_tokenizer: # as we don't have a slow version, we can't compare the outputs between slow and fast versions diff --git a/tests/models/gemma/test_tokenization_gemma.py b/tests/models/gemma/test_tokenization_gemma.py index 3a9e7af4b6f5..0b43fec80556 100644 --- a/tests/models/gemma/test_tokenization_gemma.py +++ b/tests/models/gemma/test_tokenization_gemma.py @@ -342,8 +342,8 @@ def test_integration_test_xnli(self): encoded1, encoded2, msg="Hint: the following tokenization diff were obtained for slow vs fast:\n " - f"elements in slow: {set(pyth_tokenizer.tokenize(string))-set(rust_tokenizer.tokenize(string))} \nvs\n " - f"elements in fast: {set(rust_tokenizer.tokenize(string))-set(pyth_tokenizer.tokenize(string))} \n\n{string}", + f"elements in slow: {set(pyth_tokenizer.tokenize(string)) - set(rust_tokenizer.tokenize(string))} \nvs\n " + f"elements in fast: {set(rust_tokenizer.tokenize(string)) - set(pyth_tokenizer.tokenize(string))} \n\n{string}", ) decoded1 = pyth_tokenizer.decode(encoded1, skip_special_tokens=True) diff --git a/tests/models/gpt2/test_modeling_gpt2.py b/tests/models/gpt2/test_modeling_gpt2.py index 4af2739ff5fb..0f66e1681882 100644 --- a/tests/models/gpt2/test_modeling_gpt2.py +++ b/tests/models/gpt2/test_modeling_gpt2.py @@ -866,7 +866,7 @@ def test_contrastive_search_gpt2(self): @slow def test_flash_attn_2_generate_padding_left(self): """ - Overwritting the common test as the test is flaky on tiny models + Overwriting the common test as the test is flaky on tiny models """ model = GPT2LMHeadModel.from_pretrained("gpt2", torch_dtype=torch.float16).to(0) diff --git a/tests/models/gpt2/test_modeling_tf_gpt2.py b/tests/models/gpt2/test_modeling_tf_gpt2.py index c56d837939c5..ed4c4a2e89b7 100644 --- a/tests/models/gpt2/test_modeling_tf_gpt2.py +++ b/tests/models/gpt2/test_modeling_tf_gpt2.py @@ -451,7 +451,7 @@ def test_onnx_runtime_optimize(self): onnxruntime.InferenceSession(onnx_model_proto.SerializeToString()) # TODO (Joao): fix me - @unittest.skip("Onnx compliancy broke with TF 2.10") + @unittest.skip("Onnx compliance broke with TF 2.10") def test_onnx_compliancy(self): pass @@ -548,7 +548,7 @@ def test_lm_generate_greedy_distilgpt2_beam_search_special(self): @slow def test_lm_generate_distilgpt2_left_padding(self): - """Tests that the generated text is the same, regarless of left padding""" + """Tests that the generated text is the same, regardless of left padding""" model = TFGPT2LMHeadModel.from_pretrained("distilbert/distilgpt2") tokenizer = GPT2Tokenizer.from_pretrained("distilbert/distilgpt2") diff --git a/tests/models/gpt_neo/test_modeling_gpt_neo.py b/tests/models/gpt_neo/test_modeling_gpt_neo.py index 213c3ed497f1..c0c3639781ca 100644 --- a/tests/models/gpt_neo/test_modeling_gpt_neo.py +++ b/tests/models/gpt_neo/test_modeling_gpt_neo.py @@ -479,7 +479,7 @@ def test_local_attn_probs(self): # the last 2 tokens are masked, and should have 0 attn_probs self.assertTrue(torch.all(attn_probs[:, :, -mask_tokens:, -mask_tokens:] == 0)) - # in loacal attention each token can only attend to the previous window_size tokens (inlcuding itself) + # in local attention each token can only attend to the previous window_size tokens (including itself) # here window_size is 4, so a token at index 5 can only attend to indcies [2, 3, 4, 5] #
and the attn_probs should be 0 for token [0, 1] self.assertTrue(torch.all(attn_probs[:, :, 5, 2:6] != 0)) diff --git a/tests/models/gpt_neox/test_modeling_gpt_neox.py b/tests/models/gpt_neox/test_modeling_gpt_neox.py index 34a8e54f700d..874c62f0c8ab 100644 --- a/tests/models/gpt_neox/test_modeling_gpt_neox.py +++ b/tests/models/gpt_neox/test_modeling_gpt_neox.py @@ -381,7 +381,9 @@ def test_model_rope_scaling(self): long_input_length = int(config.max_position_embeddings * 1.5) # Inputs - x = torch.randn(1, dtype=torch.float32, device=torch_device) # used exlusively to get the dtype and the device + x = torch.randn( + 1, dtype=torch.float32, device=torch_device + ) # used exclusively to get the dtype and the device position_ids_short = torch.arange(short_input_length, dtype=torch.long, device=torch_device) position_ids_short = position_ids_short.unsqueeze(0) position_ids_long = torch.arange(long_input_length, dtype=torch.long, device=torch_device) diff --git a/tests/models/granite/test_modeling_granite.py b/tests/models/granite/test_modeling_granite.py index 469e96fd8304..826cda3f67c8 100644 --- a/tests/models/granite/test_modeling_granite.py +++ b/tests/models/granite/test_modeling_granite.py @@ -356,7 +356,9 @@ def test_model_rope_scaling(self): long_input_length = int(config.max_position_embeddings * 1.5) # Inputs - x = torch.randn(1, dtype=torch.float32, device=torch_device) # used exlusively to get the dtype and the device + x = torch.randn( + 1, dtype=torch.float32, device=torch_device + ) # used exclusively to get the dtype and the device position_ids_short = torch.arange(short_input_length, dtype=torch.long, device=torch_device) position_ids_short = position_ids_short.unsqueeze(0) position_ids_long = torch.arange(long_input_length, dtype=torch.long, device=torch_device) diff --git a/tests/models/granitemoe/test_modeling_granitemoe.py b/tests/models/granitemoe/test_modeling_granitemoe.py index 0e64d29c9189..cd2470827b90 100644 --- a/tests/models/granitemoe/test_modeling_granitemoe.py +++ b/tests/models/granitemoe/test_modeling_granitemoe.py @@ -355,7 +355,9 @@ def test_model_rope_scaling(self): long_input_length = int(config.max_position_embeddings * 1.5) # Inputs - x = torch.randn(1, dtype=torch.float32, device=torch_device) # used exlusively to get the dtype and the device + x = torch.randn( + 1, dtype=torch.float32, device=torch_device + ) # used exclusively to get the dtype and the device position_ids_short = torch.arange(short_input_length, dtype=torch.long, device=torch_device) position_ids_short = position_ids_short.unsqueeze(0) position_ids_long = torch.arange(long_input_length, dtype=torch.long, device=torch_device) diff --git a/tests/models/groupvit/test_modeling_groupvit.py b/tests/models/groupvit/test_modeling_groupvit.py index 6eebdbf2e45e..4e836f827e9b 100644 --- a/tests/models/groupvit/test_modeling_groupvit.py +++ b/tests/models/groupvit/test_modeling_groupvit.py @@ -583,7 +583,7 @@ def test_retain_grad_hidden_states_attentions(self): def test_model_get_set_embeddings(self): pass - # override as the `logit_scale` parameter initilization is different for GROUPVIT + # override as the `logit_scale` parameter initialization is different for GROUPVIT def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -592,7 +592,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the 
original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py index a8b5083ebd51..ee62ed011bcd 100644 --- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py @@ -497,7 +497,7 @@ def recursive_check(batched_object, single_row_object, model_name, key): single_batch_shape = value.shape[0] // batch_size single_row_input[key] = value[:single_batch_shape] elif hasattr(value, "tensor"): - # layoutlmv2uses ImageList intead of pixel values (needs for torchscript) + # layoutlmv2 uses ImageList instead of pixel values (needs for torchscript) single_row_input[key] = value.tensor[:single_batch_shape] with torch.no_grad(): diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 01d807fbdba2..319187f11346 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -426,7 +426,9 @@ def test_model_rope_scaling(self): long_input_length = int(config.max_position_embeddings * 1.5) # Inputs - x = torch.randn(1, dtype=torch.float32, device=torch_device) # used exlusively to get the dtype and the device + x = torch.randn( + 1, dtype=torch.float32, device=torch_device + ) # used exclusively to get the dtype and the device position_ids_short = torch.arange(short_input_length, dtype=torch.long, device=torch_device) position_ids_short = position_ids_short.unsqueeze(0) position_ids_long = torch.arange(long_input_length, dtype=torch.long, device=torch_device) diff --git a/tests/models/llava_next/test_modeling_llava_next.py b/tests/models/llava_next/test_modeling_llava_next.py index 327e33a9b7f9..8d517c644d1d 100644 --- a/tests/models/llava_next/test_modeling_llava_next.py +++ b/tests/models/llava_next/test_modeling_llava_next.py @@ -298,7 +298,7 @@ def test_mismatching_num_image_tokens(self): config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config).to(torch_device) - _ = model(**input_dict) # successfull forward with no modifications + _ = model(**input_dict) # successful forward with no modifications # remove one image but leave the image token in text input_dict["pixel_values"] = input_dict["pixel_values"][-1:, ...] diff --git a/tests/models/llava_next_video/test_modeling_llava_next_video.py b/tests/models/llava_next_video/test_modeling_llava_next_video.py index ba7323a07551..ab3a34ab3b12 100644 --- a/tests/models/llava_next_video/test_modeling_llava_next_video.py +++ b/tests/models/llava_next_video/test_modeling_llava_next_video.py @@ -315,7 +315,7 @@ def test_mismatching_num_image_tokens(self): config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config).to(torch_device) - _ = model(**input_dict) # successfull forward with no modifications + _ = model(**input_dict) # successful forward with no modifications # remove one image but leave the image token in text input_dict["pixel_values"] = input_dict["pixel_values"][-1:, ...]
diff --git a/tests/models/m2m_100/test_modeling_m2m_100.py b/tests/models/m2m_100/test_modeling_m2m_100.py index 60b3d220b153..9cfcbfb778f3 100644 --- a/tests/models/m2m_100/test_modeling_m2m_100.py +++ b/tests/models/m2m_100/test_modeling_m2m_100.py @@ -433,7 +433,7 @@ def test_seq_to_seq_generation(self): @slow def test_flash_attn_2_seq_to_seq_generation(self): """ - Overwritting the common test as the test is flaky on tiny models + Overwriting the common test as the test is flaky on tiny models """ model = M2M100ForConditionalGeneration.from_pretrained( "facebook/m2m100_418M", attn_implementation="flash_attention_2" diff --git a/tests/models/mgp_str/test_modeling_mgp_str.py b/tests/models/mgp_str/test_modeling_mgp_str.py index 465444f6927e..849eb90a75a0 100644 --- a/tests/models/mgp_str/test_modeling_mgp_str.py +++ b/tests/models/mgp_str/test_modeling_mgp_str.py @@ -202,7 +202,7 @@ def check_hidden_states_output(inputs_dict, config, model_class): check_hidden_states_output(inputs_dict, config, model_class) - # override as the `logit_scale` parameter initilization is different for MgpstrModel + # override as the `logit_scale` parameter initialization is different for MgpstrModel def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/mgp_str/test_processor_mgp_str.py b/tests/models/mgp_str/test_processor_mgp_str.py index 783a61ebf144..f42dfb266149 100644 --- a/tests/models/mgp_str/test_processor_mgp_str.py +++ b/tests/models/mgp_str/test_processor_mgp_str.py @@ -70,7 +70,7 @@ def setUp(self): with open(self.image_processor_file, "w", encoding="utf-8") as fp: json.dump(image_processor_map, fp) - # We copy here rather than use the ProcessorTesterMixin as this processor has a `char_tokenizer` instad of a + # We copy here rather than use the ProcessorTesterMixin as this processor has a `char_tokenizer` instead of a # tokenizer attribute, which means all the tests would need to be overridden. @require_vision def prepare_image_inputs(self): diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index 7ad879243933..f660171774f0 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -450,7 +450,7 @@ def test_load_balancing_loss(self): padded_result = model(padded_input_ids, attention_mask=padded_attention_mask) torch.testing.assert_close(result.aux_loss.cpu(), padded_result.aux_loss.cpu(), rtol=1e-4, atol=1e-4) - # We make sure that the loss of includding padding tokens != the loss without padding tokens + # We make sure that the loss of including padding tokens != the loss without padding tokens # if attention_mask=None --> we don't exclude padding tokens include_padding_result = model(padded_input_ids, attention_mask=None) @@ -480,7 +480,7 @@ def test_small_model_logits(self): torch_device ) # TODO: might need to tweak it in case the logits do not match on our daily runners - # these logits have been obtained with the original megablocks impelmentation. + # these logits have been obtained with the original megablocks implementation. # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. 
# # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, diff --git a/tests/models/mllama/test_modeling_mllama.py b/tests/models/mllama/test_modeling_mllama.py index 5c8e5c0e70fa..023dd8ea2be3 100644 --- a/tests/models/mllama/test_modeling_mllama.py +++ b/tests/models/mllama/test_modeling_mllama.py @@ -414,7 +414,7 @@ def test_past_key_values_format(self): embed_dim = getattr(text_config, "d_model", text_config.hidden_size) per_head_embed_dim = embed_dim // num_attention_heads - # some models have diffent num-head for query vs key/value so we need to assign correct value + # some models have different num-head for query vs key/value so we need to assign correct value # BUT only after `per_head_embed_dim` is set num_attention_heads = ( text_config.num_key_value_heads diff --git a/tests/models/mobilevit/test_modeling_tf_mobilevit.py b/tests/models/mobilevit/test_modeling_tf_mobilevit.py index fcad3be021e2..61967ec3414f 100644 --- a/tests/models/mobilevit/test_modeling_tf_mobilevit.py +++ b/tests/models/mobilevit/test_modeling_tf_mobilevit.py @@ -284,7 +284,7 @@ def test_keras_fit(self): super().test_keras_fit() # The default test_loss_computation() uses -100 as a proxy ignore_index - # to test masked losses. Overridding to avoid -100 since semantic segmentation + # to test masked losses. Overriding to avoid -100 since semantic segmentation # models use `semantic_loss_ignore_index` from the config. def test_loss_computation(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/moshi/test_modeling_moshi.py b/tests/models/moshi/test_modeling_moshi.py index e6ad1b639e92..4229410ed383 100644 --- a/tests/models/moshi/test_modeling_moshi.py +++ b/tests/models/moshi/test_modeling_moshi.py @@ -581,7 +581,7 @@ def prepare_config_and_inputs_for_generate(self, batch_size=2): return config, filtered_inputs_dict def _check_generate_outputs(self, output, config, use_cache=False, num_return_sequences=1, num_beams=1): - # Overwrite because the generate method actually alway uses `inputs_embeds` so `use_cache` is always `True` + # Overwrite because the generate method actually always uses `inputs_embeds` so `use_cache` is always `True` super()._check_generate_outputs( output, config, use_cache=True, num_return_sequences=num_return_sequences, num_beams=num_beams ) @@ -618,13 +618,13 @@ def test_contrastive_generate_low_memory(self): pass @unittest.skip( - "Moshi either needs deafult generation config or fix for fullgraph compile because it hardcodes SlidingWindowCache in custom generation loop." + "Moshi either needs default generation config or fix for fullgraph compile because it hardcodes SlidingWindowCache in custom generation loop." ) def test_greedy_generate_dict_outputs_use_cache(self): pass @unittest.skip( - "Moshi either needs deafult generation config or fix for fullgraph compile because it hardcodes SlidingWindowCache in custom generation loop." + "Moshi either needs default generation config or fix for fullgraph compile because it hardcodes SlidingWindowCache in custom generation loop." 
) def test_beam_search_generate_dict_outputs_use_cache(self): pass @@ -849,7 +849,7 @@ def test_generate_from_unconditional(self): **model.get_unconditional_inputs(num_samples=4), max_new_tokens=5, concat_unconditional_inputs=False ) - # check same results from uncondtional or no inputs + # check same results from unconditional or no inputs outputs_from_unconditional = model.generate( **model.get_unconditional_inputs(num_samples=1), max_new_tokens=5, concat_unconditional_inputs=False ) diff --git a/tests/models/moshi/test_tokenization_moshi.py b/tests/models/moshi/test_tokenization_moshi.py index a520cca94bbe..0aaa6295ea66 100644 --- a/tests/models/moshi/test_tokenization_moshi.py +++ b/tests/models/moshi/test_tokenization_moshi.py @@ -289,7 +289,7 @@ def test_training_new_tokenizer_with_special_tokens_change(self): self.assertTrue( find, f"'{special_token.__repr__()}' should appear as an `AddedToken` in the all_special_tokens_extended = " - f"{[k for k in new_tokenizer.all_special_tokens_extended if str(k)==new_special_token_str]} but it is missing" + f"{[k for k in new_tokenizer.all_special_tokens_extended if str(k) == new_special_token_str]} but it is missing" ", this means that the new tokenizers did not keep the `rstrip`, `lstrip`, `normalized` etc attributes.", ) elif special_token not in special_tokens_map: diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py index c8faac1b7d3c..c14292b093f2 100644 --- a/tests/models/musicgen/test_modeling_musicgen.py +++ b/tests/models/musicgen/test_modeling_musicgen.py @@ -971,7 +971,7 @@ def test_greedy_generate_stereo_outputs(self): self.model_tester.audio_channels = original_audio_channels @unittest.skip( - reason="MusicgenModel is actually not the base of MusicgenForCausalLM as the latter is a composit model" + reason="MusicgenModel is actually not the base of MusicgenForCausalLM as the latter is a composite model" ) def test_save_load_fast_init_from_base(self): pass diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py index c89471d119a9..0fcfa254afa2 100644 --- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py +++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py @@ -961,7 +961,7 @@ def test_greedy_generate_stereo_outputs(self): self.model_tester.audio_channels = original_audio_channels @unittest.skip( - reason="MusicgenMelodyModel is actually not the base of MusicgenMelodyForCausalLM as the latter is a composit model" + reason="MusicgenMelodyModel is actually not the base of MusicgenMelodyForCausalLM as the latter is a composite model" ) def test_save_load_fast_init_from_base(self): pass diff --git a/tests/models/oneformer/test_modeling_oneformer.py b/tests/models/oneformer/test_modeling_oneformer.py index 8f1df74ea627..c544154b2364 100644 --- a/tests/models/oneformer/test_modeling_oneformer.py +++ b/tests/models/oneformer/test_modeling_oneformer.py @@ -173,7 +173,7 @@ def create_and_check_oneformer_model( output = model(pixel_values=pixel_values, task_inputs=task_inputs, pixel_mask=pixel_mask) output = model(pixel_values, task_inputs=task_inputs, output_hidden_states=True) - # the correct shape of output.transformer_decoder_hidden_states ensure the correcteness of the + # the correct shape of output.transformer_decoder_hidden_states ensure the correctness of the # encoder and pixel decoder self.parent.assertEqual( output.transformer_decoder_object_queries.shape, diff 
--git a/tests/models/owlv2/test_modeling_owlv2.py b/tests/models/owlv2/test_modeling_owlv2.py index e1278d3c937b..dff1cbe8c00c 100644 --- a/tests/models/owlv2/test_modeling_owlv2.py +++ b/tests/models/owlv2/test_modeling_owlv2.py @@ -475,7 +475,7 @@ def test_retain_grad_hidden_states_attentions(self): def test_model_get_set_embeddings(self): pass - # override as the `logit_scale` parameter initilization is different for OWLV2 + # override as the `logit_scale` parameter initialization is different for OWLV2 def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -484,7 +484,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), diff --git a/tests/models/owlvit/test_modeling_owlvit.py b/tests/models/owlvit/test_modeling_owlvit.py index 315cdf813a6c..1ad85cb37919 100644 --- a/tests/models/owlvit/test_modeling_owlvit.py +++ b/tests/models/owlvit/test_modeling_owlvit.py @@ -470,7 +470,7 @@ def test_retain_grad_hidden_states_attentions(self): def test_model_get_set_embeddings(self): pass - # override as the `logit_scale` parameter initilization is different for OWLVIT + # override as the `logit_scale` parameter initialization is different for OWLVIT def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -479,7 +479,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), diff --git a/tests/models/persimmon/test_modeling_persimmon.py b/tests/models/persimmon/test_modeling_persimmon.py index 744788cf6447..4867e38acb68 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -424,7 +424,9 @@ def test_model_rope_scaling(self): long_input_length = int(config.max_position_embeddings * 1.5) # Inputs - x = torch.randn(1, dtype=torch.float32, device=torch_device) # used exlusively to get the dtype and the device + x = torch.randn( + 1, dtype=torch.float32, device=torch_device + ) # used exclusively to get the dtype and the device position_ids_short = torch.arange(short_input_length, dtype=torch.long, device=torch_device) position_ids_short = position_ids_short.unsqueeze(0) position_ids_long = torch.arange(long_input_length, dtype=torch.long, device=torch_device) diff --git a/tests/models/phi/test_modeling_phi.py b/tests/models/phi/test_modeling_phi.py index 9b7d44ca1cb7..5fe681d99593 100644 --- a/tests/models/phi/test_modeling_phi.py +++ b/tests/models/phi/test_modeling_phi.py @@ -403,7 +403,9 @@ def test_model_rope_scaling(self): long_input_length = int(config.max_position_embeddings * 1.5) # Inputs - x = torch.randn(1, dtype=torch.float32, device=torch_device) # used exlusively to get the dtype and the device + x = torch.randn( + 1, dtype=torch.float32, device=torch_device + ) # used exclusively to get the dtype and the device position_ids_short = torch.arange(short_input_length, dtype=torch.long, 
device=torch_device) position_ids_short = position_ids_short.unsqueeze(0) position_ids_long = torch.arange(long_input_length, dtype=torch.long, device=torch_device) diff --git a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py index 4b5bb61eb8f9..3e4f02626d5f 100644 --- a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py +++ b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py @@ -263,7 +263,7 @@ def test_mismatching_num_image_tokens(self): config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config).to(torch_device) - _ = model(**input_dict) # successfull forward with no modifications + _ = model(**input_dict) # successful forward with no modifications # remove one image but leave the image token in text patch_size = config.vision_config.patch_size diff --git a/tests/models/reformer/test_modeling_reformer.py b/tests/models/reformer/test_modeling_reformer.py index 235db79a9dbe..b4e493dd2c1d 100644 --- a/tests/models/reformer/test_modeling_reformer.py +++ b/tests/models/reformer/test_modeling_reformer.py @@ -696,7 +696,7 @@ def test_left_padding_compatibility(self): pass def prepare_config_and_inputs_for_generate(self, *args, **kwargs): - # override because overwise we hit max possible seq length for model (4*8=32) + # override because otherwise we hit max possible seq length for model (4*8=32) # decreasing the seq_length in tester causes errors for "training_tests", those need exactly max seq length # NOTE: seq_length has to be multiple of 4, otherwise it fails for other tests original_sequence_length = self.model_tester.seq_length @@ -887,7 +887,7 @@ def test_left_padding_compatibility(self): @require_tokenizers class ReformerIntegrationTests(unittest.TestCase): """ - These integration tests test the current layer activations and gradients againts the output of the Hugging Face Reformer model at time of integration: 29/06/2020. During integration, the model was tested against the output of the official Trax ReformerLM model for various cases ("lsh" only, "lsh" only, masked / non-masked, different chunk length, ....). In order to recover the original trax integration tests, one should use patrickvonplaten's fork of trax and the code that lives on the branch `reformer_trax_tests`. + These integration tests test the current layer activations and gradients against the output of the Hugging Face Reformer model at time of integration: 29/06/2020. During integration, the model was tested against the output of the official Trax ReformerLM model for various cases ("lsh" only, "lsh" only, masked / non-masked, different chunk length, ....). In order to recover the original trax integration tests, one should use patrickvonplaten's fork of trax and the code that lives on the branch `reformer_trax_tests`. 
""" def _get_basic_config_and_input(self): @@ -1246,7 +1246,7 @@ def test_local_lm_model_grad(self): ) loss.backward() - # check last grads to cover all proable errors + # check last grads to cover all probable errors grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5] expected_grad_slice_word = torch.tensor( [-0.0005, -0.0001, -0.0002, -0.0006, -0.0006], @@ -1287,7 +1287,7 @@ def test_lsh_lm_model_grad(self): loss, torch.tensor(5.7854, dtype=torch.float, device=torch_device), rtol=1e-3, atol=1e-3 ) loss.backward() - # check last grads to cover all proable errors + # check last grads to cover all probable errors grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5] expected_grad_slice_word = torch.tensor( [0.0004, 0.0003, 0.0006, -0.0004, 0.0002], diff --git a/tests/models/rt_detr_v2/test_modeling_rt_detr_v2.py b/tests/models/rt_detr_v2/test_modeling_rt_detr_v2.py index d5388cf41a99..e8af79ca7baf 100644 --- a/tests/models/rt_detr_v2/test_modeling_rt_detr_v2.py +++ b/tests/models/rt_detr_v2/test_modeling_rt_detr_v2.py @@ -545,10 +545,10 @@ def test_different_timm_backbone(self): self.model_tester.num_labels, ) self.assertEqual(outputs.logits.shape, expected_shape) - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.model.backbone.intermediate_channel_sizes), 3) else: - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.backbone.intermediate_channel_sizes), 3) self.assertTrue(outputs) @@ -577,10 +577,10 @@ def test_hf_backbone(self): self.model_tester.num_labels, ) self.assertEqual(outputs.logits.shape, expected_shape) - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.model.backbone.intermediate_channel_sizes), 3) else: - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.backbone.intermediate_channel_sizes), 3) self.assertTrue(outputs) diff --git a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py index 25bbc1c3040d..c30ebcc87fcd 100644 --- a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py +++ b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py @@ -1017,7 +1017,7 @@ def test_to_eng_text(self): output = model.generate(**self.input_text, num_beams=1, tgt_lang="eng", return_intermediate_token_ids=True) self.assertListEqual(expected_text_tokens, output.sequences.squeeze().tolist()) - # FOR NOW, only first units correspondance + # FOR NOW, only first units correspondence self.assertListEqual(expected_unit_tokens[:10], output.unit_sequences.squeeze().tolist()[:10]) self.assertListAlmostEqual(expected_wav_slice, output.waveform.squeeze().tolist()[50:60]) diff --git a/tests/models/speech_encoder_decoder/test_modeling_speech_encoder_decoder.py b/tests/models/speech_encoder_decoder/test_modeling_speech_encoder_decoder.py index 897d4b056f19..399a111530d1 100644 --- a/tests/models/speech_encoder_decoder/test_modeling_speech_encoder_decoder.py +++ b/tests/models/speech_encoder_decoder/test_modeling_speech_encoder_decoder.py @@ -266,7 +266,10 @@ def check_save_and_load_encoder_decoder_model( out_2 = outputs[0].cpu().numpy() out_2[np.isnan(out_2)] = 0 - with tempfile.TemporaryDirectory() as encoder_tmp_dirname, tempfile.TemporaryDirectory() as decoder_tmp_dirname: + with ( + 
tempfile.TemporaryDirectory() as encoder_tmp_dirname, + tempfile.TemporaryDirectory() as decoder_tmp_dirname, + ): enc_dec_model.encoder.save_pretrained(encoder_tmp_dirname) enc_dec_model.decoder.save_pretrained(decoder_tmp_dirname) SpeechEncoderDecoderModel.from_encoder_decoder_pretrained( diff --git a/tests/models/stablelm/test_modeling_stablelm.py b/tests/models/stablelm/test_modeling_stablelm.py index 946b220e0ea9..14af6d5f7275 100644 --- a/tests/models/stablelm/test_modeling_stablelm.py +++ b/tests/models/stablelm/test_modeling_stablelm.py @@ -409,7 +409,9 @@ def test_model_rope_scaling(self): long_input_length = int(config.max_position_embeddings * 1.5) # Inputs - x = torch.randn(1, dtype=torch.float32, device=torch_device) # used exlusively to get the dtype and the device + x = torch.randn( + 1, dtype=torch.float32, device=torch_device + ) # used exclusively to get the dtype and the device position_ids_short = torch.arange(short_input_length, dtype=torch.long, device=torch_device) position_ids_short = position_ids_short.unsqueeze(0) position_ids_long = torch.arange(long_input_length, dtype=torch.long, device=torch_device) diff --git a/tests/models/t5/test_modeling_flax_t5.py b/tests/models/t5/test_modeling_flax_t5.py index dc372c57694d..703b9973cb70 100644 --- a/tests/models/t5/test_modeling_flax_t5.py +++ b/tests/models/t5/test_modeling_flax_t5.py @@ -574,7 +574,7 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase): @slow def test_small_integration_test(self): """ - For comparision run: + For comparison run: >>> import t5 # pip install t5==0.7.1 >>> from t5.data.sentencepiece_vocabulary import SentencePieceVocabulary @@ -604,7 +604,7 @@ def test_small_integration_test(self): @slow def test_small_v1_1_integration_test(self): """ - For comparision run: + For comparison run: >>> import t5 # pip install t5==0.7.1 >>> from t5.data.sentencepiece_vocabulary import SentencePieceVocabulary @@ -634,7 +634,7 @@ def test_small_v1_1_integration_test(self): @slow def test_small_byt5_integration_test(self): """ - For comparision run: + For comparison run: >>> import t5 # pip install t5==0.9.1 >>> path_to_byt5_small_checkpoint = '' diff --git a/tests/models/t5/test_modeling_t5.py b/tests/models/t5/test_modeling_t5.py index 03a6adb1a916..fb807edc0bc7 100644 --- a/tests/models/t5/test_modeling_t5.py +++ b/tests/models/t5/test_modeling_t5.py @@ -1208,7 +1208,7 @@ def test_small_generation(self): @slow def test_small_integration_test(self): """ - For comparision run: + For comparison run: >>> import t5 # pip install t5==0.7.1 >>> from t5.data.sentencepiece_vocabulary import SentencePieceVocabulary @@ -1234,7 +1234,7 @@ def test_small_integration_test(self): @slow def test_small_v1_1_integration_test(self): """ - For comparision run: + For comparison run: >>> import t5 # pip install t5==0.7.1 >>> from t5.data.sentencepiece_vocabulary import SentencePieceVocabulary @@ -1260,7 +1260,7 @@ def test_small_v1_1_integration_test(self): @slow def test_small_byt5_integration_test(self): """ - For comparision run: + For comparison run: >>> import t5 # pip install t5==0.9.1 >>> path_to_byt5_small_checkpoint = '' diff --git a/tests/models/t5/test_modeling_tf_t5.py b/tests/models/t5/test_modeling_tf_t5.py index 037f1b1e2188..7e6367582ead 100644 --- a/tests/models/t5/test_modeling_tf_t5.py +++ b/tests/models/t5/test_modeling_tf_t5.py @@ -618,7 +618,7 @@ def model(self): @slow def test_small_integration_test(self): """ - For comparision run: + For comparison run: >>> import t5 # pip install t5==0.7.1 >>> from 
t5.data.sentencepiece_vocabulary import SentencePieceVocabulary @@ -644,7 +644,7 @@ def test_small_integration_test(self): @slow def test_small_v1_1_integration_test(self): """ - For comparision run: + For comparison run: >>> import t5 # pip install t5==0.7.1 >>> from t5.data.sentencepiece_vocabulary import SentencePieceVocabulary @@ -670,7 +670,7 @@ def test_small_v1_1_integration_test(self): @slow def test_small_byt5_integration_test(self): """ - For comparision run: + For comparison run: >>> import t5 # pip install t5==0.9.1 >>> path_to_byt5_small_checkpoint = '' diff --git a/tests/models/t5/test_tokenization_t5.py b/tests/models/t5/test_tokenization_t5.py index e64882b6d3fc..a7ad5320af2f 100644 --- a/tests/models/t5/test_tokenization_t5.py +++ b/tests/models/t5/test_tokenization_t5.py @@ -498,7 +498,7 @@ def setUpClass(cls): tokenizer.add_special_tokens( {"additional_special_tokens": [AddedToken("", rstrip=False, lstrip=False)]} ) - # TODO ArthurZ the above is necessary as addedTokens / intialization sucks. Trie is not correctly created + # TODO ArthurZ the above is necessary as addedTokens / initialization sucks. Trie is not correctly created # So the extra ids are split.... cls.tokenizer = tokenizer diff --git a/tests/models/table_transformer/test_modeling_table_transformer.py b/tests/models/table_transformer/test_modeling_table_transformer.py index aa4b7131f949..9995aae7d4b4 100644 --- a/tests/models/table_transformer/test_modeling_table_transformer.py +++ b/tests/models/table_transformer/test_modeling_table_transformer.py @@ -477,10 +477,10 @@ def test_different_timm_backbone(self): self.model_tester.num_labels + 1, ) self.assertEqual(outputs.logits.shape, expected_shape) - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 3) else: - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 3) self.assertTrue(outputs) @@ -509,10 +509,10 @@ def test_hf_backbone(self): self.model_tester.num_labels + 1, ) self.assertEqual(outputs.logits.shape, expected_shape) - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 3) else: - # Confirm out_indices was propogated to backbone + # Confirm out_indices was propagated to backbone self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 3) self.assertTrue(outputs) diff --git a/tests/models/tvp/test_modeling_tvp.py b/tests/models/tvp/test_modeling_tvp.py index 6ab0dffde4eb..3ac01c53d52b 100644 --- a/tests/models/tvp/test_modeling_tvp.py +++ b/tests/models/tvp/test_modeling_tvp.py @@ -194,7 +194,7 @@ def test_inputs_embeds(self): def test_model_get_set_embeddings(self): pass - # override as the `logit_scale` parameter initilization is different for TVP + # override as the `logit_scale` parameter initialization is different for TVP def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -219,7 +219,7 @@ def _validate_backbone_init(): model.to(torch_device) model.eval() - # Confirm out_indices propogated to backbone + # Confirm out_indices propagated to backbone if model.__class__.__name__ == "TvpModel": self.assertEqual(len(model.vision_model.backbone.out_indices), 2) elif 
model.__class__.__name__ == "TvpForVideoGrounding": diff --git a/tests/models/univnet/test_modeling_univnet.py b/tests/models/univnet/test_modeling_univnet.py index 9a7ade715527..003c63a3e640 100644 --- a/tests/models/univnet/test_modeling_univnet.py +++ b/tests/models/univnet/test_modeling_univnet.py @@ -227,7 +227,7 @@ def get_inputs(self, device, num_samples: int = 3, noise_length: int = 10, seed: noise_sequence_shape = (64, noise_length) else: noise_sequence_shape = (num_samples, 64, noise_length) - # Explicity generate noise_sequence on CPU for consistency. + # Explicitly generate noise_sequence on CPU for consistency. noise_sequence = torch.randn(noise_sequence_shape, generator=generator, dtype=torch.float32, device="cpu") # Put noise_sequence on the desired device. noise_sequence = noise_sequence.to(device) diff --git a/tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py b/tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py index a680e504cd63..317048550e5c 100644 --- a/tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py +++ b/tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py @@ -216,7 +216,10 @@ def check_save_and_load_encoder_decoder_model( out_2 = outputs[0].cpu().numpy() out_2[np.isnan(out_2)] = 0 - with tempfile.TemporaryDirectory() as encoder_tmp_dirname, tempfile.TemporaryDirectory() as decoder_tmp_dirname: + with ( + tempfile.TemporaryDirectory() as encoder_tmp_dirname, + tempfile.TemporaryDirectory() as decoder_tmp_dirname, + ): enc_dec_model.encoder.save_pretrained(encoder_tmp_dirname) enc_dec_model.decoder.save_pretrained(decoder_tmp_dirname) VisionEncoderDecoderModel.from_encoder_decoder_pretrained( diff --git a/tests/models/vitmatte/test_modeling_vitmatte.py b/tests/models/vitmatte/test_modeling_vitmatte.py index 5abbc774135d..b75cfc886c0c 100644 --- a/tests/models/vitmatte/test_modeling_vitmatte.py +++ b/tests/models/vitmatte/test_modeling_vitmatte.py @@ -244,7 +244,7 @@ def _validate_backbone_init(): model.eval() if model.__class__.__name__ == "VitMatteForImageMatting": - # Confirm out_indices propogated to backbone + # Confirm out_indices propagated to backbone self.assertEqual(len(model.backbone.out_indices), 2) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py b/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py index 3dadab891e52..aa55557691b5 100644 --- a/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py @@ -616,9 +616,10 @@ def test_wav2vec2_with_lm_pool(self): self.assertEqual(transcription[0], "bien y qué regalo vas a abrir primero") # user-managed pool + num_processes should trigger a warning - with CaptureLogger(processing_wav2vec2_with_lm.logger) as cl, multiprocessing.get_context("fork").Pool( - 2 - ) as pool: + with ( + CaptureLogger(processing_wav2vec2_with_lm.logger) as cl, + multiprocessing.get_context("fork").Pool(2) as pool, + ): transcription = processor.batch_decode(np.array(logits), pool, num_processes=2).text self.assertIn("num_process", cl.out) diff --git a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py index 7508e4fc01fb..593d627ccf97 100644 --- a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py @@ -712,9 +712,10 @@ def test_wav2vec2_with_lm_pool(self): 
self.assertEqual(transcription[0], "el libro ha sido escrito por cervantes") # user-managed pool + num_processes should trigger a warning - with CaptureLogger(processing_wav2vec2_with_lm.logger) as cl, multiprocessing.get_context("fork").Pool( - 2 - ) as pool: + with ( + CaptureLogger(processing_wav2vec2_with_lm.logger) as cl, + multiprocessing.get_context("fork").Pool(2) as pool, + ): transcription = processor.batch_decode(logits.numpy(), pool, num_processes=2).text self.assertIn("num_process", cl.out) diff --git a/tests/models/wav2vec2/test_modeling_wav2vec2.py b/tests/models/wav2vec2/test_modeling_wav2vec2.py index 8199ba04e1f8..df5fd7c452bf 100644 --- a/tests/models/wav2vec2/test_modeling_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_wav2vec2.py @@ -1885,9 +1885,10 @@ def test_wav2vec2_with_lm_pool(self): self.assertEqual(transcription[0], "habitan aguas poco profundas y rocosas") # user-managed pool + num_processes should trigger a warning - with CaptureLogger(processing_wav2vec2_with_lm.logger) as cl, multiprocessing.get_context("fork").Pool( - 2 - ) as pool: + with ( + CaptureLogger(processing_wav2vec2_with_lm.logger) as cl, + multiprocessing.get_context("fork").Pool(2) as pool, + ): transcription = processor.batch_decode(logits.cpu().numpy(), pool, num_processes=2).text self.assertIn("num_process", cl.out) diff --git a/tests/models/x_clip/test_modeling_x_clip.py b/tests/models/x_clip/test_modeling_x_clip.py index 85265589050a..ac402d2ff9ca 100644 --- a/tests/models/x_clip/test_modeling_x_clip.py +++ b/tests/models/x_clip/test_modeling_x_clip.py @@ -588,7 +588,7 @@ def test_initialization(self): model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation + # check if `logit_scale` is initialized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), diff --git a/tests/models/xglm/test_modeling_tf_xglm.py b/tests/models/xglm/test_modeling_tf_xglm.py index 7ddab81a2611..56094003279e 100644 --- a/tests/models/xglm/test_modeling_tf_xglm.py +++ b/tests/models/xglm/test_modeling_tf_xglm.py @@ -208,7 +208,7 @@ def test_batch_generation(self): # use different length sentences to test batching sentences = [ - "This is an extremelly long sentence that only exists to test the ability of the model to cope with " + "This is an extremely long sentence that only exists to test the ability of the model to cope with " "left-padding, such as in batched generation. The output for the sequence below should be the same " "regardless of whether left padding is applied or not. When", "Hello, my dog is a little", @@ -230,7 +230,7 @@ def test_batch_generation(self): padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True) expected_output_sentence = [ - "This is an extremelly long sentence that only exists to test the ability of the model to cope with " + "This is an extremely long sentence that only exists to test the ability of the model to cope with " "left-padding, such as in batched generation. The output for the sequence below should be the same " "regardless of whether left padding is applied or not. 
When left padding is applied, the sequence will be " "a single", diff --git a/tests/models/xglm/test_modeling_xglm.py b/tests/models/xglm/test_modeling_xglm.py index 88f3c13497eb..31c298132e14 100644 --- a/tests/models/xglm/test_modeling_xglm.py +++ b/tests/models/xglm/test_modeling_xglm.py @@ -371,7 +371,7 @@ def test_batch_generation(self): # use different length sentences to test batching sentences = [ - "This is an extremelly long sentence that only exists to test the ability of the model to cope with " + "This is an extremely long sentence that only exists to test the ability of the model to cope with " "left-padding, such as in batched generation. The output for the sequence below should be the same " "regardless of whether left padding is applied or not. When", "Hello, my dog is a little", @@ -395,7 +395,7 @@ def test_batch_generation(self): padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True) expected_output_sentence = [ - "This is an extremelly long sentence that only exists to test the ability of the model to cope with " + "This is an extremely long sentence that only exists to test the ability of the model to cope with " "left-padding, such as in batched generation. The output for the sequence below should be the same " "regardless of whether left padding is applied or not. When left padding is applied, the sequence will be " "a single", diff --git a/tests/sagemaker/scripts/pytorch/run_ddp.py b/tests/sagemaker/scripts/pytorch/run_ddp.py index 1191caeb96a2..474285841b0d 100644 --- a/tests/sagemaker/scripts/pytorch/run_ddp.py +++ b/tests/sagemaker/scripts/pytorch/run_ddp.py @@ -36,12 +36,12 @@ def main(): --master_addr={hosts[0]} \ --master_port={port} \ ./run_glue.py \ - {"".join([f" --{parameter} {value}" for parameter,value in args.__dict__.items()])}""" + {"".join([f" --{parameter} {value}" for parameter, value in args.__dict__.items()])}""" else: cmd = f"""python -m torch.distributed.launch \ --nproc_per_node={num_gpus} \ ./run_glue.py \ - {"".join([f" --{parameter} {value}" for parameter,value in args.__dict__.items()])}""" + {"".join([f" --{parameter} {value}" for parameter, value in args.__dict__.items()])}""" try: subprocess.run(cmd, shell=True) except Exception as e: diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index f5723d58320f..eba94a45c927 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -2065,21 +2065,21 @@ def test_encode_decode_fast_slow_all_tokens(self): for chunk in range(0, len(input_full_vocab_string) - 1024, 1024): string_to_check = input_full_vocab_string[chunk : chunk + 1024] - with self.subTest(f"{(chunk/len(input_full_vocab_string))*100}%"): + with self.subTest(f"{(chunk / len(input_full_vocab_string)) * 100}%"): slow_encode = slow_tokenizer.encode(string_to_check) fast_encode = rust_tokenizer.encode(string_to_check) self.assertEqual( slow_encode, fast_encode, "Hint: the following tokenization diff were obtained for slow vs fast:\n " - f"elements in slow: {set(slow_tokenizer.tokenize(string_to_check))-set(rust_tokenizer.tokenize(string_to_check))} \nvs\n " - f"elements in fast: {set(rust_tokenizer.tokenize(string_to_check))-set(slow_tokenizer.tokenize(string_to_check))} \n" + f"elements in slow: {set(slow_tokenizer.tokenize(string_to_check)) - set(rust_tokenizer.tokenize(string_to_check))} \nvs\n " + f"elements in fast: {set(rust_tokenizer.tokenize(string_to_check)) - set(slow_tokenizer.tokenize(string_to_check))} \n" f"string used : 
{string_to_check}", ) print(f"Length of the input ids that is tested: {len(input_full_vocab_ids)}") for chunk in range(0, len(input_full_vocab_ids) - 100, 100): ids_to_decode = input_full_vocab_ids[chunk : chunk + 100] - with self.subTest(f"{(chunk/len(input_full_vocab_string))*100}%"): + with self.subTest(f"{(chunk / len(input_full_vocab_string)) * 100}%"): self.assertEqual( slow_tokenizer.decode( ids_to_decode, @@ -4423,7 +4423,7 @@ def test_training_new_tokenizer_with_special_tokens_change(self): self.assertTrue( find, f"'{special_token.__repr__()}' should appear as an `AddedToken` in the all_special_tokens_extended = " - f"{[k for k in new_tokenizer.all_special_tokens_extended if str(k)==new_special_token_str]} but it is missing" + f"{[k for k in new_tokenizer.all_special_tokens_extended if str(k) == new_special_token_str]} but it is missing" ", this means that the new tokenizers did not keep the `rstrip`, `lstrip`, `normalized` etc attributes.", ) elif special_token not in special_tokens_map: diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 254202142565..6ea5d785231c 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -3200,7 +3200,7 @@ def test_can_resume_training_lm(self): # Checkpoint at intermediate step enable_full_determinism(0) - checkpoint = os.path.join(tmpdir, f"checkpoint-{resume_from_step+1}") + checkpoint = os.path.join(tmpdir, f"checkpoint-{resume_from_step + 1}") trainer = get_language_model_trainer(**kwargs) trainer.train(resume_from_checkpoint=checkpoint) model_params = torch.cat([p.cpu().flatten() for p in trainer.model.parameters()])