Refactor initialization tests to check init from from_pretrained #30451

Open
wants to merge 3 commits into base: main
Changes from 1 commit
49 changes: 22 additions & 27 deletions tests/models/align/test_modeling_align.py
@@ -482,33 +482,28 @@ def test_model_common_attributes(self):
pass

# override as the `temperature` parameter initialization is different for ALIGN
def test_initialization(self):
Collaborator (author) comment: Suggestion for reviewers to look at the changes in test_modeling_common.py, as this will provide more context for the changes in the other models' tests :)

config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
# check if `temperature` is initialized as per the original implementation
if name == "temperature":
self.assertAlmostEqual(
param.data.item(),
1.0,
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
elif name == "text_projection.weight":
self.assertTrue(
-1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
if param.requires_grad:
# check if `temperature` is initialized as per the original implementation
if name == "temperature":
self.assertAlmostEqual(
param.data.item(),
1.0,
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
elif name == "text_projection.weight":
self.assertTrue(
-1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
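The shared helpers referenced above live in tests/test_modeling_common.py, which is not included in this excerpt. As the author's comment suggests, the per-model changes are easier to follow alongside that file, so here is a minimal sketch of how the common test presumably drives the new per-model hook. The method names are taken from the diffs in this PR, but the bodies are an inferred approximation, not the PR's actual implementation:

def test_initialization(self):
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

    configs_no_init = _config_zero_init(config)
    for model_class in self.all_model_classes:
        model = model_class(config=configs_no_init)
        # Delegate the per-parameter checks to a hook that individual model
        # test files (ALIGN, AltCLIP, BLIP, ...) can override, as shown above.
        self._test_models_weight_initialization(configs_no_init, model_class, model)

def _test_models_weight_initialization(self, config, model_class, model):
    # Default check: with a zero-init config, every trainable parameter should
    # average to exactly 0.0 or 1.0; models with special parameters such as
    # `temperature` or `logit_scale` override this hook instead of the whole test.
    for name, param in model.named_parameters():
        if param.requires_grad:
            self.assertIn(
                ((param.data.mean() * 1e9).round() / 1e9).item(),
                [0.0, 1.0],
                msg=f"Parameter {name} of model {model_class} seems not properly initialized",
            )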
38 changes: 17 additions & 21 deletions tests/models/altclip/test_modeling_altclip.py
@@ -464,27 +464,23 @@ def test_model_common_attributes(self):
pass

# override as the `logit_scale` parameter initialization is different for AltCLIP
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
31 changes: 13 additions & 18 deletions tests/models/beit/test_modeling_beit.py
@@ -26,7 +26,7 @@

from ...test_backbone_common import BackboneTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin


@@ -364,23 +364,18 @@ def test_training_gradient_checkpointing_use_reentrant(self):
def test_training_gradient_checkpointing_use_reentrant_false(self):
pass

def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
# we skip lambda parameters as these require special initial values
# determined by config.layer_scale_init_value
if "lambda" in name:
continue
if param.requires_grad:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
# we skip lambda parameters as these require special initial values
# determined by config.layer_scale_init_value
if "lambda" in name:
continue
if param.requires_grad:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

@slow
def test_model_from_pretrained(self):
26 changes: 11 additions & 15 deletions tests/models/bit/test_modeling_bit.py
@@ -208,21 +208,17 @@ def test_backbone(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_backbone(*config_and_inputs)

def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

for model_class in self.all_model_classes:
model = model_class(config=config)
for name, module in model.named_modules():
if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
self.assertTrue(
torch.all(module.weight == 1),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
self.assertTrue(
torch.all(module.bias == 0),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, module in model.named_modules():
if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
self.assertTrue(
torch.all(module.weight == 1),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
self.assertTrue(
torch.all(module.bias == 0),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def test_hidden_states_output(self):
def check_hidden_states_output(inputs_dict, config, model_class):
117 changes: 51 additions & 66 deletions tests/models/blip/test_modeling_blip.py
@@ -467,28 +467,23 @@ def test_model_common_attributes(self):
pass

# override as the `logit_scale` parameter initialization is different for Blip
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
@@ -927,28 +922,23 @@ def test_training_gradient_checkpointing_use_reentrant_false(self):
pass

# override as the `logit_scale` parameter initialization is different for Blip
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
@@ -1143,28 +1133,23 @@ def test_training_gradient_checkpointing(self):
loss.backward()

# override as the `logit_scale` parameter initialization is different for Blip
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
35 changes: 27 additions & 8 deletions tests/models/blip_2/test_modeling_blip_2.py
@@ -16,6 +16,7 @@


import inspect
import os
import tempfile
import unittest

@@ -30,7 +31,7 @@
slow,
torch_device,
)
from transformers.utils import is_torch_available, is_vision_available
from transformers.utils import is_safetensors_available, is_torch_available, is_vision_available

from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
@@ -56,6 +57,9 @@

from transformers import Blip2Processor

if is_safetensors_available():
from safetensors.torch import save_file


class Blip2VisionModelTester:
def __init__(
@@ -830,13 +834,28 @@ def test_initialization(self):
setattr(configs_no_init, key, _config_zero_init(getattr(configs_no_init, key)))
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
self._test_models_weight_initialization(configs_no_init, model_class, model)

# override from common to deal with nested configurations (`vision_config`, `text_config` and `qformer_config`)
def test_initialization_from_pretrained(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for key in ["vision_config", "qformer_config", "text_config"]:
setattr(configs_no_init, key, _config_zero_init(getattr(configs_no_init, key)))

# We create a dummy state dict - to ensure all the parameters are initialized in the from_pretrained method
# We have to add a dummy key to the state dict to allow it to be loaded.
# See: https://github.com/huggingface/safetensors/pull/472 which enables saving empty state dicts with metadata
dummy_state_dict = {"dummy": torch.empty(1)}

with tempfile.TemporaryDirectory() as tmp_dir_name:
config.save_pretrained(tmp_dir_name)
save_file(dummy_state_dict, os.path.join(tmp_dir_name, "model.safetensors"), metadata={"format": "pt"})

for model_class in self.all_model_classes:
model = model_class.from_pretrained(tmp_dir_name)
self._test_models_weight_initialization(configs_no_init, model_class, model)

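For comparison, the base test_initialization_from_pretrained in tests/test_modeling_common.py (the one BLIP-2 overrides above to handle its nested configs) presumably follows the same pattern: save a config plus a dummy safetensors checkpoint, reload with from_pretrained so that every missing parameter goes through weight initialization, and run the same hook. This is a hedged sketch with inferred details; in particular, which config gets saved is an assumption for this sketch, not something visible in the diff:

def test_initialization_from_pretrained(self):
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    configs_no_init = _config_zero_init(config)

    # A dummy key lets the (otherwise empty) safetensors file be loaded;
    # see https://github.com/huggingface/safetensors/pull/472.
    dummy_state_dict = {"dummy": torch.empty(1)}

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # Saving the zero-init config here is an assumption made for this sketch.
        configs_no_init.save_pretrained(tmp_dir_name)
        save_file(dummy_state_dict, os.path.join(tmp_dir_name, "model.safetensors"), metadata={"format": "pt"})

        for model_class in self.all_model_classes:
            # Every real weight is missing from the checkpoint, so from_pretrained
            # has to initialize it through the model's init logic before the checks run.
            model = model_class.from_pretrained(tmp_dir_name)
            self._test_models_weight_initialization(configs_no_init, model_class, model)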

# We will verify our results on an image of cute cats