Refactor initialization tests to check init from from_pretrained #30451

Open
wants to merge 3 commits into base: main
Changes from 1 commit
49 changes: 22 additions & 27 deletions tests/models/align/test_modeling_align.py
@@ -482,33 +482,28 @@ def test_model_common_attributes(self):
pass

# override as the `temperature` parameter initialization is different for ALIGN
def test_initialization(self):
Collaborator (author) comment: Suggestion for reviewers to look at the changes in test_modeling_common.py, as this will provide more context for the changes in the other models' tests :)

config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
# check if `temperature` is initialized as per the original implementation
if name == "temperature":
self.assertAlmostEqual(
param.data.item(),
1.0,
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
elif name == "text_projection.weight":
self.assertTrue(
-1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
if param.requires_grad:
# check if `temperature` is initialized as per the original implementation
if name == "temperature":
self.assertAlmostEqual(
param.data.item(),
1.0,
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
elif name == "text_projection.weight":
self.assertTrue(
-1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
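The shared helpers referenced above live in tests/test_modeling_common.py, which is not included in this excerpt. As the author's comment suggests, the per-model changes are easier to follow alongside that file, so here is a minimal sketch of how the common test presumably drives the new per-model hook. The method names are taken from the diffs in this PR, but the bodies are an inferred approximation, not the PR's actual implementation:

def test_initialization(self):
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

    configs_no_init = _config_zero_init(config)
    for model_class in self.all_model_classes:
        model = model_class(config=configs_no_init)
        # Delegate the per-parameter checks to a hook that individual model
        # test files (ALIGN, AltCLIP, BLIP, ...) can override, as shown above.
        self._test_models_weight_initialization(configs_no_init, model_class, model)

def _test_models_weight_initialization(self, config, model_class, model):
    # Default check: with a zero-init config, every trainable parameter should
    # average to exactly 0.0 or 1.0; models with special parameters such as
    # `temperature` or `logit_scale` override this hook instead of the whole test.
    for name, param in model.named_parameters():
        if param.requires_grad:
            self.assertIn(
                ((param.data.mean() * 1e9).round() / 1e9).item(),
                [0.0, 1.0],
                msg=f"Parameter {name} of model {model_class} seems not properly initialized",
            )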
38 changes: 17 additions & 21 deletions tests/models/altclip/test_modeling_altclip.py
@@ -464,27 +464,23 @@ def test_model_common_attributes(self):
pass

# override as the `logit_scale` parameter initialization is different for AltCLIP
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
31 changes: 13 additions & 18 deletions tests/models/beit/test_modeling_beit.py
@@ -26,7 +26,7 @@

from ...test_backbone_common import BackboneTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin


@@ -364,23 +364,18 @@ def test_training_gradient_checkpointing_use_reentrant(self):
def test_training_gradient_checkpointing_use_reentrant_false(self):
pass

def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
# we skip lambda parameters as these require special initial values
# determined by config.layer_scale_init_value
if "lambda" in name:
continue
if param.requires_grad:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
# we skip lambda parameters as these require special initial values
# determined by config.layer_scale_init_value
if "lambda" in name:
continue
if param.requires_grad:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

@slow
def test_model_from_pretrained(self):
26 changes: 11 additions & 15 deletions tests/models/bit/test_modeling_bit.py
@@ -208,21 +208,17 @@ def test_backbone(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_backbone(*config_and_inputs)

def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

for model_class in self.all_model_classes:
model = model_class(config=config)
for name, module in model.named_modules():
if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
self.assertTrue(
torch.all(module.weight == 1),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
self.assertTrue(
torch.all(module.bias == 0),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, module in model.named_modules():
if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
self.assertTrue(
torch.all(module.weight == 1),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
self.assertTrue(
torch.all(module.bias == 0),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def test_hidden_states_output(self):
def check_hidden_states_output(inputs_dict, config, model_class):
117 changes: 51 additions & 66 deletions tests/models/blip/test_modeling_blip.py
@@ -467,28 +467,23 @@ def test_model_common_attributes(self):
pass

# override as the `logit_scale` parameter initialization is different for Blip
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
@@ -927,28 +922,23 @@ def test_training_gradient_checkpointing_use_reentrant_false(self):
pass

# override as the `logit_scale` parameter initialization is different for Blip
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
@@ -1143,28 +1133,23 @@ def test_training_gradient_checkpointing(self):
loss.backward()

# override as the `logit_scale` parameter initialization is different for Blip
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def _test_models_weight_initialization(self, config, model_class, model):
for name, param in model.named_parameters():
if param.requires_grad:
# check if `logit_scale` is initialized as per the original implementation
if name == "logit_scale":
self.assertAlmostEqual(
param.data.item(),
np.log(1 / 0.07),
delta=1e-3,
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
35 changes: 27 additions & 8 deletions tests/models/blip_2/test_modeling_blip_2.py
@@ -16,6 +16,7 @@


import inspect
import os
import tempfile
import unittest

@@ -30,7 +31,7 @@
slow,
torch_device,
)
from transformers.utils import is_torch_available, is_vision_available
from transformers.utils import is_safetensors_available, is_torch_available, is_vision_available

from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
@@ -56,6 +57,9 @@

from transformers import Blip2Processor

if is_safetensors_available():
from safetensors.torch import save_file


class Blip2VisionModelTester:
def __init__(
@@ -830,13 +834,28 @@ def test_initialization(self):
setattr(configs_no_init, key, _config_zero_init(getattr(configs_no_init, key)))
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
self._test_models_weight_initialization(configs_no_init, model_class, model)

# override from common to deal with nested configurations (`vision_config`, `text_config` and `qformer_config`)
def test_initialization_from_pretrained(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

configs_no_init = _config_zero_init(config)
for key in ["vision_config", "qformer_config", "text_config"]:
setattr(configs_no_init, key, _config_zero_init(getattr(configs_no_init, key)))

# We create a dummy state dict - to ensure all the parameters are initialized in the from_pretrained method
# We have to add a dummy key to the state dict to allow it to be loaded.
# See: https://github.com/huggingface/safetensors/pull/472 which enables saving empty state dicts with metadata
dummy_state_dict = {"dummy": torch.empty(1)}

with tempfile.TemporaryDirectory() as tmp_dir_name:
config.save_pretrained(tmp_dir_name)
save_file(dummy_state_dict, os.path.join(tmp_dir_name, "model.safetensors"), metadata={"format": "pt"})

for model_class in self.all_model_classes:
model = model_class.from_pretrained(tmp_dir_name)
self._test_models_weight_initialization(configs_no_init, model_class, model)

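For comparison, the base test_initialization_from_pretrained in tests/test_modeling_common.py (the one BLIP-2 overrides above to handle its nested configs) presumably follows the same pattern: save a config plus a dummy safetensors checkpoint, reload with from_pretrained so that every missing parameter goes through weight initialization, and run the same hook. This is a hedged sketch with inferred details; in particular, which config gets saved is an assumption for this sketch, not something visible in the diff:

def test_initialization_from_pretrained(self):
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    configs_no_init = _config_zero_init(config)

    # A dummy key lets the (otherwise empty) safetensors file be loaded;
    # see https://github.com/huggingface/safetensors/pull/472.
    dummy_state_dict = {"dummy": torch.empty(1)}

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # Saving the zero-init config here is an assumption made for this sketch.
        configs_no_init.save_pretrained(tmp_dir_name)
        save_file(dummy_state_dict, os.path.join(tmp_dir_name, "model.safetensors"), metadata={"format": "pt"})

        for model_class in self.all_model_classes:
            # Every real weight is missing from the checkpoint, so from_pretrained
            # has to initialize it through the model's init logic before the checks run.
            model = model_class.from_pretrained(tmp_dir_name)
            self._test_models_weight_initialization(configs_no_init, model_class, model)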

# We will verify our results on an image of cute cats