🚨 Deprecate `legacy` argument for image-text-to-text models and adopt new behavior by default #36307

Open · wants to merge 3 commits into `main`
13 changes: 1 addition & 12 deletions src/transformers/models/donut/processing_donut.py
@@ -88,17 +88,6 @@ def __call__(
[`~DonutProcessor.as_target_processor`] this method forwards all its arguments to DonutTokenizer's
[`~DonutTokenizer.__call__`]. Please refer to the docstrings of the above two methods for more information.
"""
# For backward compatibility
legacy = kwargs.pop("legacy", True)
if legacy:
# With `add_special_tokens=True`, the performance of donut are degraded when working with both images and text.
logger.warning_once(
"Legacy behavior is being used. The current behavior will be deprecated in version 5.0.0. "
"In the new behavior, if both images and text are provided, the default value of `add_special_tokens` "
"will be changed to `False` when calling the tokenizer if `add_special_tokens` is unset. "
"To test the new behavior, set `legacy=False`as a processor call argument."
)

if self._in_target_context_manager:
return self.current_processor(images, text, **kwargs)

@@ -114,7 +103,7 @@ def __call__(
if images is not None:
inputs = self.image_processor(images, **output_kwargs["images_kwargs"])
if text is not None:
if not legacy and images is not None:
if images is not None:
output_kwargs["text_kwargs"].setdefault("add_special_tokens", False)
encodings = self.tokenizer(text, **output_kwargs["text_kwargs"])

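For reference, a minimal sketch of how the new default behaves for Donut (the checkpoint, prompt, and dummy image below are illustrative, not part of this diff): when both images and text are passed and `add_special_tokens` is not set explicitly, the tokenizer is now called with `add_special_tokens=False`, which previously required `legacy=False`.

```python
from PIL import Image
from transformers import DonutProcessor

# Illustrative checkpoint and inputs; any Donut checkpoint behaves the same way here.
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base")
image = Image.new("RGB", (1216, 1664))  # dummy image standing in for a document scan
prompt = "<s_docvqa><s_question>What is the total?</s_question><s_answer>"

# With both images and text, `add_special_tokens` now defaults to False.
inputs = processor(images=image, text=prompt, return_tensors="pt")

# Passing it explicitly still overrides the new default.
inputs_explicit = processor(images=image, text=prompt, add_special_tokens=True, return_tensors="pt")
```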
12 changes: 0 additions & 12 deletions src/transformers/models/git/processing_git.py
@@ -95,15 +95,6 @@ def __call__(
`None`).
- **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
"""
legacy = kwargs.pop("legacy", True)
if legacy:
logger.warning_once(
"Legacy behavior is being used. The current behavior will be deprecated in version 5.0.0. "
"In the new behavior, if both images and text are provided, the last token (EOS token) "
"of the input_ids and attention_mask tensors will be removed. "
"To test the new behavior, set `legacy=False`as a processor call argument."
)

if text is None and images is None:
raise ValueError("You have to specify either text or images. Both cannot be none.")

@@ -123,9 +114,6 @@ def __call__(
if images is not None:
image_features = self.image_processor(images, **output_kwargs["images_kwargs"])
data.update(image_features)
if not legacy:
data["input_ids"] = data["input_ids"][:, :-1]
data["attention_mask"] = data["attention_mask"][:, :-1]
Comment on lines -126 to -128 (Member, Author):
This was used to remove the SEP token at the end of prompts, as was done in the inference example on the model card. However, it doesn't seem to have much influence on the outputs, so I am reverting to the original behavior (before `legacy` was introduced).
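For illustration, a minimal sketch of what the removed branch did, and how a caller could reproduce it manually if they relied on the old `legacy=False` behavior (toy tensors, assuming PyTorch):

```python
import torch

# Toy tokenizer output; 102 stands in for the trailing SEP/EOS token.
data = {
    "input_ids": torch.tensor([[101, 2023, 2003, 1037, 3231, 102]]),
    "attention_mask": torch.tensor([[1, 1, 1, 1, 1, 1]]),
}

# The removed `legacy=False` branch dropped the last token of both tensors.
# After this PR the processor returns the full sequences, so anyone who still
# wants the trimmed prompt can slice it off themselves:
data["input_ids"] = data["input_ids"][:, :-1]
data["attention_mask"] = data["attention_mask"][:, :-1]
```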


return BatchFeature(data=data, tensor_type=output_kwargs["common_kwargs"].get("return_tensors"))

11 changes: 1 addition & 10 deletions src/transformers/models/pix2struct/processing_pix2struct.py
@@ -89,15 +89,6 @@ def __call__(

Please refer to the docstring of the above two methods for more information.
"""
legacy = kwargs.pop("legacy", True)
if legacy:
logger.warning_once(
"Legacy behavior is being used. The current behavior will be deprecated in version 5.0.0. "
"In the new behavior, If both images and text are provided, image_processor is not a VQA processor, and `add_special_tokens` is unset, "
"the default value of `add_special_tokens` will be changed to `False` when calling the tokenizer. "
"To test the new behavior, set `legacy=False`as a processor call argument."
)

if images is None and text is None:
raise ValueError("You have to specify either images or text.")

@@ -126,7 +117,7 @@

if text is not None and not self.image_processor.is_vqa:
output_kwargs["text_kwargs"]["add_special_tokens"] = (
add_special_tokens if add_special_tokens is not None else legacy
add_special_tokens if add_special_tokens is not None else False
)
text_encoding = self.tokenizer(text=text, **output_kwargs["text_kwargs"])

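Similarly, a rough sketch of the resulting default for Pix2Struct (illustrative checkpoint and inputs): when the image processor is not a VQA variant and `add_special_tokens` is not passed, the tokenizer now defaults to `add_special_tokens=False` instead of following the `legacy` flag.

```python
from PIL import Image
from transformers import Pix2StructProcessor

# Illustrative non-VQA checkpoint and dummy image.
processor = Pix2StructProcessor.from_pretrained("google/pix2struct-base")
image = Image.new("RGB", (512, 512))

# `add_special_tokens` is unset, so the text is tokenized without special tokens.
inputs = processor(images=image, text="a caption prompt", return_tensors="pt")

# Setting it explicitly still takes precedence over the new default.
inputs_explicit = processor(images=image, text="a caption prompt", add_special_tokens=True, return_tensors="pt")
```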