2 changes: 1 addition & 1 deletion ppdiffusers/ppdiffusers/hooks/layerwise_casting.py
@@ -117,7 +117,7 @@ class PeftInputAutocastDisableHook(ModelHook):
that the inputs are casted to the computation dtype correctly always. However, there are two goals we are
hoping to achieve:
1. Making forward implementations independent of device/dtype casting operations as much as possible.
- 2. Peforming inference without losing information from casting to different precisions. With the current
+ 2. Performing inference without losing information from casting to different precisions. With the current
PEFT implementation (as linked in the reference above), and assuming running layerwise casting inference
with storage_dtype=torch.float8_e4m3fn and compute_dtype=paddle.bfloat16, inputs are cast to
torch.float8_e4m3fn in the lora layer. We will then upcast back to paddle.bfloat16 when we continue the
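The hunk above concerns layerwise casting: parameters live in a low-precision storage dtype and are upcast to the compute dtype only around the forward pass, and the hook exists so PEFT/LoRA layers do not silently cast activations back down to the storage dtype. A minimal sketch of the storage/compute split, assuming float8 dtypes are available in the installed Paddle build (the function and variable names are illustrative, not the PPDiffusers API):

import paddle

STORAGE_DTYPE = paddle.float8_e4m3fn  # assumed available in this Paddle build; low-precision storage
COMPUTE_DTYPE = paddle.bfloat16       # precision used for the actual math

def linear_with_layerwise_casting(x_bf16, weight_fp8):
    # Upcast the stored weight just-in-time so the matmul (and any LoRA branch
    # hanging off it) runs in bfloat16; only the persistent weight pays the
    # float8 memory cost.
    weight_bf16 = weight_fp8.cast(COMPUTE_DTYPE)
    return paddle.matmul(x_bf16, weight_bf16)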
8 changes: 4 additions & 4 deletions ppdiffusers/ppdiffusers/hooks/pyramid_attention_broadcast.py
@@ -40,15 +40,15 @@ class PyramidAttentionBroadcastConfig:
spatial_attention_block_skip_range (`int`, *optional*, defaults to `None`):
The number of times a specific spatial attention broadcast is skipped before computing the attention states
to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e.,
- old attention states will be re-used) before computing the new attention states again.
+ old attention states will be reused) before computing the new attention states again.
temporal_attention_block_skip_range (`int`, *optional*, defaults to `None`):
The number of times a specific temporal attention broadcast is skipped before computing the attention
states to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times
- (i.e., old attention states will be re-used) before computing the new attention states again.
+ (i.e., old attention states will be reused) before computing the new attention states again.
cross_attention_block_skip_range (`int`, *optional*, defaults to `None`):
The number of times a specific cross-attention broadcast is skipped before computing the attention states
to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e.,
- old attention states will be re-used) before computing the new attention states again.
+ old attention states will be reused) before computing the new attention states again.
spatial_attention_timestep_skip_range (`Tuple[int, int]`, defaults to `(100, 800)`):
The range of timesteps to skip in the spatial attention layer. The attention computations will be
conditionally skipped if the current timestep is within the specified range.
@@ -287,7 +287,7 @@ def _apply_pyramid_attention_broadcast_hook(
block_skip_range (`int`):
The number of times a specific attention broadcast is skipped before computing the attention states to
re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e., old
- attention states will be re-used) before computing the new attention states again.
+ attention states will be reused) before computing the new attention states again.
current_timestep_callback (`Callable[[], int]`):
A callback function that returns the current inference timestep.
"""
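The block_skip_range semantics documented in the two hunks above boil down to a modular counter: attention is recomputed once every N calls and the cached states are reused for the N - 1 calls in between. A toy sketch of that contract (the class and attribute names are hypothetical, not the actual PPDiffusers hook):

class SkipCounterSketch:
    # Recompute every `block_skip_range` calls, reuse the cached states otherwise.
    def __init__(self, block_skip_range):
        self.block_skip_range = block_skip_range
        self.iteration = 0
        self.cached_states = None

    def __call__(self, compute_attention):
        if self.cached_states is None or self.iteration % self.block_skip_range == 0:
            self.cached_states = compute_attention()  # fresh attention states
        # otherwise: broadcast (reuse) the old attention states
        self.iteration += 1
        return self.cached_states

The real hook additionally applies the skip only when the current timestep falls inside the configured *_timestep_skip_range, which this sketch omits.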
2 changes: 1 addition & 1 deletion ppdiffusers/ppdiffusers/models/attention.py
@@ -47,7 +47,7 @@ def _chunked_feed_forward(
dim=chunk_dim,
)
else:
- # TOOD(Patrick): LoRA scale can be removed once PEFT refactor is complete
+ # TODO(Patrick): LoRA scale can be removed once PEFT refactor is complete
ff_output = paddle.concat(
[ff(hid_slice, scale=lora_scale) for hid_slice in hidden_states.chunk(num_chunks, axis=chunk_dim)],
axis=chunk_dim,
@@ -456,7 +456,7 @@ def swap_scale_shift(weight):
f"double_blocks.{i}.txt_attn.proj.bias"
)

- # single transfomer blocks
+ # single transformer blocks
for i in range(num_single_layers):
block_prefix = f"single_transformer_blocks.{i}."
# norm.linear <- single_blocks.0.modulation.lin
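The loop above comes from a checkpoint-conversion routine that rewrites original single_blocks.{i}.* parameter names into the single_transformer_blocks.{i}.* layout (with further per-parameter renames such as modulation.lin to norm.linear, as the inline comment hints). The general pattern is a prefix rewrite over a state dict; a minimal sketch covering only the prefix part (names illustrative, not the converter's actual helpers):

def rename_single_block_keys(state_dict, num_single_layers):
    # Copy tensors to new keys under single_transformer_blocks.{i}.*;
    # the per-parameter renames done by the real converter are omitted here.
    converted = {}
    for key, value in state_dict.items():
        new_key = key
        for i in range(num_single_layers):
            old_prefix = f"single_blocks.{i}."
            if key.startswith(old_prefix):
                new_key = f"single_transformer_blocks.{i}." + key[len(old_prefix):]
                break
        converted[new_key] = value
    return converted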
@@ -108,7 +108,7 @@ def index_for_timestep(self, timestep, schedule_timesteps=None):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

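The step_index docstring corrected here (and in the scheduler hunks that follow) describes a counter that starts uninitialized and advances by one on every step() call. A toy illustration of that contract, not the actual scheduler code:

class StepIndexSketch:
    def __init__(self):
        self._step_index = None  # not set until the first step() call

    @property
    def step_index(self):
        # The index counter for the current timestep; increases by 1 after each scheduler step.
        return self._step_index

    def step(self, *args, **kwargs):
        self._step_index = 0 if self._step_index is None else self._step_index + 1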
@@ -192,7 +192,7 @@ def __init__(
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -219,7 +219,7 @@ def __init__(
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -215,7 +215,7 @@ def __init__(
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -253,7 +253,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -235,7 +235,7 @@ def get_order_list(self, num_inference_steps: int) -> List[int]:
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -152,7 +152,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

2 changes: 1 addition & 1 deletion ppdiffusers/ppdiffusers/schedulers/scheduling_edm_euler.py
@@ -110,7 +110,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -179,7 +179,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -209,7 +209,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -180,7 +180,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

2 changes: 1 addition & 1 deletion ppdiffusers/ppdiffusers/schedulers/scheduling_ipndm.py
@@ -60,7 +60,7 @@ def __init__(
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -173,7 +173,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -173,7 +173,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -182,7 +182,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -206,7 +206,7 @@ def __init__(
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

2 changes: 1 addition & 1 deletion ppdiffusers/ppdiffusers/transformers/clip/modeling.py
@@ -307,7 +307,7 @@ def forward(
attn_weights = F.softmax(attn_weights, axis=-1)

if output_attentions:
- # this operation is a bit akward, but it's required to
+ # this operation is a bit awkward, but it's required to
# make sure that attn_weights keeps its gradient.
# In order to do so, attn_weights have to reshaped
# twice and have to be reused in the following
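The comment fixed above refers to a detail of returning attention probabilities when output_attentions is requested: the weights are reshaped to a per-head view for the caller and then reshaped back, so the tensor used in the subsequent matmul is the same one that was returned and its gradient is preserved. A hedged sketch of the idea (the names bsz, num_heads, tgt_len, src_len are assumptions, not taken from the PPDiffusers source):

# per-head view handed back to the caller when output_attentions is requested
attn_weights_returned = attn_weights.reshape([bsz, num_heads, tgt_len, src_len])
# reshape back so the following value matmul reuses (and back-propagates through)
# the very same tensor rather than a detached copy
attn_weights = attn_weights_returned.reshape([bsz * num_heads, tgt_len, src_len])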
6 changes: 3 additions & 3 deletions ppdiffusers/ppdiffusers/transformers/umt5/modeling.py
@@ -87,7 +87,7 @@ def __init__(self, hidden_size, eps=1e-6):

def forward(self, hidden_states):
# UMT5 uses a layer_norm which only scales and doesn't shift, which is also known as Root Mean
- # Square Layer Normalization https://arxiv.org/abs/1910.07467 thus varience is calculated
+ # Square Layer Normalization https://arxiv.org/abs/1910.07467 thus variance is calculated
# w/o mean and there is no bias. Additionally we want to make sure that the accumulation for
# half-precision inputs is done in fp32

@@ -317,7 +317,7 @@ def forward(
)

if past_key_value is not None:
- # save all key/value_states to cache to be re-used for fast auto-regressive generation
+ # save all key/value_states to cache to be reused for fast auto-regressive generation
cache_position = cache_position if not is_cross_attention else None
key_states, value_states = curr_past_key_value.update(
key_states, value_states, self.layer_idx, {"cache_position": cache_position}
@@ -950,7 +950,7 @@ def _prepare_4d_causal_attention_mask_with_cache_position(
dtype (`paddle.dtype`):
The dtype to use for the 4D attention mask.
device (`paddle.device`):
- The device to plcae the 4D attention mask on.
+ The device to place the 4D attention mask on.
cache_position (`paddle.Tensor`):
Indices depicting the position of the input sequence tokens in the sequence.
batch_size (`paddle.Tensor`):
2 changes: 1 addition & 1 deletion ppdiffusers/scripts/ssim_psnr_score/calculate_psnr.py
@@ -67,7 +67,7 @@ def calculate_psnr(videos1, videos2):
"value": psnr,
"value_std": psnr_std,
"video_setting": video1.shape,
"video_setting_name": "time, channel, heigth, width",
"video_setting_name": "time, channel, height, width",
}

return result
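For context, the psnr value stored in this result dict is the standard peak signal-to-noise ratio computed frame by frame and then aggregated. A hedged sketch of the per-frame formula, assuming pixel values normalized to [0, 1] (the script's actual normalization may differ):

import numpy as np

def psnr_per_frame(frame1, frame2, max_value=1.0):
    # PSNR = 20 * log10(MAX) - 10 * log10(MSE); identical frames give infinity.
    mse = np.mean((frame1.astype(np.float64) - frame2.astype(np.float64)) ** 2)
    if mse == 0:
        return float("inf")
    return 20 * np.log10(max_value) - 10 * np.log10(mse)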
2 changes: 1 addition & 1 deletion ppdiffusers/scripts/ssim_psnr_score/calculate_ssim.py
@@ -102,7 +102,7 @@ def calculate_ssim(videos1, videos2):
"value": ssim,
"value_std": ssim_std,
"video_setting": video1.shape,
"video_setting_name": "time, channel, heigth, width",
"video_setting_name": "time, channel, height, width",
}

return result