2 changes: 1 addition & 1 deletion ppdiffusers/ppdiffusers/hooks/layerwise_casting.py
@@ -117,7 +117,7 @@ class PeftInputAutocastDisableHook(ModelHook):
that the inputs are casted to the computation dtype correctly always. However, there are two goals we are
hoping to achieve:
1. Making forward implementations independent of device/dtype casting operations as much as possible.
- 2. Peforming inference without losing information from casting to different precisions. With the current
+ 2. Performing inference without losing information from casting to different precisions. With the current
PEFT implementation (as linked in the reference above), and assuming running layerwise casting inference
with storage_dtype=torch.float8_e4m3fn and compute_dtype=paddle.bfloat16, inputs are cast to
torch.float8_e4m3fn in the lora layer. We will then upcast back to paddle.bfloat16 when we continue the
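The hunk above concerns layerwise casting: parameters live in a low-precision storage dtype and are upcast to the compute dtype only around the forward pass, and the hook exists so PEFT/LoRA layers do not silently cast activations back down to the storage dtype. A minimal sketch of the storage/compute split, assuming float8 dtypes are available in the installed Paddle build (the function and variable names are illustrative, not the PPDiffusers API):

import paddle

STORAGE_DTYPE = paddle.float8_e4m3fn  # assumed available in this Paddle build; low-precision storage
COMPUTE_DTYPE = paddle.bfloat16       # precision used for the actual math

def linear_with_layerwise_casting(x_bf16, weight_fp8):
    # Upcast the stored weight just-in-time so the matmul (and any LoRA branch
    # hanging off it) runs in bfloat16; only the persistent weight pays the
    # float8 memory cost.
    weight_bf16 = weight_fp8.cast(COMPUTE_DTYPE)
    return paddle.matmul(x_bf16, weight_bf16)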
8 changes: 4 additions & 4 deletions ppdiffusers/ppdiffusers/hooks/pyramid_attention_broadcast.py
@@ -40,15 +40,15 @@ class PyramidAttentionBroadcastConfig:
spatial_attention_block_skip_range (`int`, *optional*, defaults to `None`):
The number of times a specific spatial attention broadcast is skipped before computing the attention states
to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e.,
- old attention states will be re-used) before computing the new attention states again.
+ old attention states will be reused) before computing the new attention states again.
temporal_attention_block_skip_range (`int`, *optional*, defaults to `None`):
The number of times a specific temporal attention broadcast is skipped before computing the attention
states to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times
- (i.e., old attention states will be re-used) before computing the new attention states again.
+ (i.e., old attention states will be reused) before computing the new attention states again.
cross_attention_block_skip_range (`int`, *optional*, defaults to `None`):
The number of times a specific cross-attention broadcast is skipped before computing the attention states
to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e.,
- old attention states will be re-used) before computing the new attention states again.
+ old attention states will be reused) before computing the new attention states again.
spatial_attention_timestep_skip_range (`Tuple[int, int]`, defaults to `(100, 800)`):
The range of timesteps to skip in the spatial attention layer. The attention computations will be
conditionally skipped if the current timestep is within the specified range.
@@ -287,7 +287,7 @@ def _apply_pyramid_attention_broadcast_hook(
block_skip_range (`int`):
The number of times a specific attention broadcast is skipped before computing the attention states to
re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e., old
- attention states will be re-used) before computing the new attention states again.
+ attention states will be reused) before computing the new attention states again.
current_timestep_callback (`Callable[[], int]`):
A callback function that returns the current inference timestep.
"""
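The block_skip_range semantics documented in the two hunks above boil down to a modular counter: attention is recomputed once every N calls and the cached states are reused for the N - 1 calls in between. A toy sketch of that contract (the class and attribute names are hypothetical, not the actual PPDiffusers hook):

class SkipCounterSketch:
    # Recompute every `block_skip_range` calls, reuse the cached states otherwise.
    def __init__(self, block_skip_range):
        self.block_skip_range = block_skip_range
        self.iteration = 0
        self.cached_states = None

    def __call__(self, compute_attention):
        if self.cached_states is None or self.iteration % self.block_skip_range == 0:
            self.cached_states = compute_attention()  # fresh attention states
        # otherwise: broadcast (reuse) the old attention states
        self.iteration += 1
        return self.cached_states

The real hook additionally applies the skip only when the current timestep falls inside the configured *_timestep_skip_range, which this sketch omits.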
2 changes: 1 addition & 1 deletion ppdiffusers/ppdiffusers/models/attention.py
@@ -47,7 +47,7 @@ def _chunked_feed_forward(
dim=chunk_dim,
)
else:
- # TOOD(Patrick): LoRA scale can be removed once PEFT refactor is complete
+ # TODO(Patrick): LoRA scale can be removed once PEFT refactor is complete
ff_output = paddle.concat(
[ff(hid_slice, scale=lora_scale) for hid_slice in hidden_states.chunk(num_chunks, axis=chunk_dim)],
axis=chunk_dim,
@@ -456,7 +456,7 @@ def swap_scale_shift(weight):
f"double_blocks.{i}.txt_attn.proj.bias"
)

- # single transfomer blocks
+ # single transformer blocks
for i in range(num_single_layers):
block_prefix = f"single_transformer_blocks.{i}."
# norm.linear <- single_blocks.0.modulation.lin
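The loop above comes from a checkpoint-conversion routine that rewrites original single_blocks.{i}.* parameter names into the single_transformer_blocks.{i}.* layout (with further per-parameter renames such as modulation.lin to norm.linear, as the inline comment hints). The general pattern is a prefix rewrite over a state dict; a minimal sketch covering only the prefix part (names illustrative, not the converter's actual helpers):

def rename_single_block_keys(state_dict, num_single_layers):
    # Copy tensors to new keys under single_transformer_blocks.{i}.*;
    # the per-parameter renames done by the real converter are omitted here.
    converted = {}
    for key, value in state_dict.items():
        new_key = key
        for i in range(num_single_layers):
            old_prefix = f"single_blocks.{i}."
            if key.startswith(old_prefix):
                new_key = f"single_transformer_blocks.{i}." + key[len(old_prefix):]
                break
        converted[new_key] = value
    return converted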
@@ -108,7 +108,7 @@ def index_for_timestep(self, timestep, schedule_timesteps=None):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

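The step_index docstring corrected here (and in the scheduler hunks that follow) describes a counter that starts uninitialized and advances by one on every step() call. A toy illustration of that contract, not the actual scheduler code:

class StepIndexSketch:
    def __init__(self):
        self._step_index = None  # not set until the first step() call

    @property
    def step_index(self):
        # The index counter for the current timestep; increases by 1 after each scheduler step.
        return self._step_index

    def step(self, *args, **kwargs):
        self._step_index = 0 if self._step_index is None else self._step_index + 1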
@@ -192,7 +192,7 @@ def __init__(
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -219,7 +219,7 @@ def __init__(
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -215,7 +215,7 @@ def __init__(
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -253,7 +253,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -235,7 +235,7 @@ def get_order_list(self, num_inference_steps: int) -> List[int]:
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -152,7 +152,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

2 changes: 1 addition & 1 deletion ppdiffusers/ppdiffusers/schedulers/scheduling_edm_euler.py
@@ -110,7 +110,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -179,7 +179,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -209,7 +209,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -180,7 +180,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

2 changes: 1 addition & 1 deletion ppdiffusers/ppdiffusers/schedulers/scheduling_ipndm.py
@@ -60,7 +60,7 @@ def __init__(
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -173,7 +173,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -173,7 +173,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -182,7 +182,7 @@ def init_noise_sigma(self):
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

@@ -206,7 +206,7 @@ def __init__(
@property
def step_index(self):
"""
- The index counter for current timestep. It will increae 1 after each scheduler step.
+ The index counter for current timestep. It will increase 1 after each scheduler step.
"""
return self._step_index

2 changes: 1 addition & 1 deletion ppdiffusers/ppdiffusers/transformers/clip/modeling.py
@@ -307,7 +307,7 @@ def forward(
attn_weights = F.softmax(attn_weights, axis=-1)

if output_attentions:
- # this operation is a bit akward, but it's required to
+ # this operation is a bit awkward, but it's required to
# make sure that attn_weights keeps its gradient.
# In order to do so, attn_weights have to reshaped
# twice and have to be reused in the following
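The comment fixed above refers to a detail of returning attention probabilities when output_attentions is requested: the weights are reshaped to a per-head view for the caller and then reshaped back, so the tensor used in the subsequent matmul is the same one that was returned and its gradient is preserved. A hedged sketch of the idea (the names bsz, num_heads, tgt_len, src_len are assumptions, not taken from the PPDiffusers source):

# per-head view handed back to the caller when output_attentions is requested
attn_weights_returned = attn_weights.reshape([bsz, num_heads, tgt_len, src_len])
# reshape back so the following value matmul reuses (and back-propagates through)
# the very same tensor rather than a detached copy
attn_weights = attn_weights_returned.reshape([bsz * num_heads, tgt_len, src_len])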
6 changes: 3 additions & 3 deletions ppdiffusers/ppdiffusers/transformers/umt5/modeling.py
@@ -87,7 +87,7 @@ def __init__(self, hidden_size, eps=1e-6):

def forward(self, hidden_states):
# UMT5 uses a layer_norm which only scales and doesn't shift, which is also known as Root Mean
- # Square Layer Normalization https://arxiv.org/abs/1910.07467 thus varience is calculated
+ # Square Layer Normalization https://arxiv.org/abs/1910.07467 thus variance is calculated
# w/o mean and there is no bias. Additionally we want to make sure that the accumulation for
# half-precision inputs is done in fp32

@@ -317,7 +317,7 @@ def forward(
)

if past_key_value is not None:
- # save all key/value_states to cache to be re-used for fast auto-regressive generation
+ # save all key/value_states to cache to be reused for fast auto-regressive generation
cache_position = cache_position if not is_cross_attention else None
key_states, value_states = curr_past_key_value.update(
key_states, value_states, self.layer_idx, {"cache_position": cache_position}
@@ -950,7 +950,7 @@ def _prepare_4d_causal_attention_mask_with_cache_position(
dtype (`paddle.dtype`):
The dtype to use for the 4D attention mask.
device (`paddle.device`):
- The device to plcae the 4D attention mask on.
+ The device to place the 4D attention mask on.
cache_position (`paddle.Tensor`):
Indices depicting the position of the input sequence tokens in the sequence.
batch_size (`paddle.Tensor`):
2 changes: 1 addition & 1 deletion ppdiffusers/scripts/ssim_psnr_score/calculate_psnr.py
@@ -67,7 +67,7 @@ def calculate_psnr(videos1, videos2):
"value": psnr,
"value_std": psnr_std,
"video_setting": video1.shape,
"video_setting_name": "time, channel, heigth, width",
"video_setting_name": "time, channel, height, width",
}

return result
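For context, the psnr value stored in this result dict is the standard peak signal-to-noise ratio computed frame by frame and then aggregated. A hedged sketch of the per-frame formula, assuming pixel values normalized to [0, 1] (the script's actual normalization may differ):

import numpy as np

def psnr_per_frame(frame1, frame2, max_value=1.0):
    # PSNR = 20 * log10(MAX) - 10 * log10(MSE); identical frames give infinity.
    mse = np.mean((frame1.astype(np.float64) - frame2.astype(np.float64)) ** 2)
    if mse == 0:
        return float("inf")
    return 20 * np.log10(max_value) - 10 * np.log10(mse)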
2 changes: 1 addition & 1 deletion ppdiffusers/scripts/ssim_psnr_score/calculate_ssim.py
@@ -102,7 +102,7 @@ def calculate_ssim(videos1, videos2):
"value": ssim,
"value_std": ssim_std,
"video_setting": video1.shape,
"video_setting_name": "time, channel, heigth, width",
"video_setting_name": "time, channel, height, width",
}

return result