Commit 12d448a: format
Signed-off-by: NickLucche <[email protected]>
NickLucche committed Feb 18, 2025
1 parent 0887736 commit 12d448a
Showing 2 changed files with 2 additions and 3 deletions.
vllm/commit_id.py (1 change: 0 additions & 1 deletion)

This file was deleted.

vllm/model_executor/models/bart.py (4 changes: 2 additions & 2 deletions)
@@ -300,7 +300,7 @@ def __init__(
                 f" and `num_heads`: {num_heads}).")
         self.scaling = self.head_dim**-0.5

-        # TP sharding sizes is accounted for within "*Parallel" layers.
+        # TP sharding sizes is accounted for within "*Parallel" layers.
         self.qkv_proj = QKVCrossParallelLinear(self.d_model,
                                                self.d_model //
                                                self.total_num_heads,
@@ -328,7 +328,7 @@ def __init__(
             # Number of KV heads is less than TP size, so we replicate
             # the KV heads across multiple tensor parallel GPUs.
             assert tp_world_size % self.total_num_kv_heads == 0
-        self.num_kv_heads = self.num_heads # No GQA in bart
+        self.num_kv_heads = self.num_heads  # No GQA in bart
         self.attn = Attention(self.num_heads,
                               self.head_dim,
                               self.scaling,
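Aside: the comments in the hunks above compress two pieces of tensor-parallel bookkeeping — sharding query heads across ranks, and replicating KV heads when there are fewer of them than TP ranks. Below is a minimal, illustrative Python sketch of that head-counting logic, assuming the usual TP scheme. The helper shard_heads is hypothetical; only total_num_heads, total_num_kv_heads, and tp_world_size correspond to identifiers visible in the diff.

# Illustrative sketch (not vLLM's implementation) of per-rank
# attention-head bookkeeping under tensor parallelism.

def shard_heads(total_num_heads: int, total_num_kv_heads: int,
                tp_world_size: int) -> tuple[int, int]:
    """Return (num_heads, num_kv_heads) held by each TP rank."""
    # Query heads are always split evenly across ranks.
    assert total_num_heads % tp_world_size == 0
    num_heads = total_num_heads // tp_world_size

    if total_num_kv_heads >= tp_world_size:
        # Enough KV heads to give every rank a distinct slice.
        assert total_num_kv_heads % tp_world_size == 0
        num_kv_heads = total_num_kv_heads // tp_world_size
    else:
        # Fewer KV heads than ranks: replicate each KV head on
        # tp_world_size // total_num_kv_heads ranks, as the comment
        # in the second hunk describes.
        assert tp_world_size % total_num_kv_heads == 0
        num_kv_heads = 1
    return num_heads, num_kv_heads

# BART uses plain multi-head attention (no GQA), so e.g. with 16 heads
# and tp_world_size = 4, each rank holds 4 query heads and 4 KV heads.
print(shard_heads(16, 16, 4))   # (4, 4)
# By contrast, a GQA model with 8 KV heads on 16 ranks would replicate
# each KV head across 2 ranks, leaving 1 KV head per rank.
print(shard_heads(64, 8, 16))   # (4, 1)

In the BART case there is no GQA, so the per-rank KV head count simply equals the per-rank query head count — which is exactly what the changed line `self.num_kv_heads = self.num_heads` pins down.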
