From 6510636ecb31a6c9f7045181946ab16cdc833ca5 Mon Sep 17 00:00:00 2001
From: DarkLight1337 <tlleungac@connect.ust.hk>
Date: Sun, 16 Feb 2025 03:38:26 +0000
Subject: [PATCH 1/6] Fix 2 Node tests

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 tests/distributed/test_pipeline_parallel.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
index 06f9435816d5c..92a366892ea16 100644
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -307,7 +307,8 @@ def _compare_tp(
         "8",
     ]
     if chunked_prefill:
-        common_args.append("--enable-chunked-prefill")
+        common_args.extend(
+            ["--enable-chunked-prefill", "--disable-sliding-window"])
     if eager_mode:
         common_args.append("--enforce-eager")
     if task != "auto":

From f5e98e7b5b84898d91afc5b86be72dda7fef0205 Mon Sep 17 00:00:00 2001
From: DarkLight1337 <tlleungac@connect.ust.hk>
Date: Sun, 16 Feb 2025 04:52:13 +0000
Subject: [PATCH 2/6] Set num_attention_heads; drop --disable-sliding-window

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 tests/distributed/test_pipeline_parallel.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
index 92a366892ea16..e768ca23009ef 100644
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -277,6 +277,7 @@ def _compare_tp(
         text_overrides = {
             "num_layers": 1,
             "num_hidden_layers": 1,
+            "num_attention_heads": tp_size * pp_size,
             "num_experts": 2,
             "num_experts_per_tok": 2,
             "num_local_experts": 2,
@@ -307,8 +308,7 @@ def _compare_tp(
         "8",
     ]
     if chunked_prefill:
-        common_args.extend(
-            ["--enable-chunked-prefill", "--disable-sliding-window"])
+        common_args.append("--enable-chunked-prefill")
     if eager_mode:
         common_args.append("--enforce-eager")
     if task != "auto":

From e880fc6186299e8e227988ed4310e4bddd6b2575 Mon Sep 17 00:00:00 2001
From: DarkLight1337 <tlleungac@connect.ust.hk>
Date: Sun, 16 Feb 2025 05:03:37 +0000
Subject: [PATCH 3/6] Fix `NGramWorker` initialization

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 vllm/spec_decode/ngram_worker.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/vllm/spec_decode/ngram_worker.py b/vllm/spec_decode/ngram_worker.py
index 86390c99c2fbc..57ae173af6744 100644
--- a/vllm/spec_decode/ngram_worker.py
+++ b/vllm/spec_decode/ngram_worker.py
@@ -6,6 +6,7 @@
 import torch
 import torch.nn as nn
 
+from vllm.config import VllmConfig
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.sequence import ExecuteModelRequest
 from vllm.spec_decode.interfaces import SpeculativeProposals
@@ -25,11 +26,18 @@ class NGramWorker(NonLLMProposerWorkerBase):
     which don't rely on LLM model to give proposals.
     """
 
-    def __init__(self, *args, **kwargs):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        local_rank: int,
+        device_type: str = "cuda",
+        **kwargs,
+    ):
+        super().__init__(vllm_config)
+
         # Get local_rank/vocab_size from kwargs attribute
-        self.local_rank = kwargs["local_rank"]
-        self.vocab_size = kwargs["vllm_config"].model_config.get_vocab_size()
-        self.device_type = kwargs.get("device_type", "cuda")
+        self.local_rank = local_rank
+        self.device_type = device_type
 
         # Lazy initialization list.
         self._proposer: Top1Proposer

From a855844f3ba7fe72b743b07280de5a52dc946aa9 Mon Sep 17 00:00:00 2001
From: DarkLight1337 <tlleungac@connect.ust.hk>
Date: Sun, 16 Feb 2025 07:36:00 +0000
Subject: [PATCH 4/6] Reduce memory footprint to avoid OOM

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 tests/distributed/test_pipeline_parallel.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
index e768ca23009ef..dcff12dc9c876 100644
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -275,6 +275,8 @@ def _compare_tp(
     if load_format == "dummy":
         # Avoid OOM
         text_overrides = {
+            "hidden_size": 512,
+            "intermediate_size": 800,
             "num_layers": 1,
             "num_hidden_layers": 1,
             "num_attention_heads": tp_size * pp_size,

From fb2fcf7a4c30c027d99d9bb5863cc6a0085d7907 Mon Sep 17 00:00:00 2001
From: DarkLight1337 <tlleungac@connect.ust.hk>
Date: Sun, 16 Feb 2025 08:30:44 +0000
Subject: [PATCH 5/6] Use fixed attention and KV head counts for dummy models

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 tests/distributed/test_pipeline_parallel.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
index dcff12dc9c876..b3149e8604772 100644
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -279,7 +279,8 @@ def _compare_tp(
             "intermediate_size": 800,
             "num_layers": 1,
             "num_hidden_layers": 1,
-            "num_attention_heads": tp_size * pp_size,
+            "num_attention_heads": 4,
+            "num_key_value_heads": 1,
             "num_experts": 2,
             "num_experts_per_tok": 2,
             "num_local_experts": 2,

From 2e3cbae5c40b56b64a42ca8dacc8842409a94e5a Mon Sep 17 00:00:00 2001
From: DarkLight1337 <tlleungac@connect.ust.hk>
Date: Sun, 16 Feb 2025 09:35:41 +0000
Subject: [PATCH 6/6] Use 4 hidden layers; drop num_layers/expert overrides

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 tests/distributed/test_pipeline_parallel.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
index b3149e8604772..9677ccd2ea823 100644
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -275,15 +275,11 @@ def _compare_tp(
     if load_format == "dummy":
         # Avoid OOM
         text_overrides = {
+            "num_hidden_layers": 4,
             "hidden_size": 512,
             "intermediate_size": 800,
-            "num_layers": 1,
-            "num_hidden_layers": 1,
             "num_attention_heads": 4,
             "num_key_value_heads": 1,
-            "num_experts": 2,
-            "num_experts_per_tok": 2,
-            "num_local_experts": 2,
         }
 
         if is_multimodal: