Fix kv_cache_manager.prepare_dummy_resources: add the extra token only for requests in GENERATION_IN_PROGRESS state

lfr-0531 · lfr-0531 · commit b1dd9e640c0b · 2025-06-18T02:00:58.000-07:00
Signed-off-by: Fanrong Li <23290157+lfr-0531@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/pyexecutor/resource_manager.py b/tensorrt_llm/_torch/pyexecutor/resource_manager.py
@@ -325,8 +325,11 @@ def prepare_resources(self, scheduled_batch: ScheduledRequests):
     def prepare_dummy_resources(self, dummy_requests: List[LlmRequest]):
         beam_width = 1
         for req in dummy_requests:
-            self.impl.add_sequence(req.py_request_id, req.py_prompt_len + 1,
-                                   beam_width, req)
+            token_num = req.py_prompt_len
+            if req.state == LlmRequestState.GENERATION_IN_PROGRESS:
+                token_num += 1
+            self.impl.add_sequence(req.py_request_id, token_num, beam_width,
+                                   req)
             for _ in range(self.num_extra_kv_tokens):
                 self.impl.add_token(req.py_request_id)
             for _ in range(len(req.py_draft_tokens)):