We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 84ed307 commit 30ae6b4 Copy full SHA for 30ae6b4
vllm/worker/hpu_model_runner.py
@@ -2462,7 +2462,7 @@ def execute_model(
2462
) -> Optional[Union[List[SamplerOutput], IntermediateTensors]]:
2463
# Delayed sampling is only supported for single step scheduling
2464
use_delayed_sampling = VLLM_DELAYED_SAMPLING and not warmup_mode \
2465
- and self.is_single_step
+ and self.is_single_step and not is_fake_hpu()
2466
assert model_input.input_tokens is not None
2467
if use_delayed_sampling and not model_input.is_prompt and \
2468
self.is_driver_worker:
0 commit comments