(VllmWorkerProcess pid=1791034) Process VllmWorkerProcess:
(VllmWorkerProcess pid=1791035) Process VllmWorkerProcess:
(VllmWorkerProcess pid=1791034) Traceback (most recent call last):
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
(VllmWorkerProcess pid=1791034) self.run()
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/multiprocessing/process.py", line 108, in run
(VllmWorkerProcess pid=1791034) self._target(*self._args, **self._kwargs)
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/multiproc_worker_utils.py", line 210, in _run_worker_process
(VllmWorkerProcess pid=1791034) worker = worker_factory()
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/gpu_executor.py", line 70, in _create_worker
(VllmWorkerProcess pid=1791034) wrapper.init_worker(**self._get_worker_kwargs(local_rank, rank,
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 326, in init_worker
(VllmWorkerProcess pid=1791034) self.worker = worker_class(*args, **kwargs)
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/worker/worker.py", line 90, in __init__
(VllmWorkerProcess pid=1791034) self.model_runner: GPUModelRunnerBase = ModelRunnerClass(
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 229, in __init__
(VllmWorkerProcess pid=1791034) self.attn_backend = get_attn_backend(
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/attention/selector.py", line 45, in get_attn_backend
(VllmWorkerProcess pid=1791034) backend = which_attn_to_use(num_heads, head_size, num_kv_heads,
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/attention/selector.py", line 148, in which_attn_to_use
(VllmWorkerProcess pid=1791034) if torch.cuda.get_device_capability()[0] < 8:
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/torch/cuda/__init__.py", line 430, in get_device_capability
(VllmWorkerProcess pid=1791034) prop = get_device_properties(device)
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/torch/cuda/__init__.py", line 444, in get_device_properties
(VllmWorkerProcess pid=1791034) _lazy_init() # will define _get_device_properties
(VllmWorkerProcess pid=1791034) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/torch/cuda/__init__.py", line 279, in _lazy_init
(VllmWorkerProcess pid=1791034) raise RuntimeError(
(VllmWorkerProcess pid=1791034) RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
(VllmWorkerProcess pid=1791035) Traceback (most recent call last):
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
(VllmWorkerProcess pid=1791035) self.run()
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/multiprocessing/process.py", line 108, in run
(VllmWorkerProcess pid=1791035) self._target(*self._args, **self._kwargs)
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/multiproc_worker_utils.py", line 210, in _run_worker_process
(VllmWorkerProcess pid=1791035) worker = worker_factory()
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/gpu_executor.py", line 70, in _create_worker
(VllmWorkerProcess pid=1791035) wrapper.init_worker(**self._get_worker_kwargs(local_rank, rank,
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 326, in init_worker
(VllmWorkerProcess pid=1791035) self.worker = worker_class(*args, **kwargs)
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/worker/worker.py", line 90, in __init__
(VllmWorkerProcess pid=1791035) self.model_runner: GPUModelRunnerBase = ModelRunnerClass(
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 229, in __init__
(VllmWorkerProcess pid=1791035) self.attn_backend = get_attn_backend(
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/attention/selector.py", line 45, in get_attn_backend
(VllmWorkerProcess pid=1791035) backend = which_attn_to_use(num_heads, head_size, num_kv_heads,
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/attention/selector.py", line 148, in which_attn_to_use
(VllmWorkerProcess pid=1791035) if torch.cuda.get_device_capability()[0] < 8:
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/torch/cuda/__init__.py", line 430, in get_device_capability
(VllmWorkerProcess pid=1791035) prop = get_device_properties(device)
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/torch/cuda/__init__.py", line 444, in get_device_properties
(VllmWorkerProcess pid=1791035) _lazy_init() # will define _get_device_properties
(VllmWorkerProcess pid=1791035) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/torch/cuda/__init__.py", line 279, in _lazy_init
(VllmWorkerProcess pid=1791035) raise RuntimeError(
(VllmWorkerProcess pid=1791035) RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
(VllmWorkerProcess pid=1791036) Process VllmWorkerProcess:
(VllmWorkerProcess pid=1791036) Traceback (most recent call last):
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
(VllmWorkerProcess pid=1791036) self.run()
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/multiprocessing/process.py", line 108, in run
(VllmWorkerProcess pid=1791036) self._target(*self._args, **self._kwargs)
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/multiproc_worker_utils.py", line 210, in _run_worker_process
(VllmWorkerProcess pid=1791036) worker = worker_factory()
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/gpu_executor.py", line 70, in _create_worker
(VllmWorkerProcess pid=1791036) wrapper.init_worker(**self._get_worker_kwargs(local_rank, rank,
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 326, in init_worker
(VllmWorkerProcess pid=1791036) self.worker = worker_class(*args, **kwargs)
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/worker/worker.py", line 90, in __init__
(VllmWorkerProcess pid=1791036) self.model_runner: GPUModelRunnerBase = ModelRunnerClass(
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 229, in __init__
(VllmWorkerProcess pid=1791036) self.attn_backend = get_attn_backend(
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/attention/selector.py", line 45, in get_attn_backend
(VllmWorkerProcess pid=1791036) backend = which_attn_to_use(num_heads, head_size, num_kv_heads,
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/attention/selector.py", line 148, in which_attn_to_use
(VllmWorkerProcess pid=1791036) if torch.cuda.get_device_capability()[0] < 8:
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/torch/cuda/__init__.py", line 430, in get_device_capability
(VllmWorkerProcess pid=1791036) prop = get_device_properties(device)
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/torch/cuda/__init__.py", line 444, in get_device_properties
(VllmWorkerProcess pid=1791036) _lazy_init() # will define _get_device_properties
(VllmWorkerProcess pid=1791036) File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/torch/cuda/__init__.py", line 279, in _lazy_init
(VllmWorkerProcess pid=1791036) raise RuntimeError(
(VllmWorkerProcess pid=1791036) RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
(VllmWorkerProcess pid=1791037) Process VllmWorkerProcess:
ERROR 07-23 10:26:30 multiproc_worker_utils.py:120] Worker VllmWorkerProcess pid 1791034 died, exit code: 1
ERROR 07-23 10:26:30 multiproc_worker_utils.py:120] Worker VllmWorkerProcess pid 1791035 died, exit code: 1
INFO 07-23 10:26:30 multiproc_worker_utils.py:123] Killing local vLLM worker processes
Traceback (most recent call last):
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/multiproc_worker_utils.py", line 169, in _enqueue_task
self._task_queue.put((task_id, method, args, kwargs))
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/multiprocessing/queues.py", line 88, in put
raise ValueError(f"Queue {self!r} is closed")
ValueError: Queue <multiprocessing.queues.Queue object at 0x7f013303ad70> is closed
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/swift/cli/deploy.py", line 5, in
deploy_main()
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/swift/utils/run_utils.py", line 27, in x_main
result = llm_x(args, **kwargs)
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/swift/llm/deploy.py", line 552, in llm_deploy
llm_engine, template = prepare_vllm_engine_template(args, use_async=True)
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/swift/llm/utils/vllm_utils.py", line 462, in prepare_vllm_engine_template
llm_engine = get_vllm_engine(
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/swift/llm/utils/vllm_utils.py", line 117, in get_vllm_engine
llm_engine = llm_engine_cls.from_engine_args(engine_args)
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 444, in from_engine_args
engine = cls(
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 373, in init
self.engine = self._init_engine(*args, **kwargs)
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 520, in _init_engine
return engine_class(*args, **kwargs)
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 249, in init
self.model_executor = executor_class(
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/multiproc_gpu_executor.py", line 158, in init
super().init(*args, **kwargs)
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/distributed_gpu_executor.py", line 25, in init
super().init(*args, **kwargs)
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/executor_base.py", line 150, in init
super().init(model_config, cache_config, parallel_config,
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/executor_base.py", line 46, in init
self._init_executor()
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/multiproc_gpu_executor.py", line 83, in _init_executor
self._run_workers("init_device")
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/multiproc_gpu_executor.py", line 125, in _run_workers
worker_outputs = [
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/multiproc_gpu_executor.py", line 126, in
worker.execute_method(method, *args, **kwargs)
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/multiproc_worker_utils.py", line 176, in execute_method
self._enqueue_task(future, method, args, kwargs)
File "/mnt/pfs/zhangfan/system/test/conda/envs/swift/lib/python3.10/site-packages/vllm/executor/multiproc_worker_utils.py", line 172, in _enqueue_task
raise ChildProcessError("worker died") from e
ChildProcessError: worker died
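
As the RuntimeError above states, CUDA had already been initialized in the parent process before vLLM forked its tensor-parallel worker processes, and a forked child cannot re-initialize that CUDA context. A minimal sketch of the usual workaround is below; it assumes the installed vLLM version honors the `VLLM_WORKER_MULTIPROC_METHOD` environment variable, and the model name is only a placeholder:

```python
# Sketch of a workaround, not the original deployment code.
# Assumptions: the installed vLLM reads VLLM_WORKER_MULTIPROC_METHOD,
# and the model name below is a placeholder.
import os

# Set this before vLLM is imported / the engine is constructed, so the
# multiprocessing GPU executor spawns fresh worker processes instead of
# forking a parent that already holds a CUDA context.
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

from vllm import AsyncEngineArgs, AsyncLLMEngine

engine_args = AsyncEngineArgs(
    model="Qwen/Qwen2-7B-Instruct",  # placeholder model path
    tensor_parallel_size=4,          # placeholder; set to the actual TP degree
)
engine = AsyncLLMEngine.from_engine_args(engine_args)
```

When launching through the `swift deploy` CLI shown in the traceback, the same variable can be exported in the shell before starting the command, so that the process which later touches CUDA no longer forks the vLLM workers.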