You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I'm using pytorch-lightning and test_tube at the same time. When I try to perform a hyperparameter search using optimize_parallel_gpu, I get the strange error in the title: ChildProcessError: [Errno 10] No child processes
Code
def main_local(hparams, gpu_ids=None):
    """Run a single training trial for the given hyperparameters.

    Invoked by test_tube's optimize_parallel_gpu / optimize_parallel_cpu once
    per trial. ``gpu_ids`` is the GPU set assigned to this trial (``None``
    when running on CPU).

    Args:
        hparams: parsed HyperOptArgumentParser namespace for this trial.
        gpu_ids: GPUs to use for this trial, or ``None`` for CPU.
    """
    # init module
    model = SparseNet(hparams)

    # most basic trainer, uses good defaults
    trainer = Trainer(
        max_nb_epochs=hparams.max_nb_epochs,
        gpus=gpu_ids,
        # NOTE(review): a distributed backend that spawns subprocesses (ddp)
        # cannot run inside test_tube's daemonic pool workers — this is the
        # likely cause of the reported ChildProcessError / AssertionError.
        distributed_backend=hparams.distributed_backend,
        nb_gpu_nodes=hparams.nodes,
        # optional
        fast_dev_run=hparams.fast_dev_run,
        use_amp=hparams.use_amp,
        amp_level=("O1" if hparams.use_amp else "O0"),
    )
    trainer.fit(model)
...
if __name__ == "__main__":
    ...
    # model-specific CLI arguments live on the model class itself
    parser = SparseNet.add_model_specific_args(parser)

    # HyperParameter search: declare the tunable options for test_tube
    parser.opt_list(
        "--n", default=2000, type=int, tunable=True, options=[2000, 3000, 4000]
    )
    parser.opt_list(
        "--k", default=50, type=int, tunable=True, options=[100, 200, 300, 400]
    )
    parser.opt_list(
        "--batch_size",
        default=32,
        type=int,
        tunable=True,
        options=[32, 64, 128, 256, 512],
    )

    # parse params
    hparams = parser.parse_args()

    # LR for different batch_size: larger batches get a larger learning rate
    if hparams.batch_size <= 128:
        hparams.learning_rate = 0.001
    else:
        hparams.learning_rate = 0.002

    # run trials of random search over the hyperparams
    if torch.cuda.is_available():
        # gpu_ids=["0, 1"] gives each trial the GPU pair "0, 1"
        # (test_tube convention: one comma-separated string per trial slot)
        hparams.optimize_parallel_gpu(
            main_local, max_nb_trials=20, gpu_ids=["0, 1"]
        )
    else:
        hparams.gpus = None
        hparams.distributed_backend = None
        hparams.optimize_parallel_cpu(main_local, nb_trials=20)
    # main_local(hparams)  # this works
console log
gpu available: True, used: True
VISIBLE GPUS: 0,1
Caught exception in worker thread [Errno 10] No child processes
Traceback (most recent call last):
File "/home/kyoungrok/anaconda3/lib/python3.7/site-packages/test_tube/argparse_hopt.py", line 37, in optimize_parallel_gpu_private
results = train_function(trial_params, gpu_id_set)
File "sparse_trainer.py", line 29, in main_local
trainer.fit(model)
File "/home/kyoungrok/anaconda3/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 746, in fit
mp.spawn(self.ddp_train, nprocs=self.num_gpus, args=(model, ))
File "/home/kyoungrok/anaconda3/lib/python3.7/site-packages/torch/multiprocessing/spawn.py", line 156, in spawn
error_queue = mp.SimpleQueue()
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/context.py", line 112, in SimpleQueue
return SimpleQueue(ctx=self.get_context())
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/queues.py", line 332, in __init__
self._rlock = ctx.Lock()
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/context.py", line 67, in Lock
return Lock(ctx=self.get_context())
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
register(self._semlock.name)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py", line 83, in register
self._send('REGISTER', name)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py", line 90, in _send
self.ensure_running()
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py", line 46, in ensure_running
pid, status = os.waitpid(self._pid, os.WNOHANG)
ChildProcessError: [Errno 10] No child processes
gpu available: True, used: True
VISIBLE GPUS: 0,1
Caught exception in worker thread [Errno 10] No child processes
Traceback (most recent call last):
File "/home/kyoungrok/anaconda3/lib/python3.7/site-packages/test_tube/argparse_hopt.py", line 37, in optimize_parallel_gpu_private
results = train_function(trial_params, gpu_id_set)
File "sparse_trainer.py", line 29, in main_local
trainer.fit(model)
File "/home/kyoungrok/anaconda3/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 746, in fit
mp.spawn(self.ddp_train, nprocs=self.num_gpus, args=(model, ))
File "/home/kyoungrok/anaconda3/lib/python3.7/site-packages/torch/multiprocessing/spawn.py", line 156, in spawn
error_queue = mp.SimpleQueue()
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/context.py", line 112, in SimpleQueue
return SimpleQueue(ctx=self.get_context())
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/queues.py", line 332, in __init__
self._rlock = ctx.Lock()
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/context.py", line 67, in Lock
return Lock(ctx=self.get_context())
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
register(self._semlock.name)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py", line 83, in register
self._send('REGISTER', name)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py", line 90, in _send
self.ensure_running()
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py", line 46, in ensure_running
pid, status = os.waitpid(self._pid, os.WNOHANG)
ChildProcessError: [Errno 10] No child processes
^CTraceback (most recent call last):
File "sparse_trainer.py", line 73, in <module>
Process ForkPoolWorker-2:
Process ForkPoolWorker-1:
Process ForkPoolWorker-4:
main_local, nb_trials=20, trials=hparams.trials(20), gpu_ids=["0, 1"]
File "/home/kyoungrok/anaconda3/lib/python3.7/site-packages/test_tube/argparse_hopt.py", line 361, in optimize_trials_parallel_gpu
Traceback (most recent call last):
results = self.pool.map(optimize_parallel_gpu_private, self.trials)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/pool.py", line 268, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/pool.py", line 121, in worker
result = (True, func(*args, **kwds))
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/pool.py", line 44, in mapstar
return list(map(*args))
Traceback (most recent call last):
File "/home/kyoungrok/anaconda3/lib/python3.7/site-packages/test_tube/argparse_hopt.py", line 29, in optimize_parallel_gpu_private
gpu_id_set = g_gpu_id_q.get(block=True)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/queues.py", line 93, in get
with self._rlock:
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
KeyboardInterrupt
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/pool.py", line 121, in worker
result = (True, func(*args, **kwds))
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/pool.py", line 44, in mapstar
return list(map(*args))
File "/home/kyoungrok/anaconda3/lib/python3.7/site-packages/test_tube/argparse_hopt.py", line 29, in optimize_parallel_gpu_private
gpu_id_set = g_gpu_id_q.get(block=True)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/queues.py", line 93, in get
with self._rlock:
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/pool.py", line 651, in get
Traceback (most recent call last):
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/pool.py", line 121, in worker
result = (True, func(*args, **kwds))
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/pool.py", line 44, in mapstar
return list(map(*args))
File "/home/kyoungrok/anaconda3/lib/python3.7/site-packages/test_tube/argparse_hopt.py", line 29, in optimize_parallel_gpu_private
gpu_id_set = g_gpu_id_q.get(block=True)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/queues.py", line 94, in get
res = self._recv_bytes()
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
buf = self._recv(4)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
KeyboardInterrupt
self.wait(timeout)
File "/home/kyoungrok/anaconda3/lib/python3.7/multiprocessing/pool.py", line 648, in wait
self._event.wait(timeout)
File "/home/kyoungrok/anaconda3/lib/python3.7/threading.py", line 552, in wait
signaled = self._cond.wait(timeout)
File "/home/kyoungrok/anaconda3/lib/python3.7/threading.py", line 296, in wait
waiter.acquire()
KeyboardInterrupt
The text was updated successfully, but these errors were encountered:
Caught exception in worker thread daemonic processes are not allowed to have children
Traceback (most recent call last):
File "/home/roger/libs/torch/test-tube/test_tube/argparse_hopt.py", line 37, in optimize_parallel_gpu_private
results = train_function(trial_params, gpu_id_set)
File "./training.py", line 40, in main
trainer.fit(model)
File "/opt/conda/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py", line 343, in fit
mp.spawn(self.ddp_train, nprocs=self.num_gpus, args=(model,))
File "/opt/conda/lib/python3.6/site-packages/torch/multiprocessing/spawn.py", line 162, in spawn
process.start()
File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 103, in start
'daemonic processes are not allowed to have children'
AssertionError: daemonic processes are not allowed to have children
I'm using
pytorch-lightning
and test_tube
at the same time. I try to perform hyperparameter search using optimize_parallel_gpu
, but I see the strange error in the title: ChildProcessError: [Errno 10] No child processes
Code
console log
The text was updated successfully, but these errors were encountered: