Skip to content

Commit 1a2f51b

Browse files
committed
Fix autopara GPU test
1 parent 0c8222e commit 1a2f51b

File tree

3 files changed

+33
-6
lines changed

3 files changed

+33
-6
lines changed

tests/local_scripts/complete_pytest.tin

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ module load compiler/gnu python python_extras/structure python_extras/quippy lap
77
module load python_extras/wif
88
module load python_extras/torch/cpu
99

10+
if [ ! -z "$WFL_PYTEST_POST_MODULE_COMMANDS" ]; then
11+
echo "Using WFL_PYTEST_POST_MODULE_COMMANDS '$WFL_PYTEST_POST_MODULE_COMMANDS'" 1>&2
12+
eval $WFL_PYTEST_POST_MODULE_COMMANDS
13+
else
14+
echo "Using no WFL_PYTEST_POST_MODULE_COMMANDS" 1>&2
15+
fi
16+
1017
if [ -z "$WFL_PYTEST_EXPYRE_INFO" ]; then
1118
echo "To override partition used, set WFL_PYTEST_EXPYRE_INFO='{\"resources\" : {\"partitions\": \"DESIRED_PARTITION\"}}'" 1>&2
1219
fi
@@ -22,11 +29,13 @@ print(json.dumps(i))
2229
EOF
2330
)
2431
export WFL_PYTEST_EXPYRE_INFO
25-
echo Using WFL_PYTEST_EXPYRE_INFO \'$WFL_PYTEST_EXPYRE_INFO\' 1>&2
32+
echo "Using WFL_PYTEST_EXPYRE_INFO '$WFL_PYTEST_EXPYRE_INFO'" 1>&2
2633

2734
if [ ! -z $WFL_PYTHONPATH_EXTRA ]; then
2835
echo "Adding WFL_PYTHONPATH_EXTRA '$WFL_PYTHONPATH_EXTRA'" 1>&2
2936
export PYTHONPATH=${WFL_PYTHONPATH_EXTRA}:${PYTHONPATH}
37+
else
38+
echo "Adding no WFL_PYTHONPATH_EXTRA" 1>&2
3039
fi
3140

3241
export JULIA_PROJECT=${PWD}/tests/assets/julia
@@ -51,6 +60,8 @@ export PYTEST_VASP_POTCAR_DIR=$VASP_PATH/pot/rev_54/PBE
5160
module load dft/pwscf
5261
# no ORCA
5362

63+
module list
64+
5465
export OPENBLAS_NUM_THREADS=1
5566
export MKL_NUM_THREADS=1
5667
# required for descriptor calc to not hang

tests/test_autoparallelize.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def test_pool_speedup():
6666
assert dt_2 / dt_1 < 0.75
6767

6868

69-
@pytest.mark.skipif(torch is None or not torch.cuda.is_available(), reason="No torch CUDA devices available")
69+
@pytest.mark.skipif(torch is None or not torch.cuda.is_available() or os.environ.get("WFL_TORCH_N_GPUS") is None, reason="No torch CUDA devices available, or WFL_TORCH_N_GPUS isn't set")
7070
@pytest.mark.perf
7171
def test_pool_speedup_GPU(monkeypatch):
7272
np.random.seed(5)
@@ -81,20 +81,29 @@ def test_pool_speedup_GPU(monkeypatch):
8181

8282
calc = (mace_mp, ["small-omat-0"], {"device": "cuda"})
8383

84+
req_n_gpus = os.environ["WFL_TORCH_N_GPUS"]
85+
if len(req_n_gpus) == 0:
86+
req_n_gpus = str(len(os.environ["CUDA_VISIBLE_DEVICES"].split(",")))
87+
88+
if "WFL_TORCH_N_GPUS" in os.environ:
89+
monkeypatch.delenv("WFL_TORCH_N_GPUS")
90+
8491
t0 = time.time()
8592
co = generic.calculate(ConfigSet(ats), OutputSpec(), calc, output_prefix="_auto_",
8693
autopara_info=AutoparaInfo(num_python_subprocesses=1,
8794
num_inputs_per_python_subprocess=30))
8895
dt_1 = time.time() - t0
8996

97+
monkeypatch.setenv("WFL_TORCH_N_GPUS", req_n_gpus)
98+
9099
t0 = time.time()
91-
monkeypatch.setenv("WFL_TORCH_N_GPUS", str(len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))))
92100
co = generic.calculate(ConfigSet(ats), OutputSpec(), calc, output_prefix="_auto_",
93101
autopara_info=AutoparaInfo(num_python_subprocesses=2,
94102
num_inputs_per_python_subprocess=30))
95-
monkeypatch.delenv("WFL_TORCH_N_GPUS")
96103
dt_2 = time.time() - t0
97104

105+
monkeypatch.delenv("WFL_TORCH_N_GPUS")
106+
98107
print("time ratio", dt_2 / dt_1)
99108
assert dt_2 / dt_1 < 0.75
100109

wfl/autoparallelize/pool.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,19 @@
1717
# pass
1818

1919
# https://docs.pytorch.org/docs/stable/notes/multiprocessing.html#poison-fork-in-multiprocessing
20+
# But, only use forkserver if needed because it has a lot of overhead
21+
try:
22+
import torch
23+
except:
24+
torch = None
2025
if os.environ.get("WFL_TORCH_N_GPUS") is not None:
26+
if not torch:
27+
        raise RuntimeError(f"Got WFL_TORCH_N_GPUS '{os.environ['WFL_TORCH_N_GPUS']}' but torch module is not available")
2128
try:
22-
import torch
2329
import multiprocessing
2430
multiprocessing.set_start_method('forkserver')
25-
except (ImportError, RuntimeError) as exc:
31+
except RuntimeError as exc:
32+
        # ignore complaints about setting start method more than once
2633
pass
2734
from multiprocessing.pool import Pool
2835
# to help keep track of distinct GPU for each python subprocess

0 commit comments

Comments
 (0)