Commit 28ce7ff, committed by Christine Simpson on Oct 30, 2024 (parent: 62bb7d3).
Showing 6 changed files with 328 additions and 23 deletions.
Changes to the repository's ignore file:

@@ -1,2 +1,3 @@
 *~
-*.png
+*.png
+polaris_tutorial
Changes to the Globus Compute on Polaris instructions in workflows/globus_compute:
@@ -22,7 +22,7 @@ On Polaris, you will need to create a python virtual environment or a conda envi
 
 For the workshop, you can use the workshop python virtual environment:
 ```bash
-source /eagle/fallwkshp23/workflows/env/bin/activate
+source /grand/alcf_training/workflows_2024/_env/bin/activate
 ```
 
 To create your own environment:
@@ -60,7 +60,7 @@ git clone git@github.com:argonne-lcf/ALCF_Hands_on_HPC_Workshop.git
 cd ALCF_Hands_on_HPC_Workshop/workflows/globus_compute
 
 # If you haven't already, activate the environment
-source /eagle/fallwkshp23/workflows/env/bin/activate
+source /grand/alcf_training/workflows_2024/_env/bin/activate
 ```
 
 Use the sample config [polaris_config.yaml](polaris_config.yaml) provided to configure and start your endpoint. The sample config has similar features to the Parsl config and looks like this:
@@ -70,16 +70,17 @@ engine:
     type: GlobusComputeEngine
 
-    available_accelerators: 4 # Assign one worker per GPU
-    cpu_affinity: block-reverse # Assigns cpus in reverse sequential order
-    prefetch_capacity: 0 # Increase if you have many more tasks than workers
-
-    address:
-        type: address_by_interface
-        ifname: bond0
+    max_workers_per_node: 4
+
+    cpu_affinity: "list:24-31,56-63:16-23,48-55:8-15,40-47:0-7,32-39"
+
+    prefetch_capacity: 0 # Increase if you have many more tasks than workers
+    max_retries_on_system_failure: 2
 
-    strategy:
-        type: SimpleStrategy
+    strategy: simple
+    job_status_kwargs:
+        max_idletime: 300
+        strategy_period: 60
 
     provider:
         type: PBSProProvider
@@ -90,18 +91,18 @@ engine:
             bind_cmd: --cpu-bind
             overrides: --ppn 1
 
-        account: fallwkshp23
-        queue: fallws23single
+        account: alcf_training
+        queue: HandsOnHPC
         cpus_per_node: 64
         select_options: ngpus=4
 
         # e.g., "#PBS -l filesystems=home:grand:eagle\n#PBS -k doe"
-        scheduler_options: "#PBS -l filesystems=home:eagle"
+        scheduler_options: "#PBS -l filesystems=home:eagle:grand"
 
         # Node setup: activate necessary conda environment and such
-        worker_init: "source /eagle/fallwkshp23/workflows/env/bin/activate; module load PrgEnv-nvhpc; cd $HOME/.globus_compute/workshop-endpoint"
+        worker_init: "source /grand/alcf_training/workflows_2024/_env/bin/activate; module load PrgEnv-nvhpc; cd $HOME/.globus_compute/workshop-endpoint"
 
-        walltime: 00:05:00
+        walltime: 00:30:00
         nodes_per_block: 1
         init_blocks: 0
         min_blocks: 0
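Once an endpoint has been created from a config like the one above and started, its status can be checked from Python. The sketch below is not part of the commit; it assumes the Globus Compute SDK is installed in the active environment and uses a placeholder endpoint UUID.

```python
# Hedged sketch (not from the commit): query the status of a running
# Globus Compute endpoint created from the config shown above.
from globus_compute_sdk import Client

endpoint_id = "00000000-0000-0000-0000-000000000000"  # placeholder; use your endpoint's UUID

client = Client()
status = client.get_endpoint_status(endpoint_id)
print(status)  # e.g. a dict whose "status" field reports whether the endpoint is online
```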
@@ -180,7 +181,7 @@ def hello_affinity(run_directory):
     os.chdir(os.path.expandvars(run_directory))
     # This is the command that calls the compiled executable
-    command = f"/eagle/fallwkshp23/workflows/affinity_gpu/hello_affinity"
+    command = f"/grand/alcf_training/workflows_2024/GettingStarted/Examples/Polaris/affinity_gpu/hello_affinity"
     # This runs the application command
     res = subprocess.run(command.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
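For orientation, here is a hedged sketch (not part of the commit) of how a function like hello_affinity is typically submitted through the Globus Compute SDK once the endpoint above is online. The endpoint UUID and the run directory are placeholders, and the function body is a condensed restatement of the one changed in this diff.

```python
# Hedged sketch (not from the commit): submit hello_affinity to the endpoint.
from globus_compute_sdk import Executor

endpoint_id = "00000000-0000-0000-0000-000000000000"  # placeholder; use your endpoint's UUID

def hello_affinity(run_directory):
    # Condensed restatement of the function changed in this commit:
    # run the compiled hello_affinity binary from run_directory.
    import os, subprocess
    os.chdir(os.path.expandvars(run_directory))
    command = "/grand/alcf_training/workflows_2024/GettingStarted/Examples/Polaris/affinity_gpu/hello_affinity"
    res = subprocess.run(command.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return res.stdout.decode()

with Executor(endpoint_id=endpoint_id) as gce:
    future = gce.submit(hello_affinity, "$HOME")  # run directory chosen arbitrarily for the example
    print(future.result())
```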
A new file: a Parsl example that runs the MPI hello_affinity application on Polaris with the MPIExecutor:
@@ -0,0 +1,61 @@
+import parsl
+import os
+from parsl.config import Config
+from parsl import bash_app
+# PBSPro is the right provider for polaris:
+from parsl.providers import PBSProProvider
+# The MPIExecutor is for running MPI applications:
+from parsl.executors import MPIExecutor
+# Use the Simple launcher
+from parsl.launchers import SimpleLauncher
+
+# We will save outputs in the current working directory
+working_directory = os.getcwd()
+
+config = Config(
+    executors=[
+        MPIExecutor(
+            max_workers_per_block=2,  # Assuming 2 nodes per task
+            provider=PBSProProvider(
+                account="alcf_training",
+                worker_init=f"""source /grand/alcf_training/workflows_2024/_env/bin/activate; \
+cd {working_directory}""",
+                walltime="1:00:00",
+                queue="debug-scaling",
+                scheduler_options="#PBS -l filesystems=home:eagle:grand",
+                launcher=SimpleLauncher(),
+                select_options="ngpus=4",
+                nodes_per_block=4,
+                max_blocks=1,
+                cpus_per_node=64,
+            ),
+        ),
+    ]
+)
+
+resource_specification = {
+    'num_nodes': 2,        # Number of nodes required for the application instance
+    'ranks_per_node': 4,   # Number of ranks / application elements to be launched per node
+    'num_ranks': 8,        # Number of ranks in total
+}
+
+@bash_app
+def mpi_hello_affinity(parsl_resource_specification, depth=8, stdout='mpi_hello.stdout', stderr='mpi_hello.stderr'):
+    # PARSL_MPI_PREFIX will resolve to `mpiexec -n 8 -ppn 4 -hosts NODE001,NODE002`
+    APP_DIR = "/grand/alcf_training/workflows_2024/GettingStarted/Examples/Polaris/affinity_gpu"
+    return f"$PARSL_MPI_PREFIX --cpu-bind depth --depth={depth} \
+        {APP_DIR}/set_affinity_gpu_polaris.sh {APP_DIR}/hello_affinity"
+
+with parsl.load(config):
+    tasks = []
+    for i in range(4):
+        tasks.append(mpi_hello_affinity(parsl_resource_specification=resource_specification,
+                                        stdout=f"{working_directory}/mpi_output/hello_{i}.stdout",
+                                        stderr=f"{working_directory}/mpi_output/hello_{i}.stderr"))
+
+    # Wait on futures to return, and print results
+    for i, t in enumerate(tasks):
+        t.result()
+        with open(f"{working_directory}/mpi_output/hello_{i}.stdout", "r") as f:
+            print(f"Stdout of task {i}:")
+            print(f.read())
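As a usage note (not part of the commit): the resource_specification dict is what determines the mpiexec line that $PARSL_MPI_PREFIX expands to. Below is a hedged sketch of a single-node variant, reusing the config and the mpi_hello_affinity app defined in the new script above; the spec values and output paths are illustrative assumptions.

```python
# Hedged sketch (not from the commit): a single-node resource specification for
# the same mpi_hello_affinity app. With num_nodes=1 and ranks_per_node=4,
# $PARSL_MPI_PREFIX would request 4 ranks on a single node; the example
# config's max_workers_per_block=2 still caps how many tasks run concurrently per block.
single_node_spec = {
    'num_nodes': 1,       # one node per application instance
    'ranks_per_node': 4,  # one rank per GPU on a Polaris node
    'num_ranks': 4,       # total ranks for this instance
}

# Assumes the config and mpi_hello_affinity definitions from the script above.
task = mpi_hello_affinity(parsl_resource_specification=single_node_spec,
                          stdout="mpi_output/hello_single.stdout",
                          stderr="mpi_output/hello_single.stderr")
task.result()
```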