
Commit 28ce7ff
updates
Christine Simpson committed Oct 30, 2024
1 parent 62bb7d3 commit 28ce7ff
Showing 6 changed files with 328 additions and 23 deletions.
3 changes: 2 additions & 1 deletion workflows/balsam/.gitignore
@@ -1,2 +1,3 @@
*~
*.png
*.png
polaris_tutorial
2 changes: 1 addition & 1 deletion workflows/globus_compute/1_register_function.py
@@ -13,7 +13,7 @@ def hello_affinity(run_directory):
os.chdir(os.path.expandvars(run_directory))

# This is the command that calls the compiled executable
command = "/eagle/fallwkshp23/workflows/affinity_gpu/hello_affinity"
command = "/grand/alcf_training/workflows_2024/GettingStarted/Examples/Polaris/affinity_gpu/hello_affinity"

# This runs the application command
res = subprocess.run(command.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
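For context, `1_register_function.py` registers this function with the Globus Compute service so it can later be invoked on the endpoint by UUID. A minimal sketch of that registration step, assuming the workshop Python environment with `globus_compute_sdk` installed (this is an illustration, not the file's exact contents):

```python
# Illustrative registration sketch (not the exact contents of 1_register_function.py).
from globus_compute_sdk import Client


def hello_affinity(run_directory):
    """Condensed version of the function shown in the diff above."""
    import os
    import subprocess

    os.chdir(os.path.expandvars(run_directory))
    command = "/grand/alcf_training/workflows_2024/GettingStarted/Examples/Polaris/affinity_gpu/hello_affinity"
    res = subprocess.run(command.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return res.stdout.decode()


gc_client = Client()
function_id = gc_client.register_function(hello_affinity)
print(f"Registered hello_affinity as {function_id}")
```

The printed UUID is how later steps refer to the function when submitting work to the endpoint.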
33 changes: 17 additions & 16 deletions workflows/globus_compute/README.md
@@ -22,7 +22,7 @@ On Polaris, you will need to create a python virtual environment or a conda environment

For the workshop, you can use the workshop python virtual environment:
```bash
source /eagle/fallwkshp23/workflows/env/bin/activate
source /grand/alcf_training/workflows_2024/_env/bin/activate
```

To create your own environment:
@@ -60,7 +60,7 @@ git clone git@github.com:argonne-lcf/ALCF_Hands_on_HPC_Workshop.git
cd ALCF_Hands_on_HPC_Workshop/workflows/globus_compute

# If you haven't already, activate the environment
source /eagle/fallwkshp23/workflows/env/bin/activate
source /grand/alcf_training/workflows_2024/_env/bin/activate
```

Use the sample config [polaris_config.yaml](polaris_config.yaml) provided to configure and start your endpoint. The sample config has similar features to the Parsl config and looks like this:
@@ -70,16 +70,17 @@ engine:
type: GlobusComputeEngine

available_accelerators: 4 # Assign one worker per GPU
cpu_affinity: block-reverse # Assigns cpus in reverse sequential order
prefetch_capacity: 0 # Increase if you have many more tasks than workers

address:
type: address_by_interface
ifname: bond0
max_workers_per_node: 4

cpu_affinity: "list:24-31,56-63:16-23,48-55:8-15,40-47:0-7,32-39"

prefetch_capacity: 0 # Increase if you have many more tasks than workers
max_retries_on_system_failure: 2

strategy:
type: SimpleStrategy
strategy: simple
job_status_kwargs:
max_idletime: 300
strategy_period: 60

provider:
type: PBSProProvider
@@ -90,18 +91,18 @@ engine:
bind_cmd: --cpu-bind
overrides: --ppn 1

account: fallwkshp23
queue: fallws23single
account: alcf_training
queue: HandsOnHPC
cpus_per_node: 64
select_options: ngpus=4

# e.g., "#PBS -l filesystems=home:grand:eagle\n#PBS -k doe"
scheduler_options: "#PBS -l filesystems=home:eagle"
scheduler_options: "#PBS -l filesystems=home:eagle:grand"

# Node setup: activate necessary conda environment and such
worker_init: "source /eagle/fallwkshp23/workflows/env/bin/activate; module load PrgEnv-nvhpc; cd $HOME/.globus_compute/workshop-endpoint"
worker_init: "source /grand/alcf_training/workflows_2024/_env/bin/activate; module load PrgEnv-nvhpc; cd $HOME/.globus_compute/workshop-endpoint"

walltime: 00:05:00
walltime: 00:30:00
nodes_per_block: 1
init_blocks: 0
min_blocks: 0
@@ -180,7 +181,7 @@ def hello_affinity(run_directory):
os.chdir(os.path.expandvars(run_directory))
# This is the command that calls the compiled executable
command = f"/eagle/fallwkshp23/workflows/affinity_gpu/hello_affinity"
command = f"/grand/alcf_training/workflows_2024/GettingStarted/Examples/Polaris/affinity_gpu/hello_affinity"
# This runs the application command
res = subprocess.run(command.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
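Once the endpoint is configured and started with this config, a quick way to confirm it is healthy is to submit a trivial function through the SDK's `Executor` interface. A minimal sketch, using a placeholder endpoint UUID (the real one is shown by `globus-compute-endpoint list`) and a throwaway `where_am_i` function that is not part of the commit:

```python
# Illustrative end-to-end check; the endpoint UUID below is a placeholder.
from globus_compute_sdk import Executor


def where_am_i():
    import socket
    return socket.gethostname()


endpoint_id = "REPLACE-WITH-YOUR-ENDPOINT-UUID"  # placeholder, not a real UUID
with Executor(endpoint_id=endpoint_id) as gce:
    future = gce.submit(where_am_i)
    # Blocks until the endpoint's PBS job starts and runs the task;
    # the result should be a Polaris compute node hostname.
    print(future.result())
```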
4 changes: 0 additions & 4 deletions workflows/globus_compute/polaris_config.yaml
@@ -10,10 +10,6 @@ engine:
prefetch_capacity: 0 # Increase if you have many more tasks than workers
max_retries_on_system_failure: 2

address:
type: address_by_interface
ifname: bond0

strategy: simple
job_status_kwargs:
max_idletime: 300
61 changes: 61 additions & 0 deletions workflows/parsl/5_mpi_app_example.py
@@ -0,0 +1,61 @@
import parsl
import os
from parsl.config import Config
from parsl import bash_app
# PBSPro is the right provider for polaris:
from parsl.providers import PBSProProvider
# The MPIExecutor is for running MPI applications:
from parsl.executors import MPIExecutor
# Use the Simple launcher
from parsl.launchers import SimpleLauncher

# We will save outputs in the current working directory
working_directory = os.getcwd()

config = Config(
executors=[
MPIExecutor(
max_workers_per_block=2, # Assuming 2 nodes per task
provider=PBSProProvider(
account="alcf_training",
worker_init=f"""source /grand/alcf_training/workflows_2024/_env/bin/activate; \
cd {working_directory}""",
walltime="1:00:00",
queue="debug-scaling",
scheduler_options="#PBS -l filesystems=home:eagle:grand",
launcher=SimpleLauncher(),
select_options="ngpus=4",
nodes_per_block=4,
max_blocks=1,
cpus_per_node=64,
),
),
]
)

resource_specification = {
'num_nodes': 2, # Number of nodes required for the application instance
'ranks_per_node': 4, # Number of ranks / application elements to be launched per node
'num_ranks': 8, # Number of ranks in total
}

@bash_app
def mpi_hello_affinity(parsl_resource_specification, depth=8, stdout='mpi_hello.stdout', stderr='mpi_hello.stderr'):
# PARSL_MPI_PREFIX will resolve to `mpiexec -n 8 -ppn 4 -hosts NODE001,NODE002`
APP_DIR = "/grand/alcf_training/workflows_2024/GettingStarted/Examples/Polaris/affinity_gpu"
return f"$PARSL_MPI_PREFIX --cpu-bind depth --depth={depth} \
{APP_DIR}/set_affinity_gpu_polaris.sh {APP_DIR}/hello_affinity"

with parsl.load(config):
tasks = []
for i in range(4):
tasks.append(mpi_hello_affinity(parsl_resource_specification=resource_specification,
stdout=f"{working_directory}/mpi_output/hello_{i}.stdout",
stderr=f"{working_directory}/mpi_output/hello_{i}.stderr"))

# Wait on futures to return, and print results
for i, t in enumerate(tasks):
t.result()
with open(f"{working_directory}/mpi_output/hello_{i}.stdout", "r") as f:
print(f"Stdout of task {i}:")
print(f.read())
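Because the resource specification is supplied per task, individual submissions can request different MPI layouts without touching the executor config. A minimal sketch of a single-node variant, assuming it is placed inside the `with parsl.load(config):` block above (the spec values and output filename are illustrative, not part of the commit):

```python
# Hypothetical single-node variant of the resource specification above.
# With num_nodes=1 and ranks_per_node=4, $PARSL_MPI_PREFIX should resolve to
# something like `mpiexec -n 4 -ppn 4 -hosts NODE001`.
single_node_spec = {
    'num_nodes': 1,       # One node for this application instance
    'ranks_per_node': 4,  # One rank per GPU
    'num_ranks': 4,       # Total ranks
}
tasks.append(mpi_hello_affinity(parsl_resource_specification=single_node_spec,
                                stdout=f"{working_directory}/mpi_output/hello_single.stdout",
                                stderr=f"{working_directory}/mpi_output/hello_single.stderr"))
```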