Skip to content

Commit

Permalink
Merge pull request #11 from GeoscienceAustralia/submission-scripts
Browse files Browse the repository at this point in the history
Submission scripts for pyroSAR + GAMMA
  • Loading branch information
caitlinadams authored Jan 16, 2025
2 parents d806f91 + 7cfce5f commit 5b53ed5
Show file tree
Hide file tree
Showing 3 changed files with 213 additions and 0 deletions.
25 changes: 25 additions & 0 deletions sar_antarctica/nci/configs/pyrosar_gamma.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[sources]
scenes = "/g/data/fj7/Copernicus/Sentinel-1/C-SAR/GRD"
dem = "/g/data/v10/eoancillarydata-2/elevation/copernicus_30m_world"
ancillary = "/g/data/yp75/projects/ancillary"

[job]
root = "/g/data/yp75/projects/sar-antractica-processing"

[job.submission]
root = "submission"
logs = "logs"

[job.configuration]
root = "config"
workflow = "workflow_config"
scene = "scene_config"

[job.settings]
ncpu = 4
mem = 32
queue = "normal"
project = "u46"
walltime = "02:00:00"
storage = "gdata/yp75+gdata/dg9+gdata/fj7+gdata/v10"
workflow_config = "pyrosar_gamma"
161 changes: 161 additions & 0 deletions sar_antarctica/nci/submission/pyrosar_gamma/pyrosar_gamma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import os
import subprocess
from pathlib import Path
from typing import Any

import click
import tomli

WORKFLOW = "pyrosar_gamma"
PROCESSING_DIR = "/g/data/yp75/projects/sar-antractica-processing"

def get_list_of_scenes(scene_source: str) -> list[str]:
"""Convert script input to list.
If a .zip file, produce a list with that.
If a .txt file, open the file, and produce a list of all .zip files.
Parameters
----------
scene_source : str
The file to be processed. Either a single .zip or a .txt containing multiple .zip files
Returns
-------
list[str]
List of files to process
"""

# Process a single .zip file
if scene_source.endswith(".zip"):
scene_list = [scene_source]
# Process a .txt file containing .zip files
elif scene_source.endswith(".txt"):
with open(scene_source, 'r') as f:
scene_list = [line.strip() for line in f if line.strip().endswith('.zip')]
else:
scene_list = []

if scene_list is not None:
return scene_list
else:
raise RuntimeError("No valid scenes were found for processing. Expected single .zip file or .txt file containing at least one .zip file.")

def update_pbs_template(pbs_template: str, scene_id: str, job_config: dict[str, str | dict[str, Any]]) -> str:
"""_summary_
Parameters
----------
pbs_template : str
A string containing a PBS jobscript
scene_id : str
The scene ID for the job
job_config : dict[str, str | dict[str, Any]]
Dictionary containing information on the job, main keys are
root, submission, configuration, and settings
Returns
-------
str
The updated PBS jobscript string with specified values replaced
"""

"""For a given PBS jobscript template, replace specified values with jobscript settings
Parameters
----------
pbs_template : str
A string containing a PBS jobscript
jobscript_settings: dict
Returns
-------
str
The updated PBS jobscript string with specified values replaced
"""

processing_path = Path(job_config["root"])
log_path = processing_path / job_config["submission"]["root"] / job_config["submission"]["logs"]
config_path = processing_path / job_config["configuration"]["root"]

job_configuration = job_config["configuration"]
job_settings = job_config["settings"]

workflow_config = job_settings["workflow_config"]
# Dictionary to replace placeholders in PBS text with values from configurations
replace_dict = {
"<SCENE_ID>": scene_id,
"<NCPU>": job_settings["ncpu"],
"<MEM>": job_settings["mem"],
"<QUEUE>": job_settings["queue"],
"<PROJECT>": job_settings["project"],
"<WALLTIME>": job_settings["walltime"],
"<STORAGE>": job_settings["storage"],
"<LOG_DIR>": log_path,
"<WORKFLOW_CONFIG>": config_path / job_configuration["workflow"] / f"{workflow_config}.toml",
"<SCENE_CONFIG>": config_path / job_configuration["scene"] / f"{scene_id}.toml"
}

for key, value in replace_dict.items():
pbs_template = pbs_template.replace(key, value if isinstance(value, str) else str(value))

return pbs_template


@click.command()
@click.argument("config_file", nargs=1)
@click.argument("scene_source", nargs=1)
def pyrosar_gamma_workflow(config_file: str | os.PathLike, scene_source: str | os.PathLike) -> None:
"""Take an input of a single scene or file with multiple scenes and submit pyroSAR+GAMMA jobs
Parameters
----------
processing_dir : str
The directory to store configuration and jobscript files
scene_source : str
The file to be processed. Either a single .zip or a .txt containing multiple .zip files
"""

current_file_directory = Path(__file__).resolve().parent

with open(config_file, "rb") as f:
config = tomli.load(f)

# Extract specific configuration dictionaries
job_config = config["job"]
submission_config = job_config["submission"]
configuration_config = job_config["configuration"]
settings_config = job_config["settings"]

# Get folder structure
processing_dir = Path(job_config["root"])
log_dir = processing_dir / submission_config["root"] / submission_config["logs"]

# Get scenes from source
scene_list = get_list_of_scenes(scene_source)

for scene_path in scene_list:
# Determine scene ID from command line input and create submission script
scene_id = Path(scene_path).stem
scene_script = log_dir / scene_id / f"{scene_id}.sh"
scene_script.parent.mkdir(exist_ok=True, parents=True)

# Read the workflow template and replace values
workflow_name = settings_config['workflow_config']
template_file = current_file_directory / f"{workflow_name}.txt"
print(template_file)
pbs_template = template_file.read_text()
pbs_template = update_pbs_template(
pbs_template,
scene_id,
job_config
)

# Write updated text to pbs script
scene_script.write_text(pbs_template)

# Submit script
qsub_command = f"qsub {scene_script}"
os.system(qsub_command)


if __name__ == "__main__":
pyrosar_gamma_workflow()
27 changes: 27 additions & 0 deletions sar_antarctica/nci/submission/pyrosar_gamma/pyrosar_gamma.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash

#PBS -l ncpus=<NCPU>
#PBS -l mem=<MEM>GB
#PBS -q <QUEUE>
#PBS -P <PROJECT>
#PBS -l walltime=<WALLTIME>
#PBS -l storage=<STORAGE>
#PBS -l wd
#PBS -o <LOG_DIR>/<SCENE_ID>
#PBS -e <LOG_DIR>/<SCENE_ID>
#PBS -N <SCENE_ID>

# Load the module
module use /g/data/yp75/modules/modulefiles
module load sar-antarctica/test-20250108

# Activate micromamba environment
micromamba activate sar-antarctica

# Run preparation script
cd /g/data/yp75/ca6983/repositories/sar-antarctica/sar_antarctica/nci/preparation/
python create_config.py <SCENE_ID> <SCENE_CONFIG>

# Run preocessing script
cd /g/data/yp75/ca6983/repositories/sar-antarctica/sar_antarctica/nci/processing/pyroSAR/
python pyrosar_geocode.py <WORKFLOW_CONFIG> <SCENE_CONFIG>

0 comments on commit 5b53ed5

Please sign in to comment.