Skip to content

Commit

Permalink
Merge pull request #17 from GeoscienceAustralia/aws-upload
Browse files Browse the repository at this point in the history
Push to S3
  • Loading branch information
caitlinadams authored Jan 31, 2025
2 parents 17e654c + 0196706 commit 3c9e03a
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 9 deletions.
22 changes: 13 additions & 9 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,16 @@ dependencies:
- zstandard=0.23.0
- zstd=1.5.6
- pip:
- exceptiongroup==1.2.2
- geopandas==1.0.1
- iniconfig==2.0.0
- pandas==2.2.3
- pluggy==1.5.0
- pyogrio==0.10.0
- pyproj==3.7.0
- pytest==8.3.4
- tzdata==2024.2
- boto3==1.36.4
- botocore==1.36.4
- exceptiongroup==1.2.2
- geopandas==1.0.1
- iniconfig==2.0.0
- jmespath==1.0.1
- pandas==2.2.3
- pluggy==1.5.0
- pyogrio==0.10.0
- pyproj==3.7.0
- pytest==8.3.4
- s3transfer==0.11.1
- tzdata==2024.2
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ find-scene = "sar_antarctica.nci.cli:find_scene_file"
find-orbits = "sar_antarctica.nci.cli:find_orbits_for_scene"
run-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:run_pyrosar_gamma_workflow"
submit-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:submit_pyrosar_gamma_workflow"
upload-files-in-folder-to-s3 = "sar_antarctica.nci.cli:upload_files_in_folder_to_s3"

[tool.pytest.ini_options]
testpaths = ["tests/*"]
Expand Down
34 changes: 34 additions & 0 deletions sar_antarctica/nci/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import click
from pathlib import Path
import tomli
import logging

from sar_antarctica.nci.filesystem import get_orbits_nci
from sar_antarctica.nci.submission.pyrosar_gamma.prepare_input import (
Expand All @@ -18,7 +19,9 @@
run_pyrosar_gamma_geocode,
)
from sar_antarctica.nci.submission.pyrosar_gamma.submit_job import submit_job
from sar_antarctica.nci.upload.push_folder_to_s3 import push_files_in_folder_to_s3

# Configure the root logger at INFO level as soon as this CLI module is imported.
logging.basicConfig(level=logging.INFO)

@click.command()
@click.argument("scene_name", type=str)
Expand Down Expand Up @@ -172,3 +175,34 @@ def find_orbits_for_scene(scene: str):
)
for orbit in relevant_res_paths:
print(orbit["orbit"])


@click.command()
@click.argument("src_folder", type=click.Path(exists=True, file_okay=False))
@click.argument("s3_bucket")
@click.argument("s3_bucket_folder")
@click.option(
    "--upload-folder",
    default=False,
    is_flag=True,
    help="Upload the whole folder to specified s3_bucket_folder.",
)
@click.option(
    "--exclude-extensions",
    "-e",
    multiple=True,
    help="File extensions to exclude, e.g., '.txt', '.log'",
)
@click.option(
    "--exclude-files",
    "-f",
    multiple=True,
    help="Specific files to exclude, e.g., 'config.json'",
)
@click.option(
    "--region-name",
    default="ap-southeast-2",
    show_default=True,
    help="AWS region name",
)
def upload_files_in_folder_to_s3(
    src_folder: str,
    s3_bucket: str,
    s3_bucket_folder: str,
    upload_folder: bool,
    exclude_extensions: tuple[str, ...] = (),
    exclude_files: tuple[str, ...] = (),
    region_name: str = "ap-southeast-2",
):
    """Upload the files in SRC_FOLDER to S3_BUCKET_FOLDER within S3_BUCKET.

    The sub-folder structure of SRC_FOLDER is kept in the bucket. Use
    -e / -f (repeatable) to skip files by extension or by name.
    """
    # NOTE: click shows the docstring above as the command's --help text.
    # Options declared with multiple=True arrive as tuples; hand plain lists
    # to the library function, whose signature documents lists.
    push_files_in_folder_to_s3(
        src_folder=src_folder,
        s3_bucket=s3_bucket,
        s3_bucket_folder=s3_bucket_folder,
        upload_folder=upload_folder,
        exclude_extensions=list(exclude_extensions),
        exclude_files=list(exclude_files),
        region_name=region_name,
    )
72 changes: 72 additions & 0 deletions sar_antarctica/nci/upload/push_folder_to_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import logging
import os
from pathlib import Path
from typing import Optional

import boto3
import click

def push_files_in_folder_to_s3(
    src_folder: str,
    s3_bucket: str,
    s3_bucket_folder: str,
    upload_folder: bool = False,
    exclude_extensions: Optional[list[str]] = None,
    exclude_files: Optional[list[str]] = None,
    region_name: str = "ap-southeast-2",
):
    """Upload the files in a local folder to an S3 bucket.

    The folder is walked recursively and the sub-folder structure below
    ``src_folder`` is maintained in the S3 object keys.

    Parameters
    ----------
    src_folder : str
        Source folder containing files of interest
    s3_bucket : str
        S3 bucket to push to
    s3_bucket_folder : str
        Folder (key prefix) within the bucket to push to
    upload_folder : bool, optional
        Upload the entire folder to the s3_bucket_folder, by default False.
        If src_folder = my/local_folder/ & s3_bucket_folder = s3/s3_folder:
        when True, all files are uploaded to -> s3/s3_folder/local_folder/...
        when False, all files are uploaded to -> s3/s3_folder/...
    exclude_extensions : list[str], optional
        File extensions to exclude, including the leading dot (e.g. '.txt').
        By default nothing is excluded.
    exclude_files : list[str], optional
        File names (without any directory part) to exclude.
        By default nothing is excluded.
    region_name : str, optional
        AWS region name passed to the S3 client, by default 'ap-southeast-2'
    """
    # None replaces the former mutable [] defaults; behaviour is unchanged.
    exclude_extensions = [] if exclude_extensions is None else exclude_extensions
    exclude_files = [] if exclude_files is None else exclude_files

    # Search for credentials in the environment and warn if they are missing.
    # This is only a warning: the upload is still attempted without them.
    for env_var in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        if os.environ.get(env_var) is None:
            logging.warning(
                f"{env_var} is not set in environment variables. "
                "Set if authentication required on bucket"
            )

    s3_client = boto3.client("s3", region_name=region_name)

    logging.info(f"Attempting to upload to S3 bucket : {s3_bucket}")

    # Key prefix shared by every uploaded file. abspath() is used so that a
    # src_folder such as "." or "some/dir/.." still yields the real folder name.
    key_prefix_parts = [s3_bucket_folder]
    if upload_folder:
        key_prefix_parts.append(Path(os.path.abspath(src_folder)).name)

    for root, _dirs, files in os.walk(src_folder):
        for file in files:
            if os.path.splitext(file)[1] in exclude_extensions:
                continue
            if file in exclude_files:
                continue

            local_path = Path(root) / file
            relative_path = os.path.relpath(local_path, src_folder)

            # S3 keys always use forward slashes. as_posix() keeps them on
            # Windows, where str(Path(...)) would re-introduce backslashes.
            joined_key = os.path.join(*key_prefix_parts, relative_path)
            s3_key = Path(joined_key.replace("\\", "/")).as_posix()

            s3_client.upload_file(str(local_path), str(s3_bucket), s3_key)
            logging.info(f"Uploaded {local_path} to s3://{s3_bucket}/{s3_key}")

0 comments on commit 3c9e03a

Please sign in to comment.