Skip to content

Commit

Permalink
Merge pull request #3 from GeoscienceAustralia/scene-and-orbit-tests
Browse files Browse the repository at this point in the history
Functions and tests for managing scenes and orbits
  • Loading branch information
caitlinadams authored Jan 9, 2025
2 parents 42e2e33 + da5aab3 commit 581347f
Show file tree
Hide file tree
Showing 8 changed files with 264 additions and 33 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run-pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ jobs:
- name: Run tests
shell: micromamba-shell {0}
run: |
pytest
pytest tests/sar_antarctica/
4 changes: 2 additions & 2 deletions sar_antarctica/nci/preparation/find_scene.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import click
from pathlib import Path

from scenes import find_scene_file_from_id
from orbits import find_latest_orbit_for_scene
from sar_antarctica.nci.preparation.scenes import find_scene_file_from_id
from sar_antarctica.nci.preparation.orbits import find_latest_orbit_for_scene

@click.command()
@click.argument("scene_id")
Expand Down
85 changes: 59 additions & 26 deletions sar_antarctica/nci/preparation/orbits.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,36 @@
from datetime import datetime
from pathlib import Path
import re
from typing import Optional

from scenes import parse_scene_file_dates
from sar_antarctica.nci.preparation.scenes import parse_scene_file_dates, parse_scene_file_sensor

# Constants for NCI
S1_DIR = Path("/g/data/fj7/Copernicus/Sentinel-1/")
POE_DIR = "POEORB"
RES_DIR = "RESORB"
ORBIT_DIRS = [POE_DIR, RES_DIR]
SENSORS = ["S1A", "S1B"]

def parse_orbit_file_dates(orbit_file_name: str) -> tuple[datetime, datetime, datetime]:
"""
Extracts published_date, start_date, and end_date from the given orbit file.
"""Extracts published_date, start_date, and end_date from the given orbit file.
Filename example: S1A_OPER_AUX_POEORB_OPOD_20141207T123431_V20141115T225944_20141117T005944.EOF
- Published: 20141207T123431
- Start: 20141115T225944
- End: 20141117T005944
Args:
file_name (str): The orbit file name as a string.
Parameters
----------
orbit_file_name : str
The orbit file name as a string.
Returns
-------
tuple[datetime, datetime, datetime]
a tuple of datetimes for published, start and end of the orbit file
Returns:
tuple(datetime): a tuple of datetimes for published, start and end of the orbit file
Raises
------
ValueError
Did not find a match to the expected date pattern of published_date followed by start_date and end_date
"""
# Regex pattern to match the dates
pattern = (r"(?P<published_date>\d{8}T\d{6})_V"
Expand All @@ -43,34 +50,60 @@ def parse_orbit_file_dates(orbit_file_name: str) -> tuple[datetime, datetime, da

return (published_date, start_date, stop_date)

def find_latest_orbit_for_scene(scene_id: str, poe_only: bool = True) -> Path:
"""
Identifies the most recent orbit file available for a given scene, based
def find_latest_orbit_for_scene(scene_id: str, orbit_type: Optional[str] = None) -> Path:
"""Identifies the most recent orbit file available for a given scene, based
on the scene's start and end date.
Parameters
----------
scene_id : str
Sentinel-1 scene ID
e.g. S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F6
orbit_type : Optional[str], optional
Any of "POE" for POE orbits, "RES" for RES orbits, or None, by default None
Returns
-------
Path
Full file path to latest orbit file on NCI
Raises
------
ValueError
orbit_type must be one of "POE", "RES" or None
ValueError
No valid orbit file was found
"""

scene_start, scene_stop = parse_scene_file_dates(scene_id)
scene_sensor = parse_scene_file_sensor(scene_id)

relevant_orbits = []

for orbit_dir in ORBIT_DIRS:
if orbit_type == "POE":
orbit_directories = [POE_DIR]
elif orbit_type == "RES":
orbit_directories = [RES_DIR]
elif orbit_type is None:
orbit_directories = [RES_DIR, POE_DIR]
else:
raise ValueError("orbit_type must be one of 'POE', 'RES', or None")

# Find all orbits for the sensor that fall within the date range of the scene
for orbit_dir in orbit_directories:
orbit_dir_path = S1_DIR / orbit_dir
for sensor in SENSORS:
orbit_files_path = orbit_dir_path / sensor
orbit_files = orbit_files_path.glob("*.EOF")
orbit_files_path = orbit_dir_path / scene_sensor
orbit_files = orbit_files_path.glob("*.EOF")

for orbit_file in orbit_files:
for orbit_file in orbit_files:

orbit_published, orbit_start, orbit_stop = parse_orbit_file_dates(orbit_file)
# Check if scene falls within orbit
if scene_start >= orbit_start and scene_stop <= orbit_stop:
orbit_metadata = (orbit_file, orbit_dir, orbit_published)
relevant_orbits.append(orbit_metadata)
orbit_published, orbit_start, orbit_stop = parse_orbit_file_dates(orbit_file)

# Check if scene falls within orbit
if scene_start >= orbit_start and scene_stop <= orbit_stop:
orbit_metadata = (orbit_file, orbit_dir, orbit_published)
relevant_orbits.append(orbit_metadata)

if poe_only:
relevant_orbits = [item for item in relevant_orbits if item[1] == POE_DIR]

# If relevant_orbits is empty, set latest_orbit to None
latest_orbit = max(relevant_orbits, key=lambda x: x[2]) if relevant_orbits else None

Expand Down
71 changes: 67 additions & 4 deletions sar_antarctica/nci/preparation/scenes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,55 @@

SCENE_DIR = Path("/g/data/fj7/Copernicus/Sentinel-1/C-SAR/GRD/")

def parse_scene_file_dates(scene_id: str) -> tuple[datetime, datetime]:
def parse_scene_file_sensor(scene_id: str) -> str:
    """Extract the Sentinel-1 sensor string (S1A, S1B, S1C or S1D) from a scene ID.

    Parameters
    ----------
    scene_id : str
        Sentinel-1 scene ID
        e.g. S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F6

    Returns
    -------
    str
        Sensor string. One of S1A, S1B, S1C, or S1D

    Raises
    ------
    ValueError
        The scene ID does not start with S1A, S1B, S1C, or S1D followed by
        an underscore
    """
    # The sensor is the leading "S1x" token, terminated by an underscore.
    # The letters are listed directly in the character class: inside [...]
    # a "|" is a literal pipe rather than alternation, so "[A|B|C|D]" would
    # also accept the invalid prefix "S1|_".
    pattern = r"^(S1[ABCD])_"

    match = re.match(pattern, scene_id)

    if not match:
        raise ValueError("No valid sensor was found in the scene ID. Valid sensors are S1A, S1B, S1C, or S1D")

    return match.group(1)


def parse_scene_file_dates(scene_id: str) -> tuple[datetime, datetime]:
"""Extracts start_date and end_date from the given scene ID.
Parameters
----------
scene_id : str
Sentinel-1 scene ID
e.g. S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F6
Returns
-------
tuple[datetime, datetime]
A tuple containing the start and stop date for the scene as datetimes
e.g. (datetime(2022, 6, 12, 12, 3, 48), datetime(2022, 6, 12, 12, 4, 52))
Raises
------
ValueError
Did not find a match to the expected date pattern of start_date followed by end_date in the scene ID
"""
# Regex pattern to match the dates
pattern = (r"(?P<start_date>\d{8}T\d{6})_"
Expand All @@ -23,8 +69,25 @@ def parse_scene_file_dates(scene_id: str) -> tuple[datetime, datetime]:
return (start_date, stop_date)

def find_scene_file_from_id(scene_id: str) -> Path:
"""
Finds the path to the scene on GADI based on the scene ID
"""Finds the path to the scene on GADI based on the scene ID
Parameters
----------
scene_id : str
Sentinel-1 scene ID
e.g. S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F6
Returns
-------
Path
Location of scene on NCI GADI
Raises
------
RuntimeError
Found more than one file -- expects one
RuntimeError
Found no files -- expects one. Or another Error
"""

# Parse the scene dates -- only start date is needed for search
Expand Down
54 changes: 54 additions & 0 deletions tests/filesystem/test_filesystem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from sar_antarctica.nci.preparation.orbits import find_latest_orbit_for_scene
from sar_antarctica.nci.preparation.scenes import find_scene_file_from_id

import dataclasses
from datetime import datetime
from pathlib import Path
import pytest

@dataclasses.dataclass
class Scene:
    """Expected values for one Sentinel-1 scene, used as a test fixture."""

    # Scene ID handed to the functions under test
    id: str
    # Expected path of the scene file
    file: Path
    # Sensor prefix of the scene ID, e.g. "S1A"
    sensor: str
    # Start and stop timestamps encoded in the scene ID
    start_date: datetime
    stop_date: datetime
    # Expected orbit file when no orbit type is requested
    latest_orbit: Path
    # Expected orbit file when orbit_type="POE"
    latest_poe_orbit: Path
    # Expected orbit file when orbit_type="RES"
    latest_res_orbit: Path


# Fixture: an S1A scene with the scene file and orbit files expected for it.
# All paths are absolute locations under /g/data/fj7/Copernicus/Sentinel-1/.
# latest_orbit is the same file as latest_poe_orbit.
scene_1 = Scene(
id="S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F66",
file=Path("/g/data/fj7/Copernicus/Sentinel-1/C-SAR/GRD/2022/2022-06/65S115E-70S120E/S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F66.zip"),
sensor="S1A",
start_date=datetime(2022,6,12,12,3,48),
stop_date=datetime(2022,6,12,12,4,52),
latest_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/POEORB/S1A/S1A_OPER_AUX_POEORB_OPOD_20220702T081845_V20220611T225942_20220613T005942.EOF"),
latest_poe_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/POEORB/S1A/S1A_OPER_AUX_POEORB_OPOD_20220702T081845_V20220611T225942_20220613T005942.EOF"),
latest_res_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/RESORB/S1A/S1A_OPER_AUX_RESORB_OPOD_20220612T143829_V20220612T104432_20220612T140202.EOF"),
)

# Fixture: an S1B scene, laid out the same way as scene_1.
scene_2 = Scene(
id="S1B_EW_GRDM_1SDH_20191130T165626_20191130T165726_019159_0242A2_2F58",
file=Path("/g/data/fj7/Copernicus/Sentinel-1/C-SAR/GRD/2019/2019-11/65S160E-70S165E/S1B_EW_GRDM_1SDH_20191130T165626_20191130T165726_019159_0242A2_2F58.zip"),
sensor="S1B",
start_date=datetime(2019,11,30,16,56,26),
stop_date=datetime(2019,11,30,16,57,26),
latest_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/POEORB/S1B/S1B_OPER_AUX_POEORB_OPOD_20191220T110516_V20191129T225942_20191201T005942.EOF"),
latest_poe_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/POEORB/S1B/S1B_OPER_AUX_POEORB_OPOD_20191220T110516_V20191129T225942_20191201T005942.EOF"),
latest_res_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/RESORB/S1B/S1B_OPER_AUX_RESORB_OPOD_20191130T210136_V20191130T154804_20191130T190534.EOF"),
)

# Scenes fed to the parametrized tests below.
scenes = [scene_1, scene_2]

@pytest.mark.parametrize("scene", scenes)
def test_find_latest_orbit_for_scene(scene: Scene):
    """Check the orbit file chosen for a scene with and without an orbit type."""
    # No orbit type requested
    found_default = find_latest_orbit_for_scene(scene.id)
    assert found_default == scene.latest_orbit

    # Restricted to RES orbits
    found_res = find_latest_orbit_for_scene(scene.id, orbit_type="RES")
    assert found_res == scene.latest_res_orbit

    # Restricted to POE orbits
    found_poe = find_latest_orbit_for_scene(scene.id, orbit_type="POE")
    assert found_poe == scene.latest_poe_orbit


@pytest.mark.parametrize("scene", scenes)
def test_find_scene_file_from_id(scene: Scene):
    """Check that a scene ID resolves to the expected scene file path."""
    located_file = find_scene_file_from_id(scene.id)
    assert located_file == scene.file
File renamed without changes.
33 changes: 33 additions & 0 deletions tests/sar_antarctica/test_orbits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from sar_antarctica.nci.preparation.orbits import parse_orbit_file_dates

from pathlib import Path
import pytest
import dataclasses
from datetime import datetime

@dataclasses.dataclass
class Orbit:
    """Expected parse results for one orbit file name, used as a test fixture."""

    # Orbit file name handed to the parser
    file: str
    # Timestamps expected back from the parser, in the order it returns them
    published_date: datetime
    start_date: datetime
    stop_date: datetime

# Fixtures: orbit file names paired with the published, start and stop
# datetimes embedded in each name.
orbit_1 = Orbit(
file="S1A_OPER_AUX_POEORB_OPOD_20141207T123431_V20141115T225944_20141117T005944.EOF",
published_date=datetime(2014, 12, 7,12,34,31),
start_date=datetime(2014,11,15,22,59,44),
stop_date=datetime(2014,11,17,0,59,44)
)
orbit_2 = Orbit(
file="S1A_OPER_AUX_POEORB_OPOD_20191220T120706_V20191129T225942_20191201T005942.EOF",
published_date=datetime(2019,12,20,12,7,6),
start_date=datetime(2019,11,29,22,59,42),
stop_date=datetime(2019,12,1,0,59,42)
)

# Orbits fed to the parametrized test below.
orbits = [orbit_1, orbit_2]

@pytest.mark.parametrize("orbit", orbits)
def test_parse_orbit_file_dates(orbit: Orbit):
    """Check the (published, start, stop) datetimes parsed from an orbit file name."""
    parsed_dates = parse_orbit_file_dates(orbit.file)
    assert parsed_dates == (orbit.published_date, orbit.start_date, orbit.stop_date)
48 changes: 48 additions & 0 deletions tests/sar_antarctica/test_scenes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import datetime
from sar_antarctica.nci.preparation.scenes import (
parse_scene_file_dates,
parse_scene_file_sensor,
)

import dataclasses
from datetime import datetime
from pathlib import Path
import pytest

@dataclasses.dataclass
class Scene:
    """Expected parse results for one Sentinel-1 scene ID, used as a test fixture."""

    # Scene ID handed to the parsers
    id: str
    # Path of the scene file that carries this ID
    file: Path
    # Sensor string expected from the scene ID, e.g. "S1A"
    sensor: str
    # Start and stop datetimes expected from the scene ID
    start_date: datetime
    stop_date: datetime

# Fixtures: scene IDs paired with the sensor prefix and the start/stop
# datetimes embedded in each ID. One S1A scene and one S1B scene.
scene_1 = Scene(
id="S1A_EW_GRDM_1SDH_20200330T165825_20200330T165929_031907_03AF02_8570",
file=Path("/g/data/fj7/Copernicus/Sentinel-1/C-SAR/GRD/2020/2020-03/70S050E-75S055E/S1A_EW_GRDM_1SDH_20200330T165825_20200330T165929_031907_03AF02_8570.zip"),
sensor="S1A",
start_date=datetime(2020,3,30,16,58,25),
stop_date=datetime(2020,3,30,16,59,29)
)

scene_2 = Scene(
id="S1B_EW_GRDM_1SDH_20210914T112333_20210914T112403_028693_036C96_3EA8",
file=Path("/g/data/fj7/Copernicus/Sentinel-1/C-SAR/GRD/2021/2021-09/60S120E-65S125E/S1B_EW_GRDM_1SDH_20210914T112333_20210914T112403_028693_036C96_3EA8.zip"),
sensor="S1B",
start_date=datetime(2021,9,14,11,23,33),
stop_date=datetime(2021,9,14,11,24,3)
)

# Scenes fed to the parametrized tests below.
scenes = [scene_1, scene_2]

@pytest.mark.parametrize("scene", scenes)
def test_parse_scene_file_dates(scene: Scene):
    """Check the (start, stop) datetimes parsed from a scene ID."""
    parsed_dates = parse_scene_file_dates(scene.id)
    assert parsed_dates == (scene.start_date, scene.stop_date)


@pytest.mark.parametrize("scene", scenes)
def test_parse_scene_file_sensor(scene: Scene):
    """Check the sensor string parsed from a scene ID."""
    parsed_sensor = parse_scene_file_sensor(scene.id)
    assert parsed_sensor == scene.sensor


0 comments on commit 581347f

Please sign in to comment.