From 874ddcbb77aeb6c43ba36a9a27ad64f2a5f0b605 Mon Sep 17 00:00:00 2001 From: raphaelrpl Date: Wed, 4 Oct 2023 08:38:32 -0300 Subject: [PATCH] :art: review parser scene support for sentinel-3 --- bdc_collectors/dataspace/__init__.py | 10 ++--- bdc_collectors/dataspace/odata.py | 18 ++++++-- bdc_collectors/scihub/parser.py | 53 +++++++++++++++++++++++ bdc_collectors/scihub/sentinel2.py | 65 +++++++++++++++++++++++++++- 4 files changed, 136 insertions(+), 10 deletions(-) diff --git a/bdc_collectors/dataspace/__init__.py b/bdc_collectors/dataspace/__init__.py index 334dd5d..da915d8 100644 --- a/bdc_collectors/dataspace/__init__.py +++ b/bdc_collectors/dataspace/__init__.py @@ -33,7 +33,7 @@ from ..base import BaseProvider, BulkDownloadResult, SceneResult, SceneResults from ..exceptions import DataOfflineError, DownloadError -from ..scihub.sentinel2 import Sentinel1, Sentinel2 +from ..scihub.sentinel2 import Sentinel1, Sentinel2, Sentinel3 from ..utils import download_stream, import_entry from ._token import TokenManager from .odata import ODATAStrategy @@ -104,6 +104,7 @@ def __init__(self, username: str, password: str, strategy: t.Optional[BaseProvid self.collections = { "SENTINEL-1": Sentinel1, "SENTINEL-2": Sentinel2, + "SENTINEL-3": Sentinel3 } manager_options = {k: v for k, v in kwargs.items() if k.startswith("token_")} @@ -119,14 +120,13 @@ def download(self, query: t.Union[SceneResult, str], output: str, *args, **kwarg if not isinstance(query, SceneResult): item_ids = kwargs.get("ids", []) - scene = "" - if query.startswith("S2"): - scene = query + scene = query if kwargs.get("sceneid") or kwargs.get("scene_id"): scene: str = kwargs.get("sceneid", kwargs.get("scene_id")) - if not scene.endswith(".SAFE"): + # Helper to set up SAFE files for Sentinel-1 and Sentinel-2 + if not scene.endswith(".SAFE") and scene[:2] in ("S1", "S2"): scene = f"{scene}.SAFE" item_ids.append(scene) diff --git a/bdc_collectors/dataspace/odata.py b/bdc_collectors/dataspace/odata.py index 
868b10a..f094b6a 100644 --- a/bdc_collectors/dataspace/odata.py +++ b/bdc_collectors/dataspace/odata.py @@ -55,8 +55,7 @@ def search(self, query, *args, **kwargs) -> SceneResults: if data.get("ids"): products = [] for item_id in data["ids"]: - safe_id = f"{item_id}.SAFE" if not item_id.endswith(".SAFE") else item_id - products_found = self._retrieve_products(f"Name eq '{safe_id}'") + products_found = self._retrieve_products(f"Name eq '{item_id}'") products.extend(products_found) return products @@ -74,8 +73,19 @@ def search(self, query, *args, **kwargs) -> SceneResults: if data.get("end_date"): filters.append(f"ContentDate/Start lt {get_date_time(data.pop('end_date')).strftime(STAC_RFC_DATETIME)}") - if data.get("product"): - filters.append(f"Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'productType' and att/OData.CSC.StringAttribute/Value eq '{data.pop('product')}')") + # Specific attribute helpers + # TODO: Implement an adaptative method to deal these attribute names which supports comparators like eq/lt/gt etc + for entry in ["productType", "instrumentShortName"]: + if data.get(entry): + filters.append(f"Attributes/OData.CSC.StringAttribute/any(att:att/Name eq '{entry}' and att/OData.CSC.StringAttribute/Value eq '{data.pop(entry)}')") + + # For unmapped attribute filter, the user may specify manual attributes + # attributes = ["Attributes/....... 
class Sentinel3Scene(SceneParser):
    """Parse Sentinel-3 scene identifiers.

    The identifier is split on ``_`` and every accessor reads the resulting
    pieces by position. An illustrative identifier that satisfies the checks
    performed here is::

        S3A_OL_1_EFR____20231004T130304_20231004T130604_20231004T145536_0179_104_081_3060_PS1_O_NR_003
    """

    fragments: List[str]

    def __init__(self, scene_id: str):
        """Split the scene identifier and validate its overall shape.

        A ``RuntimeError`` is raised unless the identifier splits into exactly
        18 pieces and the first piece starts with ``S3``.
        """
        super().__init__(scene_id)

        parts = scene_id.split('_')
        is_valid = len(parts) == 18 and parts[0].startswith('S3')
        if not is_valid:
            raise RuntimeError(f'Invalid sentinel scene {scene_id}')

        self.fragments = parts

    def _timestamp(self, position: int) -> datetime:
        """Parse the piece at ``position`` as a ``YYYYMMDDTHHMMSS`` timestamp."""
        return datetime.strptime(self.fragments[position], '%Y%m%dT%H%M%S')

    def tile_id(self):
        """Retrieve the tile id value (always ``None`` for this parser)."""
        return None

    def sensing_date(self):
        """Retrieve the scene sensing date, read from the 8th piece."""
        return self._timestamp(7)

    def processing_date(self):
        """Retrieve the scene processing date, read from the 10th piece."""
        return self._timestamp(9)

    def satellite(self):
        """Retrieve the Sentinel satellite - 3A/3B.

        This is the last two characters of the first piece.
        """
        return self.fragments[0][-2:]

    def source(self):
        """Retrieve the first piece of the scene identifier (S3A/S3B)."""
        return self.fragments[0]

    def datatype_id(self):
        """Return the pieces that make up the scene data type identifier.

        The value is a list holding the 4th to 7th pieces; pieces produced by
        consecutive underscores are empty strings.
        """
        # NOTE(review): callers receive a list, not a joined string - confirm
        # that this is what consumers expect.
        return self.fragments[3:7]

    def level(self) -> str:
        """Return the scene level (3rd piece)."""
        return self.fragments[2]

    def datasource(self) -> str:
        """Retrieve the data source/consumer name (2nd piece)."""
        return self.fragments[1]
class Sentinel3(SentinelCollection):
    """Simple abstraction for Sentinel-3."""

    parser_class = Sentinel3Scene

    def get_files(self, collection, path=None, prefix=None):
        """List all NetCDF files of the scene.

        The search is recursive. When ``path`` is not given, the scene
        directory computed by :meth:`path` is used.

        Returns a dict mapping each file name without extension to its
        ``Path``. When two files share the same stem, the one found last wins.
        """
        if path is None:
            path = self.path(collection, prefix)

        return {entry.stem: entry for entry in Path(path).rglob("*.nc")}

    def compressed_file(self, collection, prefix=None, path_include_month=False):
        """Retrieve path to the compressed scene (.zip) on local storage.

        The file is expected inside the scene directory and is named after
        the scene identifier.
        """
        if prefix is None:
            prefix = current_app.config.get('DATA_DIR')

        scene_id = self.parser.scene_id
        scene_path = self.path(collection, prefix=prefix, path_include_month=path_include_month)

        return scene_path / f'{scene_id}.zip'

    def path(self, collection, prefix=None, path_include_month=False) -> Path:
        """Retrieve the path to the scene directory on Brazil Data Cube cluster.

        The layout is
        ``<prefix>/<collection name>/<version>/<year>/<month>/<scene id>``.
        The month is not zero-padded, and it is always part of the path:
        ``path_include_month`` is accepted but not consulted here.
        When ``prefix`` is ``None`` the application ``DATA_DIR`` setting is
        used; an empty or missing setting yields a relative path.
        """
        if prefix is None:
            prefix = current_app.config.get('DATA_DIR')

        # Parse the sensing date once instead of once per component.
        sensing_date = self.parser.sensing_date()
        year = str(sensing_date.year)
        month = str(sensing_date.month)
        version = entry_version(collection.version)
        scene_id = self.parser.scene_id

        relative = Path(collection.name) / version / year / month / scene_id

        return Path(prefix or '') / relative

    def get_assets(self, collection, path=None, prefix=None) -> dict:
        """Retrieve the map assets of Sentinel product.

        Only the ``thumbnail`` asset is reported: the path, as a string, of a
        JPEG file found recursively under the scene directory. The key is
        absent when no JPEG exists.
        """
        if path is None:
            path = self.path(collection, prefix=prefix)

        output = {}

        # ``rglob`` may yield several matches in arbitrary order. Sort them so
        # the selected thumbnail is deterministic, and store a single path:
        # stringifying the whole list would store its repr, not a usable path.
        thumbnails = sorted(Path(path).rglob('*.jpg'))

        if thumbnails:
            output['thumbnail'] = str(thumbnails[0])

        return output