From 874ddcbb77aeb6c43ba36a9a27ad64f2a5f0b605 Mon Sep 17 00:00:00 2001
From: raphaelrpl <raphael.wcosta@gmail.com>
Date: Wed, 4 Oct 2023 08:38:32 -0300
Subject: [PATCH] :art: review parser scene support for sentinel-3

---
 bdc_collectors/dataspace/__init__.py | 10 ++---
 bdc_collectors/dataspace/odata.py    | 18 ++++++--
 bdc_collectors/scihub/parser.py      | 53 +++++++++++++++++++++++
 bdc_collectors/scihub/sentinel2.py   | 65 +++++++++++++++++++++++++++-
 4 files changed, 136 insertions(+), 10 deletions(-)

diff --git a/bdc_collectors/dataspace/__init__.py b/bdc_collectors/dataspace/__init__.py
index 334dd5d..da915d8 100644
--- a/bdc_collectors/dataspace/__init__.py
+++ b/bdc_collectors/dataspace/__init__.py
@@ -33,7 +33,7 @@
 
 from ..base import BaseProvider, BulkDownloadResult, SceneResult, SceneResults
 from ..exceptions import DataOfflineError, DownloadError
-from ..scihub.sentinel2 import Sentinel1, Sentinel2
+from ..scihub.sentinel2 import Sentinel1, Sentinel2, Sentinel3
 from ..utils import download_stream, import_entry
 from ._token import TokenManager
 from .odata import ODATAStrategy
@@ -104,6 +104,7 @@ def __init__(self, username: str, password: str, strategy: t.Optional[BaseProvid
         self.collections = {
             "SENTINEL-1": Sentinel1,
             "SENTINEL-2": Sentinel2,
+            "SENTINEL-3": Sentinel3
         }
 
         manager_options = {k: v for k, v in kwargs.items() if k.startswith("token_")}
@@ -119,14 +120,13 @@ def download(self, query: t.Union[SceneResult, str], output: str, *args, **kwarg
         if not isinstance(query, SceneResult):
             item_ids = kwargs.get("ids", [])
 
-            scene = ""
-            if query.startswith("S2"):
-                scene = query
+            scene = query
 
             if kwargs.get("sceneid") or kwargs.get("scene_id"):
                 scene: str = kwargs.get("sceneid", kwargs.get("scene_id"))
 
-            if not scene.endswith(".SAFE"):
+            # Helper to set up SAFE files for Sentinel-1 and Sentinel-2
+            if not scene.endswith(".SAFE") and scene[:2] in ("S1", "S2"):
                 scene = f"{scene}.SAFE"
 
             item_ids.append(scene)
diff --git a/bdc_collectors/dataspace/odata.py b/bdc_collectors/dataspace/odata.py
index 868b10a..f094b6a 100644
--- a/bdc_collectors/dataspace/odata.py
+++ b/bdc_collectors/dataspace/odata.py
@@ -55,8 +55,7 @@ def search(self, query, *args, **kwargs) -> SceneResults:
         if data.get("ids"):
             products = []
             for item_id in data["ids"]:
-                safe_id = f"{item_id}.SAFE" if not item_id.endswith(".SAFE") else item_id
-                products_found = self._retrieve_products(f"Name eq '{safe_id}'")
+                products_found = self._retrieve_products(f"Name eq '{item_id}'")
                 products.extend(products_found)
 
             return products
@@ -74,8 +73,19 @@ def search(self, query, *args, **kwargs) -> SceneResults:
         if data.get("end_date"):
             filters.append(f"ContentDate/Start lt {get_date_time(data.pop('end_date')).strftime(STAC_RFC_DATETIME)}")
 
-        if data.get("product"):
-            filters.append(f"Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'productType' and att/OData.CSC.StringAttribute/Value eq '{data.pop('product')}')")
+        # Specific attribute helpers
+        # TODO: Implement an adaptive method to deal with these attribute names which supports comparators like eq/lt/gt etc
+        for entry in ["productType", "instrumentShortName"]:
+            if data.get(entry):
+                filters.append(f"Attributes/OData.CSC.StringAttribute/any(att:att/Name eq '{entry}' and att/OData.CSC.StringAttribute/Value eq '{data.pop(entry)}')")
+
+        # For unmapped attribute filter, the user may specify manual attributes
+        # attributes = ["Attributes/....... eq '10'"]
+        if data.get("attributes"):
+            if not isinstance(data["attributes"], t.Iterable):
+                raise TypeError("Invalid value for 'attributes'.")
+
+            filters.extend(data["attributes"])
 
         return self._retrieve_products(*filters)
 
diff --git a/bdc_collectors/scihub/parser.py b/bdc_collectors/scihub/parser.py
index 8170a3b..5c0d3fd 100644
--- a/bdc_collectors/scihub/parser.py
+++ b/bdc_collectors/scihub/parser.py
@@ -100,3 +100,56 @@ def satellite(self):
     def source(self):
         """Retrieve the scene first parameter (S2A/S2B)."""
         return self.fragments[0]
+
+
+class Sentinel3Scene(SceneParser):
+    """Parse Sentinel-3 scene identifiers split on underscores.
+
+    Follows the `Sentinel-3 Naming Convention <https://sentinels.copernicus.eu/web/sentinel/user-guides/sentinel-3-olci/naming-convention>`_.
+    """
+
+    fragments: List[str]
+
+    def __init__(self, scene_id: str):
+        """Split ``scene_id`` on ``_``, requiring 18 fragments and an ``S3`` prefix."""
+        super().__init__(scene_id)
+        parts = scene_id.split('_')
+        well_formed = len(parts) == 18 and parts[0].startswith('S3')
+        if not well_formed:
+            raise RuntimeError(f'Invalid sentinel scene {scene_id}')
+        self.fragments = parts
+
+    def tile_id(self):
+        """Retrieve the tile id value; this parser always yields ``None``."""
+        return None
+
+    def sensing_date(self):
+        """Retrieve the scene sensing date, read from fragment 7."""
+        raw_date = self.fragments[7]
+        return datetime.strptime(raw_date, '%Y%m%dT%H%M%S')
+
+    def processing_date(self):
+        """Retrieve the scene processing date, read from fragment 9."""
+        raw_date = self.fragments[9]
+        return datetime.strptime(raw_date, '%Y%m%dT%H%M%S')
+
+    def satellite(self):
+        """Retrieve the Sentinel satellite - 3A/3B (last two characters of the source)."""
+        return self.fragments[0][-2:]
+
+    def source(self):
+        """Retrieve the scene first parameter (S3A/S3B)."""
+        first_fragment = self.fragments[0]
+        return first_fragment
+
+    def datatype_id(self):
+        """Return fragments 3 to 6 (as a list) holding the data type identifier."""
+        return self.fragments[3:7]
+
+    def level(self) -> str:
+        """Return the scene level (fragment 2)."""
+        return self.fragments[2]
+
+    def datasource(self) -> str:
+        """Retrieve the data source/consume name (fragment 1)."""
+        return self.fragments[1]
\ No newline at end of file
diff --git a/bdc_collectors/scihub/sentinel2.py b/bdc_collectors/scihub/sentinel2.py
index 3b1c6d6..f63f966 100644
--- a/bdc_collectors/scihub/sentinel2.py
+++ b/bdc_collectors/scihub/sentinel2.py
@@ -21,8 +21,9 @@
 
 from flask import current_app
 
+from ..utils import entry_version
 from .base import SentinelCollection
-from .parser import Sentinel1Scene
+from .parser import Sentinel1Scene, Sentinel3Scene
 
 
 class Sentinel1(SentinelCollection):
@@ -53,3 +54,65 @@ def get_files(self, collection, path=None, prefix=None):
 
 class Sentinel2(SentinelCollection):
     """Simple abstraction for Sentinel-2."""
+
+
+class Sentinel3(SentinelCollection):
+    """Simple abstraction for Sentinel-3."""
+
+    parser_class = Sentinel3Scene
+
+    def get_files(self, collection, path=None, prefix=None):
+        """List all files in the collection, keyed by file stem.
+
+        When ``path`` is omitted it is resolved through :meth:`path`.
+        """
+        if path is None:
+            path = self.path(collection, prefix)
+
+        # Only ``.nc`` files are collected, searched recursively.
+        return {entry.stem: entry for entry in Path(path).rglob("*.nc")}
+
+    def compressed_file(self, collection, prefix=None, path_include_month=False):
+        """Retrieve path to the compressed scene (.zip) on local storage."""
+        if prefix is None:
+            prefix = current_app.config.get('DATA_DIR')
+
+        scene_path = self.path(collection, prefix=prefix, path_include_month=path_include_month)
+
+        return scene_path / f'{self.parser.scene_id}.zip'
+
+    def path(self, collection, prefix=None, path_include_month=False) -> Path:
+        """Retrieve the path to the scene folder on Brazil Data Cube cluster.
+
+        Layout: ``<prefix>/<collection.name>/<version>/<year>/<month>/<scene_id>``,
+        where ``prefix`` falls back to the ``DATA_DIR`` app setting. The month is
+        always included: ``path_include_month`` is not consulted in this method.
+        """
+        if prefix is None:
+            prefix = current_app.config.get('DATA_DIR')
+
+        # Parse the sensing date a single time instead of once per component.
+        sensing_date = self.parser.sensing_date()
+        version = entry_version(collection.version)
+
+        relative = (
+            Path(collection.name) / version / str(sensing_date.year)
+            / str(sensing_date.month) / self.parser.scene_id
+        )
+
+        return Path(prefix or '') / relative
+
+    def get_assets(self, collection, path=None, prefix=None) -> dict:
+        """Retrieve the map assets of Sentinel product.
+
+        Only a ``thumbnail`` entry is produced, and only when a ``.jpg`` exists.
+        """
+        if path is None:
+            path = self.path(collection, prefix=prefix)
+
+        # Take the first match: ``str()`` over the whole list would store the
+        # list repr (``"[PosixPath('...')]"``) rather than a usable file path.
+        thumbnail = next(Path(path).rglob('*.jpg'), None)
+        if thumbnail is None:
+            return {}
+        return {'thumbnail': str(thumbnail)}
\ No newline at end of file