From f2ddb2f06e91e90fc43e6c00a250f780280119d4 Mon Sep 17 00:00:00 2001 From: raphaelrpl Date: Fri, 22 Sep 2023 10:35:05 -0300 Subject: [PATCH] :books: review docs and :bookmark: prepare release 1.0 --- CHANGES.rst | 8 ++++++++ USAGE.rst | 20 +++++++++++++++++++ bdc_collectors/dataspace/__init__.py | 29 +++++++++++++++++++-------- bdc_collectors/dataspace/_cache.py | 4 ++-- bdc_collectors/dataspace/_token.py | 2 ++ bdc_collectors/utils.py | 2 +- bdc_collectors/version.py | 2 +- docs/sphinx/api.rst | 30 ++++++++++++++++++++++++++++ 8 files changed, 85 insertions(+), 12 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 116253b..0562b26 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,6 +11,14 @@ Changes ======= +Version 1.0.0 (2023-09-22) +-------------------------- + +- Review module dependencies +- Add driver for Copernicus DataSpace EcoSystem. +- Improve docs for command line and downloading. + + Version 0.9.0 (2023-01-26) -------------------------- diff --git a/USAGE.rst b/USAGE.rst index 49bed83..5266651 100644 --- a/USAGE.rst +++ b/USAGE.rst @@ -156,6 +156,12 @@ To download Landsat-8 Digital Number from `USGS Earth Explorer `_:: bdc-collector download --provider=SciHub \ @@ -173,6 +179,20 @@ To download L2A:: --password=password +Dataspace +~~~~~~~~~ + + +To download Sentinel-2 from `Dataspace EcoSystem <https://dataspace.copernicus.eu/>`_:: + + bdc-collector download --provider=Dataspace \ + --scene-id=S2B_MSIL2A_20200930T135119_N0214_R024_T21KXA_20200930T175714 \ + --output=. 
\ + --dataset SENTINEL-2 \ + --username=user \ + --password=password + + Google Public Data Sets ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/bdc_collectors/dataspace/__init__.py b/bdc_collectors/dataspace/__init__.py index 200248b..d2c3b2c 100644 --- a/bdc_collectors/dataspace/__init__.py +++ b/bdc_collectors/dataspace/__init__.py @@ -24,13 +24,14 @@ import logging import os import shutil +import time import typing as t from urllib.parse import ParseResult, urlparse import requests from ..base import BaseProvider, BulkDownloadResult, SceneResult, SceneResults -from ..exceptions import DataOfflineError +from ..exceptions import DataOfflineError, DownloadError from ..scihub.sentinel2 import Sentinel1, Sentinel2 from ..utils import download_stream, import_entry from ._token import TokenManager @@ -60,7 +61,7 @@ class DataspaceProvider(BaseProvider): `Access Token `_, an ``access_token`` is required to download data. By default, this module stores these tokens in :class:`bdc_collectors.dataspace._token.TokenManager`. Whenever a download is initiated by - :method:`bdc_collectors.dataspace.DataspaceProvider.download`, the bdc-collectors creates two (2) access tokens + :meth:`bdc_collectors.dataspace.DataspaceProvider.download`, the bdc-collectors creates two (2) access tokens in memory and then use it to download as many scenes as can. When the token expires, it automatically refresh a new token. 
@@ -75,6 +76,7 @@ class DataspaceProvider(BaseProvider): You may change the API backend with command: + >>> from bdc_collectors.dataspace.stac import StacStrategy >>> stac = StacStrategy() >>> provider = DataspaceProvider(username='user@email.com', password='passwd', strategy=stac) @@ -151,14 +153,25 @@ def download(self, query: t.Union[SceneResult, str], output: str, *args, **kwarg download_url = parsed_changed.geturl() - token = self._token_manager.get_token() + # Retry 3 times before reject + for i in range(3): + token = self._token_manager.get_token() + + headers = {"Authorization": f"Bearer {token.token}"} + self.session.headers = headers + try: + response = self.session.get(download_url, stream=True, timeout=600, allow_redirects=True) + + # TODO: Validate Offline/Exception to retry later Checksum + download_stream(tmp_file, response, progress=self._kwargs.get("progress", False)) - headers = {"Authorization": f"Bearer {token.token}"} - self.session.headers = headers - response = self.session.get(download_url, stream=True, timeout=600, allow_redirects=True) + break + except Exception: + logging.debug(f"Error in download {query.scene_id}") + time.sleep(3) - # TODO: Validate Offline/Exception to retry later Checksum - download_stream(tmp_file, response, progress=self._kwargs.get("progress", False)) + if i == 2: + raise DownloadError(f"Could not download {query.scene_id}") shutil.move(tmp_file, target_file) diff --git a/bdc_collectors/dataspace/_cache.py b/bdc_collectors/dataspace/_cache.py index 27c43a6..2bc8267 100644 --- a/bdc_collectors/dataspace/_cache.py +++ b/bdc_collectors/dataspace/_cache.py @@ -19,8 +19,8 @@ """Define a minimal cache strategy for Dataspace metadata. 
This file contains the following strategies: -- RedisStrategy -- RawDictStrategy +- :class:`bdc_collectors.dataspace._cache.RedisStrategy` +- :class:`bdc_collectors.dataspace._cache.RawDictStrategy` """ import os diff --git a/bdc_collectors/dataspace/_token.py b/bdc_collectors/dataspace/_token.py index ebe665b..9c45bbf 100644 --- a/bdc_collectors/dataspace/_token.py +++ b/bdc_collectors/dataspace/_token.py @@ -37,6 +37,7 @@ class TokenManager: Examples: Use the TokenManager as following to generate a new token: + >>> from bdc_collectors.dataspace._token import TokenManager >>> manager = TokenManager("username", "password") >>> token = manager.get_token() @@ -44,6 +45,7 @@ class TokenManager: >>> another = manager.get_token() You can also use Redis Backend for token management. (Make sure you have the library 'redis' installed and server up and running.) + >>> from bdc_collectors.dataspace._cache import RedisStrategy >>> from bdc_collectors.dataspace._token import TokenManager >>> manager = TokenManager("username", "password", token_cache=RedisStrategy()) diff --git a/bdc_collectors/utils.py b/bdc_collectors/utils.py index 060dfc7..392f8e5 100644 --- a/bdc_collectors/utils.py +++ b/bdc_collectors/utils.py @@ -27,8 +27,8 @@ import dateutil import requests -from shapely import from_wkt from shapely.geometry import base, shape +from shapely.wkt import loads as from_wkt from tqdm import tqdm from .exceptions import DownloadError diff --git a/bdc_collectors/version.py b/bdc_collectors/version.py index c856c0d..6ca450f 100644 --- a/bdc_collectors/version.py +++ b/bdc_collectors/version.py @@ -18,4 +18,4 @@ """Version information for BDC-Collectors.""" -__version__ = '0.9.0' +__version__ = '1.0.0' diff --git a/docs/sphinx/api.rst b/docs/sphinx/api.rst index 32b21fe..8cf1c19 100644 --- a/docs/sphinx/api.rst +++ b/docs/sphinx/api.rst @@ -40,6 +40,10 @@ Providers SciHub Copernicus +++++++++++++++++ +.. deprecated:: 1.0 + + Use `Dataspace`_ instead. + .. 
automodule:: bdc_collectors.scihub :members: @@ -79,6 +83,32 @@ MODIS :members: +.. _Dataspace: + +Copernicus Dataspace EcoSystem +++++++++++++++++++++++++++++++ + +.. automodule:: bdc_collectors.dataspace + :members: + + +Dataspace API Implementations + +.. automodule:: bdc_collectors.dataspace.odata + :members: + + +.. automodule:: bdc_collectors.dataspace.stac + :members: + + +.. automodule:: bdc_collectors.dataspace._token + :members: + + +.. automodule:: bdc_collectors.dataspace._cache + :members: + Exceptions ----------