diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..6cc2fe6 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,139 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/sphinx/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Local Development +.vscode +.idea +docs/sphinx/_build +*.zip +*.tar.gz + +# General Mac Files +.DS_Store diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c2742bd --- /dev/null +++ b/.gitignore @@ -0,0 +1,140 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/sphinx/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Local Development +.vscode +.idea +doc/sphinx/_build + +# General Mac Files +.DS_Store + +# RabbitMQ +volumes/ \ No newline at end of file diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..836dd2d --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,23 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +version: 2 + +sphinx: + configuration: docs/sphinx/conf.py + +formats: [] + +python: + version: 3.7 + + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..5e7d38c --- /dev/null +++ b/.travis.yml @@ -0,0 +1,50 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +language: python + +os: linux + +cache: + directories: + - $HOME/.cache/pip + +git: + quiet: true + submodules: false + +branches: + only: + - master + - /^b-\d+\.\d+/ + +dist: bionic + +services: + - docker + +python: + - 3.7 + +before_install: + - pip install --upgrade pip + - pip install --upgrade setuptools + - docker pull postgis/postgis:12-3.0 + - docker run --name bdc-pg -p 127.0.0.1:5432:5432 -e POSTGRES_PASSWORD=postgres -d postgis/postgis:12-3.0 + - docker ps -a + +install: + - pip install -e .[tests,docs] + +script: + - ./run-tests.sh + +after_success: + - docker stop bdc-pg + - docker rm bdc-pg + - coveralls diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 0000000..84a527a --- /dev/null +++ b/CHANGES.rst @@ -0,0 +1,11 @@ +.. + This file is part of BDC-Collectors. + Copyright (C) 2020 INPE. + + BDC-Collectors is a free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + + +======= +Changes +======= diff --git a/INSTALL.rst b/INSTALL.rst new file mode 100644 index 0000000..7bbc454 --- /dev/null +++ b/INSTALL.rst @@ -0,0 +1,101 @@ +.. + This file is part of BDC-Collectors. + Copyright (C) 2019-2020 INPE. + + BDC-Collectors is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + + +Installation +============ + + +Development installation +------------------------ + + +Pre-Requirements +++++++++++++++++ + + +The ``Brazil Data Cube Collectors`` (``BDC-Collectors``) depends essentially on: + +- `Python Client Library for STAC (stac.py) ` + +- `Flask `_ + +- `BDC-Catalog `_. 
+ +- `rasterio `_ + +- `Shapely `_ + + +Clone the software repository ++++++++++++++++++++++++++++++ + + +Use ``git`` to clone the software repository:: + + git clone https://github.com/brazil-data-cube/bdc-collectors.git + + +Install BDC-Collectors in Development Mode +++++++++++++++++++++++++++++++++++++++++++ + + +Go to the source code folder:: + + cd bdc-collectors + + +Install in development mode:: + + pip3 install -e .[all] + + +.. note:: + + If you want to create a new *Python Virtual Environment*, please, follow this instruction: + + *1.* Create a new virtual environment linked to Python 3.7:: + + python3.7 -m venv venv + + + **2.** Activate the new environment:: + + source venv/bin/activate + + + **3.** Update pip and setuptools:: + + pip3 install --upgrade pip + + pip3 install --upgrade setuptools + + +Build the Documentation ++++++++++++++++++++++++ + + +You can generate the documentation based on Sphinx with the following command:: + + python setup.py build_sphinx + + +The above command will generate the documentation in HTML and it will place it under: + +.. code-block:: shell + + doc/sphinx/_build/html/ + + +The above command will generate the documentation in HTML and it will place it under:: + + docs/sphinx/_build/html/ + + +You can open the above documentation in your favorite browser, as:: + + firefox docs/sphinx/_build/html/index.html \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a59bb5c --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019-2020 National Institute for Space Research (INPE). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..242c898 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,35 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. 
+# + +exclude docs/sphinx/_build +exclude .dockerignore + +include *.rst +include *.json +include *.yml +include *.sh +include *.txt +include LICENSE +include pytest.ini +include alembic.ini +recursive-exclude docs/sphinx/_build * +recursive-exclude migrations * +recursive-exclude docker * +recursive-include bdc_collectors *.py +recursive-include bdc_collectors *.sql +recursive-include docs *.bat +recursive-include docs *.css +recursive-include docs *.ico +recursive-include docs *.png +recursive-include docs *.py +recursive-include docs *.rst +recursive-include docs Makefile +recursive-include examples *.py +recursive-include examples Dockerfile +recursive-include tests *.py +recursive-include tests *.json \ No newline at end of file diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..bd24097 --- /dev/null +++ b/README.rst @@ -0,0 +1,88 @@ +.. + This file is part of BDC-Collectors. + Copyright (C) 2020 INPE. + + BDC-Collectors is a free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + + +======================================================================= +BDC Collectors Extension for Brazil Data Cube Applications and Services +======================================================================= + + +.. image:: https://img.shields.io/badge/license-MIT-green + :target: https://github.com/brazil-data-cube/bdc-collectors/blob/master/LICENSE + :alt: Software License + + +.. image:: https://travis-ci.org/brazil-data-cube/bdc-collectors.svg?branch=master + :target: https://travis-ci.org/brazil-data-cube/bdc-collectors + :alt: Build Status + + +.. image:: https://coveralls.io/repos/github/brazil-data-cube/bdc-collectors/badge.svg?branch=master + :target: https://coveralls.io/github/brazil-data-cube/bdc-collectors?branch=master + :alt: Code Coverage Test + + +.. image:: https://readthedocs.org/projects/bdc-collectors/badge/?version=latest + :target: https://bdc-collectors.readthedocs.io/en/latest + :alt: Documentation Status + + +.. image:: https://img.shields.io/badge/lifecycle-experimental-orange.svg + :target: https://www.tidyverse.org/lifecycle/#experimental + :alt: Software Life Cycle + + +.. image:: https://img.shields.io/github/tag/brazil-data-cube/bdc-collectors.svg + :target: https://github.com/brazil-data-cube/bdc-collectors/releases + :alt: Release + + +.. image:: https://img.shields.io/discord/689541907621085198?logo=discord&logoColor=ffffff&color=7389D8 + :target: https://discord.com/channels/689541907621085198# + :alt: Join us at Discord + + +About +===== + + +BDC-Collectors is a Flask extension for Brazil Data Cube to collect data from multiple providers. + + +Other applications can also define custom collectors using Python entry point in ``setup.py``. See the `USAGE.rst `_ documentation on how to take advantage of this package. + + +Installation +============ + + +See `INSTALL.rst <./INSTALL.rst>`_. + + +Usage +===== + + +See `USAGE.rst <./USAGE.rst>`_. + + +Developer Documentation +======================= + + +See https://bdc-collectors.readthedocs.io/en/latest. + + +License +======= + + +.. admonition:: + Copyright (C) 2020 INPE. + + BDC-Collectors is a free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. diff --git a/USAGE.rst b/USAGE.rst new file mode 100644 index 0000000..9dbc103 --- /dev/null +++ b/USAGE.rst @@ -0,0 +1,247 @@ +.. + This file is part of BDC-Collectors. 
+ Copyright (C) 2019-2020 INPE. + + BDC-Collectors is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + + +Usage +===== + + +Command-Line Interface (CLI) +---------------------------- + + +The ``BDC-Collectors`` extension installs a command line tool named ``bdc-collector``: + +- ``show-providers``: List all the supported providers. + +- ``search``: Search for products on remote server. + +- ``download``: Download scenes from remote server. + +- ``priority``: Download scenes associated with ``bdc_catalog.models.Collection`` and ``bdc_catalog.models.Provider``. + + +Search Data sets +++++++++++++++++ + +The command ``search`` has the following parameters:: + + Search for data set in the given provider. + + Options: + -p --provider TEXT [required] Provider name to search. + -d --dataset TEXT [required] Data set name in provider. + -b --bbox TEXT [required] Bounding box definition (west, south, east, north). + -t --time TEXT [required] Time interval. (start/end). Format should be (YYYY-mm-dd) + -u --username TEXT Optional username used to search in provider. + --password TEXT Optional password used to search in provider. + --platform TEXT Platform sensor (if required) + --help Show this message and exit. + +SciHub +~~~~~~ + +To search for Sentinel-2 L1 in `SciHub `_ catalog:: + + bdc-collector search --provider=SciHub \ + --dataset=S2MSI1C \ + --platform=Sentinel-2 \ + --time=2020-01-01/2020-01-15 \ + --bbox=-54,-12,-50,-10 \ + --username=user \ + --password=password + + +To search for Sentinel-1 GRD in `SciHub `_ catalog:: + + bdc-collector search --provider=SciHub \ + --dataset=GRD \ + --platform=Sentinel-1 \ + --time=2020-01-01/2020-01-15 \ + --bbox=-54,-12,-50,-10 \ + --username=user \ + --password=password + +.. note:: + + Make sure to change ``--username`` and ``--password``. You can create an account in + `SciHub Registration `_. + + You can also search for `Sentinel-2` `L2A` products. Use ``dataset=S2MSI2A`` and ``platform=Sentinel-2``. + + +USGS +~~~~ + +To search for Landsat-8 Digital Number in `USGS Earth Explorer `_:: + + bdc-collector search --provider=USGS \ + --dataset=LANDSAT_8_C1 \ + --time=2020-01-01/2020-01-15 \ + --bbox=-54,-12,-50,-10 \ + --username=user \ + --password=password + + +.. note:: + + Make sure to change ``--username`` and ``--password``. You can create an account in + `USGS EROS Registration `_. + + You can also search for others Landsat products: + + - ``Landsat-4/5``, use ``dataset=LANDSAT_TM_C1`` + - ``Landsat-7``, use ``dataset=LANDSAT_ETM_C1`` + + In the future, we will implement a way to request for L8 Surface Reflectance data sets. + + +Download scenes ++++++++++++++++ + +The command ``download`` has the following parameters:: + + Search for data set in the given provider. + + Options: + -p, --provider TEXT [required] Provider name to search + -s, --scene-id TEXT [required] Scene Identifier to download. + -o, --output TEXT [required] Save output directory + -u, --username TEXT Optional username to download + -P, --password TEXT User password + --help Show this message and exit. + + +.. note:: + + Currently, you can only download by ``scene_id`` like ``S2B_MSIL1C_20200223T135109_N0209_R024_T21LZG_20200223T153255``. + + We will implement way to download from tiles, since some apis (`sentinel-sat` - `SciHub`) already support this feature. 
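+
+The ``download`` command is a thin wrapper over the provider classes. The
+following minimal sketch shows the programmatic equivalent (it assumes a Flask
+application context and valid SciHub credentials, and mirrors what the CLI does
+internally):
+
+.. code-block:: python
+
+    from flask import current_app
+
+    # Retrieve the CollectorExtension registered by the application factory.
+    ext = current_app.extensions['bdc:collector']
+
+    # Look up the provider class and build it with user credentials.
+    provider_class = ext.get_provider('SciHub')
+    provider = provider_class(username='user', password='password', progress=True)
+
+    # Download a single scene into the current directory.
+    provider.download('S2B_MSIL1C_20200223T135109_N0209_R024_T21LZG_20200223T153255',
+                      output='.')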
+ + +USGS +~~~~ + +To download Landsat-8 Digital Number from `USGS Earth Explorer `_:: + + bdc-collector download --provider=USGS \ + --scene-id=LC08_L1TP_223064_20200831_20200906_01_T1 \ + --output=. \ + --username=user \ + --password=password + + +SciHub +~~~~~~ + +To download Sentinel-2 from `SciHub `_:: + + bdc-collector download --provider=SciHub \ + --scene-id=S2B_MSIL1C_20200223T135109_N0209_R024_T21LZG_20200223T153255 \ + --output=. \ + --username=user \ + --password=password + +To download L2A:: + + bdc-collector download --provider=SciHub \ + --scene-id=S2B_MSIL2A_20200930T135119_N0214_R024_T21KXA_20200930T175714 \ + --output=. \ + --username=user \ + --password=password + + +Google Public Data Sets +~~~~~~~~~~~~~~~~~~~~~~~ + +You can also download both Landsat Digital Number and Sentinel-2 (L1C/L2A) from `Google Public Data Sets `_. +In order to do that, you will need to create an `Google Service Account Key `_ and export +the variable ``GOOGLE_APPLICATION_CREDENTIALS=path/to/google/your_service_account_key.json``.:: + + + export GOOGLE_APPLICATION_CREDENTIALS=path/to/google/your_service_account_key.json + + bdc-collector download --provider=Google \ + --scene-id=LC08_L1TP_223064_20200831_20200906_01_T1 \ + --output=. + + +You can download Sentinel-2 produts with:: + + export GOOGLE_APPLICATION_CREDENTIALS=path/to/google/your_service_account_key.json + + bdc-collector download --provider=Google \ + --scene-id=S2B_MSIL1C_20200223T135109_N0209_R024_T21LZG_20200223T153255 \ + --output=. + + +ONDA Catalogue +~~~~~~~~~~~~~~ + +You can also download Sentinel scenes from alternative `ONDA DIAS Catalogue `_. + +In order to do that, you must have an account `ONDA User Portal Registration `_.:: + + bdc-collector download --provider=ONDA \ + --scene-id=S2B_MSIL1C_20200223T135109_N0209_R024_T21LZG_20200223T153255 \ + --output=. \ + --username=user \ + --password=password + + +Preparing a new package with BDC-Collectors +------------------------------------------- + +In order to attach ``BDC-Collectors`` into your application, use the following statements: + +.. code-block:: python + + from flask import flask + from bdc_collectors.ext import CollectorExtension + + app = Flask(__name__) + CollectorExtension(app) + +.. note:: + + If you would like to connect into database with ``BDC-Catalog``, make sure to follow the steps defined in + `BDC-Catalog `_. + + + +Preparing a new provider for BDC-Collectors +------------------------------------------- + + +The ``BDC-Collectors`` follows the `Python Entry point specification `_ to +discover and load libraries dynamically. + + +Basically, the ``BDC-Collectors`` has the following entry points to deal with dynamic data provider: + +- ``bdc_db.providers``: The alembic migration folders. + + +.. note:: + + You can also set ``bdc_db.scripts`` if you would like to insert a new SQL for data provider. + Check `BDC-DB `_ for further details. + + +These entry points may be defined in the ``setup.py`` of your package. + + +The following code is an example of an ``entry_points`` in ``setup.py`` file: + + +.. code-block:: python + + entry_points={ + 'bdc_collectors.providers': [ + 'mycatalog = my_app.mycatalog' + ] + } diff --git a/bdc_collectors/__init__.py b/bdc_collectors/__init__.py new file mode 100644 index 0000000..48c10f1 --- /dev/null +++ b/bdc_collectors/__init__.py @@ -0,0 +1,34 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. 
+# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +"""Python module for Brazil Data Cube Collectors.""" + +from flask import Flask + +from .ext import CollectorExtension +from .version import __version__ + + +def create_app() -> Flask: + """Create instance of Flask application for BDC-Collectors.""" + from bdc_catalog.ext import BDCCatalog + + app = Flask(__name__) + + # TODO: We should remove the BDC-Catalog initialization and pass to the invoker. + BDCCatalog(app) + CollectorExtension(app) + + return app + + +__all__ = ( + '__version__', + 'create_app', + 'CollectorExtension', +) \ No newline at end of file diff --git a/bdc_collectors/__main__.py b/bdc_collectors/__main__.py new file mode 100644 index 0000000..9ea1941 --- /dev/null +++ b/bdc_collectors/__main__.py @@ -0,0 +1,14 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +"""Python module for Brazil Data Cube Collectors.""" + +from .cli import main + +if __name__ == '__main__': + main(as_module=True) diff --git a/bdc_collectors/base.py b/bdc_collectors/base.py new file mode 100644 index 0000000..708ad16 --- /dev/null +++ b/bdc_collectors/base.py @@ -0,0 +1,152 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +"""Define the base abstractions for BDC-Collectors and Data Collections.""" + +from datetime import datetime +from pathlib import Path +from typing import Dict, Iterator, List, Tuple, Type + +from bdc_catalog.models import Collection +from flask import current_app + + +class SceneParser: + """Define the base parser of Scene identifiers.""" + + scene_id: str + + def __init__(self, scene_id: str): + """Create the scene parser.""" + self.scene_id = scene_id + + def tile_id(self) -> str: + """Retrieve the tile identifier from scene_id.""" + raise NotImplementedError() + + def sensing_date(self) -> datetime: + """Retrieve the scene sensing date.""" + raise NotImplementedError() + + def processing_date(self) -> datetime: + """Retrieve the scene processing date.""" + return self.sensing_date() + + def satellite(self) -> str: + """Retrieve the scene satellite origin.""" + raise NotImplementedError() + + def source(self) -> str: + """Define meta information for scene_id.""" + raise NotImplementedError() + + +class BaseCollection: + """Define the collection signature of a Provider.""" + + parser_class: Type[SceneParser] + parser: SceneParser + collection: Collection + + def __init__(self, scene_id: str, collection: Collection = None): + """Create the data collection definition.""" + self.parser = self.parser_class(scene_id) + self.collection = collection + + def get_files(self, collection: Collection, path=None, prefix=None) -> Iterator[Path]: + """List all files in the collection.""" + return [entry for entry in self.path(collection, prefix).glob('*') if entry.is_file()] + + def path(self, collection: Collection, prefix=None) -> Path: + """Retrieve the relative path to the Collection on Brazil Data Cube cluster.""" + if prefix is None: + prefix = current_app.config.get('DATA_DIR') + + sensing_date = self.parser.sensing_date() + + year_month = 
sensing_date.strftime('%Y-%m') + + scene_path = Path(prefix or '') / 'Repository/Archive' / collection.name / year_month / self.parser.tile_id() + + scene_path = scene_path / self.parser.scene_id + + return scene_path + + def compressed_file(self, collection: Collection, prefix=None) -> Path: + """Retrieve the path to the compressed file L1.""" + raise NotImplementedError() + + def __str__(self): + """Define data collection string representation.""" + return 'BaseCollection' + + +class SceneResult(dict): + """Class structure for Query Scene results.""" + + def __init__(self, scene_id, cloud_cover, **kwargs): + """Create a scene result instance.""" + super().__init__(scene_id=scene_id, cloud_cover=cloud_cover, **kwargs) + + @property + def scene_id(self) -> str: + """Retrieve the scene identifier.""" + return self['scene_id'] + + @property + def cloud_cover(self) -> float: + """Retrieve the cloud cover metadata.""" + return self['cloud_cover'] + + @property + def link(self) -> str: + """Retrieve the link of scene id. + + Notes: + It usually points to download url. + """ + return self['link'] + + +DownloadResult = List[str] +ScheduledResult = List[str] +FailureResult = List[str] +BulkDownloadResult = Tuple[DownloadResult, ScheduledResult, FailureResult] +"""Type to identify Bulk download result, which represents Success, scheduled (offline) and failure.""" + + +class BaseProvider: + """Define the signature of a Data Collector Provider.""" + + collections: Dict[str, Type[BaseCollection]] = dict() + + def collections_supported(self): + """Retrieve the collections supported by the Provider instance.""" + return self.collections + + def get_collector(self, collection: str) -> Type[BaseCollection]: + """Retrieve the data type of the given data collection.""" + return self.collections.get(collection) + + def search(self, query, *args, **kwargs) -> List[SceneResult]: + """Search for data set in Provider. + + Args: + query - Data set reference name. + *args - Optional parameters order for the given provider. + **kwargs - Optional keywords for given provider, like start_date, end_date and so on. + """ + raise NotImplementedError() + + def download(self, scene_id: str, *args, **kwargs) -> str: + """Download the scene from remote provider.""" + raise NotImplementedError() + + def download_all(self, scenes: List[SceneResult], output: str, **kwargs) -> BulkDownloadResult: + """Bulk download scenes from remote provider.""" + raise NotImplementedError() diff --git a/bdc_collectors/cli.py b/bdc_collectors/cli.py new file mode 100644 index 0000000..8592146 --- /dev/null +++ b/bdc_collectors/cli.py @@ -0,0 +1,143 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +"""Command line for BDC-Collectors.""" + +import logging + +import click +from bdc_catalog.models import Collection +from flask import current_app +from flask.cli import FlaskGroup, with_appcontext + +from . import create_app + + +@click.group(cls=FlaskGroup, create_app=create_app) +def cli(): + """Command line for BDC-Collectors.""" + + +@cli.command() +@click.option('-p', '--provider', help='Provider name. 
(USGS, SciHub...)', required=True) +@click.option('-d', '--dataset', help='Dataset name', required=True) +@click.option('-b', '--bbox', help='Bounding Box (west, south, east, north)', required=True) +@click.option('-t', '--time', help='Time start/end', required=True) +@click.option('-u', '--username', help='User', required=False) +@click.option('--password', help='Password (if needed)', required=False) +@click.option('--platform', help='Platform sensor (if required)', required=False) +@with_appcontext +def search(provider, dataset, bbox, time, username=None, password=None, **kwargs): + """Search for data set in the given provider. + + Args: + provider - Provider name to search. + dataset - Data set name in provider. + bbox - Bounding box definition (west, south, east, north). + time - Time interval. (start/end). Format should be (YYYY-mm-dd) + username - Optional username used to search in provider. + password - Optional password used to search in provider. + """ + # Get BDC-Collectors extension and then seek for provider support. + ext = current_app.extensions['bdc:collector'] + + provider_class = ext.get_provider(provider) + + if provider_class is None: + raise RuntimeError(f'Provider {provider} not supported.') + + # Create an instance of supported provider. We pass progress=True for + # providers which support progress bar (SciHub). + p = provider_class(username=username, password=password, progress=True) + + bbox = [float(elm) for elm in bbox.split(',')] + + times = time.split('/') + + start_date, end_date = times + + res = p.search(query=dataset, bbox=bbox, start_date=start_date, end_date=end_date, **kwargs) + + print(res) + + +@cli.command() +@click.option('-p', '--provider', required=True) +@click.option('-s', '--scene-id', help='Scene Identifier', required=True) +@click.option('-o', '--output', help='Save output directory', required=True) +@click.option('-d', '--dataset', help='Data set', required=False) +@click.option('-u', '--username', help='User', required=False) +@click.option('-P', '--password', help='Password (if needed)', required=False) +@with_appcontext +def download(provider, scene_id, output, **kwargs): + """Search for data set in the given provider. + + Args: + provider - Provider name to search. + scene_id - Scene Id to download. + output - Directory to save + username - Optional username used to download from provider. + password - Optional password used to download from provider. + """ + ext = current_app.extensions['bdc:collector'] + + provider_class = ext.get_provider(provider) + + kwargs.setdefault('progress', True) + + p = provider_class(**kwargs) + + result = p.download(scene_id, output=output, force=False, **kwargs) + + click.secho(f'File saved in {result}') + + +@cli.command() +@with_appcontext +def show_providers(): + """List the supported providers of BDC-Collectors.""" + ext = current_app.extensions['bdc:collector'] + + click.secho('Supported providers: ', bold=True, fg='green') + for provider_name in ext.list_providers(): + click.secho(f'\t{provider_name}', bold=True, fg='green') + + +@cli.command() +@click.option('-c', '--collection-id', required=True) +@click.option('-s', '--scene-id', required=True) +@click.option('-o', '--output', help='Save output directory', required=True) +@with_appcontext +def priority(collection_id, scene_id, output): + """Download a scene seeking in CollectionProviders. + + Notes: + You must configure the BDC-Catalog. 
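+
+        Each provider associated with the collection is tried following the
+        priority order; when a download fails, the error is logged and the
+        next provider is used.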
+ + Args: + collection_id - Collection Identifier + scene_id - A scene identifier (Landsat Scene Id/Sentinel Scene Id, etc) + output - Directory to save. + """ + ext = current_app.extensions['bdc:collector'] + + collection = Collection.query().get(collection_id) + + order = ext.get_provider_order(collection) + + for driver in order: + try: + file_destination = driver.download(scene_id, output=output) + except Exception as e: + logging.warning(f'Download error for provider {driver.provider_name} - {str(e)}') + + +def main(as_module=False): + """Load Brazil Data Cube (bdc_collection_builder) as module.""" + import sys + cli.main(args=sys.argv[1:], prog_name="python -m bdc_collectors" if as_module else None) diff --git a/bdc_collectors/creodias/__init__.py b/bdc_collectors/creodias/__init__.py new file mode 100644 index 0000000..c8cf4e0 --- /dev/null +++ b/bdc_collectors/creodias/__init__.py @@ -0,0 +1,196 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +"""Defines the structures for CREODIAS API.""" + +import concurrent +import os +from datetime import datetime +from typing import List + +from shapely.geometry import box + +from ..base import BaseProvider, SceneResult +from ..exceptions import DataOfflineError +from .api import Api + + +def init_provider(): + """Register the CREODIAS provider.""" + # TODO: Register in bdc_catalog.models.Provider + + return dict( + CREODIAS=CREODIAS + ) + + +class CREODIAS(BaseProvider): + """CREODIAS Catalog provider. + + This providers consumes the `CREODIAS API `_. + + Notes: + This provider requires `username` and `password`, respectively. + You can create an account `CREODIAS Registration `_. + + The CREODIAS has implemented Rate Limit in their API services. The limit is 60 requests per minute, per source IP address. + Make sure to do not overflow 60 requests. + """ + + def __init__(self, **kwargs): + """Create an instance of ONDA provider.""" + if 'username' not in kwargs or 'password' not in kwargs: + raise RuntimeError('Missing "username"/"password" for CREODIAS provider.') + + self.api = Api(kwargs['username'], kwargs['password'], progress=kwargs.get('progress', True)) + self.kwargs = kwargs + + def search(self, query, **kwargs): + """Search for data set in CREODIAS Provider. + + Based in CREODIAS EO-Data-Finder API, the following products are available in catalog: + + - Sentinel1 + - Sentinel2 + - Sentinel3 + - Sentinel5P + - Landsat8 + - Landsat7 + - Landsat5 + - Envisat + + Examples: + >>> from bdc_collectors.creodias import CREODIAS + >>> provider = CREODIAS(username='theuser@email.com', password='thepass') + >>> result = provider.search('Sentinel2', bbox=[-54,-12,-52,-10], start_date='2020-01-01', end_date='2020-01-31') + + Args: + query - The collection name + **kwargs + """ + bbox = kwargs.pop('bbox', None) + + if bbox: + geom = box(*bbox) + kwargs['geom'] = geom.wkt + + scenes = self.api.search(query, **kwargs) + + return scenes + + def download(self, scene_id: str, output: str, **kwargs): + """Download scene from CREODIAS API. + + Raises: + DataOfflineError when scene is not available/offline. 
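+            RuntimeError when the scene is not found in the provider catalog.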
+
+        Examples:
+            >>> from bdc_collectors.creodias import CREODIAS
+            >>> provider = CREODIAS(username='theuser@email.com', password='thepass')
+            >>> output_file = provider.download('S2A_MSIL1C_20201006T132241_N0209_R038_T23KLT_20201006T151824', output='/tmp')
+            >>> output_file
+            '/tmp/S2A_MSIL1C_20201006T132241_N0209_R038_T23KLT_20201006T151824.zip'
+        """
+        collection = self._guess_collection(scene_id)
+
+        scenes = self.api.search(collection, productIdentifier=f'%{scene_id}%')
+
+        if len(scenes) == 0:
+            raise RuntimeError(f'Scene {scene_id} not found.')
+
+        scene = scenes[0]
+
+        if scene['properties']['status'] != 0:
+            raise DataOfflineError(scene_id)
+
+        return self._submit_download(scene, output=output, force=kwargs.get('force', False))['path']
+
+    @staticmethod
+    def _guess_collection(scene_id) -> str:
+        """Try to identify a CREODIAS collection from the scene identifier."""
+        if scene_id.startswith('S2'):
+            collection = 'Sentinel2'
+        elif scene_id.startswith('S1'):
+            collection = 'Sentinel1'
+        elif scene_id.startswith('LC08'):
+            collection = 'Landsat8'
+        elif scene_id.startswith('LE07'):
+            collection = 'Landsat7'
+        elif scene_id.startswith('LT05'):
+            collection = 'Landsat5'
+        else:
+            raise RuntimeError(f'Cannot identify the scene id {scene_id}')
+
+        return collection
+
+    def download_all(self, scenes: List[SceneResult], output: str, **kwargs):
+        """Bulk download scenes from the CREODIAS provider in parallel.
+
+        Examples:
+            >>> from bdc_collectors.creodias import CREODIAS
+            >>> provider = CREODIAS(username='theuser@email.com', password='thepass')
+            >>> scenes = provider.search('Sentinel2', bbox=[-54,-12,-52,-10], start_date='2020-01-01', end_date='2020-01-31')
+            >>> provider.download_all(scenes, output='/tmp')
+
+        Args:
+            scenes - List of SceneResult to download
+            output - Directory to save the downloaded scenes
+            **kwargs - Optional parameters. You can also set ``max_workers``, which is 2 by default.
+
+        Returns:
+            Tuple[List[SceneResult], List[str], List[Exception]]
+
+            Returns the lists of successfully downloaded scenes, scheduled (offline) scenes and download errors, respectively.
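+
+            Scenes that raise ``DataOfflineError`` are placed in the scheduled
+            list; any other failure is appended to the error list.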
+ """ + max_workers = kwargs.pop('max_workers', 2) + + collection = kwargs.get('collection') + + success = [] + scheduled = [] + failed = [] + + products = [] + + for scene in scenes: + try: + result = self.api.search(collection or self._guess_collection(scene.scene_id), productIdentifier=f'%{scene.scene_id}%') + + if len(result) == 0: + raise RuntimeError('Not found in provider.') + + products.append(result[0]) + except Exception as e: + failed.append(scene) + + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + tasks = [] + + for scene in products: + tasks.append( + executor.submit(self._submit_download, scene, output) + ) + + for task in concurrent.futures.as_completed(tasks): + if not task.exception() and task.result(): + success.append(task.result()) + elif task.exception(): + exception = task.exception() + if isinstance(exception, DataOfflineError): + scheduled.append(exception.scene_id) + else: + failed.append(exception) + return success, scheduled, failed + + def _submit_download(self, scene: SceneResult, output: str, max_retry: int = 10, force: bool = False): + """Download function used by ThreadExecutor.""" + output = os.path.join(output, f'{scene.scene_id}.zip') + + scene['path'] = self.api.download(scene, output, max_retry=max_retry, force=force) + + return scene diff --git a/bdc_collectors/creodias/api.py b/bdc_collectors/creodias/api.py new file mode 100644 index 0000000..b73ade4 --- /dev/null +++ b/bdc_collectors/creodias/api.py @@ -0,0 +1,191 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +"""Define the API class for communication with CREODIAS server.""" + +import os +from datetime import datetime +from typing import Dict, List, Optional, Union + +import requests + +from ..base import SceneResult +from ..utils import download_stream + +DateT = Union[str, datetime] +Link = Dict[str, str] + + +class Api: + """Define simple abstraction of CREODIAS API.""" + + url: str = 'http://finder.creodias.eu/resto/api/collections/{collection}/search.json?maxRecords=500' + auth_url: str = 'https://auth.creodias.eu/auth/realms/DIAS/protocol/openid-connect/token' + + def __init__(self, username, password, progress=False): + """Create CREODIAS API instance.""" + self.username = username + self.password = password + self.progress = progress + + @property + def access_token(self): + """Retrieve the user access token.""" + params = dict( + username=self.username, + password=self.password, + client_id='CLOUDFERRO_PUBLIC', + grant_type='password' + ) + + response = requests.post(self.auth_url, data=params) + + if response.status_code != 200: + raise RuntimeError('Unauthorized.') + + return response.json()['access_token'] + + def search(self, collection: str, start_date: Optional[DateT] = None, end_date: Optional[DateT] = None, + geom: str = None, status: str = 'all', **kwargs) -> List[SceneResult]: + """Search for data products in ONDA catalog. + + Args: + collection - The collections defined by ONDA provider. + The following values are supported: + Sentinel1, Sentinel2, Sentinel3, Sentinel5P,Landsat8,Landsat7,Landsat5 and EnvSat + start_date - The sensing date + end_date - The end date of the observation. + geom - Area in WKT + **kwargs - The others parameters for request. + The supported parameters are defined in `EO Data Finder API Manual `_. 
+ + Returns: + The list of matched scenes. + """ + url = self.url.format(collection=collection) + + params = dict(**kwargs) + + if geom: + params['geometry'] = geom + + if status: + params['status'] = status + + if start_date: + params['startDate'] = self._parse_date(start_date).isoformat() + + if end_date: + params['completionDate'] = self._parse_date(end_date).isoformat() + + result = [] + + while url is not None: + response = requests.get(url, params=params) + + content = response.json() + + for feature in content['features']: + scene_id = feature['properties']['title'].replace('.SAFE', '') + cloud_cover = feature['properties']['cloudCover'] + + link = '' + + if feature['properties']['services']: + link = feature['properties']['services']['download'] + + result.append(SceneResult(scene_id, cloud_cover, link=link, **feature)) + + url = self._next_page(content['properties']['links']) + + return result + + @staticmethod + def _next_page(links: List[Link]): + """Seek for next page in query result links.""" + for link in links: + if link['rel'] == 'next': + return link['href'] + + return None + + @staticmethod + def _parse_date(date: DateT) -> datetime: + """Try to parse a value to date.""" + if isinstance(date, datetime): + return date + + return datetime.strptime(date, '%Y-%m-%d') + + def download(self, scene: SceneResult, output: str, max_retry: int = 10, force: bool = False) -> str: + """Download the scene of CREODIAS server. + + Notes: + We cant resume download since the CREODIAS given file size does not match with downloaded file. + """ + access_token = self.access_token + uuid = scene['id'] + # TODO: Get download_url from scene['properties']['servives']['download']. Check scene availability (status) + + download_url = f'https://zipper.creodias.eu/download/{uuid}?token={access_token}' + + tmp_file = f'{output}.incomplete' + + headers = dict() + + # HEAD Server to get file size + head = requests.head(download_url, timeout=90) + + def _remove_file_if_exists(file_path): + if os.path.exists(file_path) and os.path.isfile(file_path): + os.remove(file_path) + + with head: + expected_file_size = int(head.headers.get('Content-Length', 0)) + + # Force download ?? + if force: + _remove_file_if_exists(tmp_file) + else: + output_file_size = os.stat(output).st_size if os.path.exists(output) else 0 + + if output_file_size > 0 and output_file_size == expected_file_size: + # File has same byte size. + # TODO: Should we validate before?? + return output + + # Get current size of temporary file + tmp_file_size = os.stat(tmp_file).st_size if os.path.exists(tmp_file) else 0 + + for retry in range(max_retry): + if tmp_file_size > 0: + if tmp_file_size > expected_file_size: + # file large than expected. 
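+                    # The partial file is larger than the size reported by the
+                    # server, so it is corrupt and must be discarded.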
+ _remove_file_if_exists(tmp_file) + if tmp_file_size == expected_file_size: + break + + headers['Range'] = f'bytes={tmp_file_size}-' + + response = requests.get(download_url, stream=True, timeout=90, headers=headers) + + download_stream(tmp_file, response, progress=self.progress, offset=tmp_file_size, total_size=expected_file_size) + + tmp_file_size = os.stat(tmp_file).st_size + + if tmp_file_size > 0 and tmp_file_size == expected_file_size: + break + + if retry == max_retry - 1: + raise DownloadError(f'Download error - Max retry exceeded for {scene.scene_id}.') + + _remove_file_if_exists(output) + + os.rename(tmp_file, output) + + return output diff --git a/bdc_collectors/earth_search/__init__.py b/bdc_collectors/earth_search/__init__.py new file mode 100644 index 0000000..3c07372 --- /dev/null +++ b/bdc_collectors/earth_search/__init__.py @@ -0,0 +1,114 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +"""Define the providers to deal with STAC Element84.""" +import shutil +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import List + +import requests +from shapely.geometry import box, mapping +from stac import STAC + +from ..base import BaseProvider, SceneResult +from ..exceptions import DownloadError +from ..scihub.parser import Sentinel2Scene +from ..usgs import LandsatScene +from ..utils import download_stream + + +def init_provider(): + """Init provider factory loader.""" + return dict(EarthSearch=EarthSearch) + + +class EarthSearch(BaseProvider): + """Define a simple abstraction of Provider for Element84. + + It was designed to download Sentinel-2 COGS from + `Sentinel-2 Cloud-Optimized GeoTIFFs `_ + """ + + def __init__(self, **kwargs): + """Build STAC provider for Element84.""" + access_token = kwargs.pop('access_token', None) + + self.kwargs = kwargs + self.api = STAC('https://earth-search.aws.element84.com/v0', access_token=access_token) + self.progress = kwargs.get('progress') + + def search(self, query, *args, **kwargs) -> List[SceneResult]: + """Search for scenes in STAC.""" + options = dict() + + if 'start_date' in kwargs: + options['time'] = f'{kwargs.get("start_date")}/{kwargs.get("end_date")}' + + if 'bbox' in kwargs: + options['intersects'] = mapping(box(*kwargs['bbox'])) + + options['collection'] = query + + res = self.api.search(filter=options) + + # TODO: Implement next page as iterator or check stac.py support + return [ + SceneResult( + scene_id=f['properties']['sentinel:product_id'], + cloud_cover=f['properties']['sentinel:cloud_cover'], + **f + ) + for f in res['features'] + ] + + @staticmethod + def _guess_parser(scene_id: str): + """Get the supported parser for Scene.""" + if scene_id.startswith('S2'): + return Sentinel2Scene(scene_id) + return LandsatScene(scene_id) + + def download(self, scene_id: str, *args, **kwargs) -> str: + """Download files from STAC Element 84.""" + output = kwargs['output'] + + collection = kwargs['dataset'] + + parsed = self._guess_parser(scene_id) + + stac_collection = self.api.collection(collection) + + product = parsed.fragments[1][-3:] + + item_id = f'{parsed.source()}_{parsed.tile_id()}_{parsed.sensing_date().strftime("%Y%m%d")}_0_{product}' + + feature = stac_collection.get_items(item_id=item_id) + + if feature.get('code'): + raise RuntimeError(f'Scene {scene_id} not found for collection {collection}.') + + with 
TemporaryDirectory() as tmp: + tmp_path = Path(tmp) / item_id + + for asset_name, asset in feature['assets'].items(): + self._download(asset['href'], str(tmp_path)) + + shutil.move(str(tmp_path), output) + + return output + + def _download(self, link, output): + """Download asset from STAC.""" + file_name = Path(link).name + + path = Path(output) / file_name + + response = requests.get(link, stream=True, timeout=90) + + download_stream(str(path), response, progress=self.progress) diff --git a/bdc_collectors/exceptions.py b/bdc_collectors/exceptions.py new file mode 100644 index 0000000..fafc7be --- /dev/null +++ b/bdc_collectors/exceptions.py @@ -0,0 +1,42 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +"""Define the common exceptions for Data Download.""" + + +class DownloadError(Exception): + """Generic error for Download.""" + + message: str + + def __init__(self, message): + """Build a DownloadError instance.""" + self.message = message + + def __str__(self): + """Retrieve the string representation of DownloadError.""" + return f'DownloadError, {self.message}' + + +class DataOfflineError(DownloadError): + """Indicate that the scene_id is not available (Offline). + + Frequently used by Sentinel SciHub Provider. + """ + + scene_id: str + + def __init__(self, scene_id): + """Create a DataOfflineError.""" + super().__init__(f'Scene {scene_id} is offline/not available') + + self.scene_id = scene_id + + def __str__(self): + """Define the string representation for DataOfflineError.""" + return f'DataOfflineError(scene_id={self.scene_id})' diff --git a/bdc_collectors/ext.py b/bdc_collectors/ext.py new file mode 100644 index 0000000..b4fc043 --- /dev/null +++ b/bdc_collectors/ext.py @@ -0,0 +1,233 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. 
+# + +"""Define the BDC-Collector flask extension.""" + +import logging +import warnings +from threading import Lock +from typing import Dict, List, Type + +import pkg_resources +from bdc_catalog.models import Collection, CollectionsProviders, Provider, db +from flask import Flask + +from .base import BaseProvider + + +class CollectorState: + """Class for holding Collector state of the extension.""" + + providers: Dict[str, BaseProvider] + + lock: Lock = Lock() + + def __init__(self): + """Create the state.""" + self.providers = dict() + + def add_provider(self, provider_name: str, provider: BaseProvider): + """Add a new provider to supports.""" + with self.lock: + assert provider_name not in self.providers + + self.providers[provider_name] = provider + + def get_provider(self, provider: str) -> BaseProvider: + """Try to retrieve the data provider type.""" + with self.lock: + if provider in self.providers: + return self.providers[provider] + return None + + +class DataCollector: + """Data wrapper to store the given instance `bdc_catalog.models.Provider` and the data collector factory.""" + + _db_provider: Provider + _provider: BaseProvider + _collection_provider: CollectionsProviders + + def __init__(self, instance: Provider, provider: Type[BaseProvider], collection_provider: CollectionsProviders, **kwargs): + """Create a data collector instance.""" + self._db_provider = instance + + if isinstance(instance.credentials, dict): + copy_args = instance.credentials.copy() + copy_args.update(**kwargs) + + self._provider = provider(**copy_args) + else: + self._provider = provider(instance.credentials, **kwargs) + + self._collection_provider = collection_provider + + def __str__(self): + """Retrieve String representation for DataCollector.""" + return f'DataCollector({self.provider_name})' + + @property + def active(self) -> bool: + """Retrieve the provider availability in database.""" + return self._collection_provider.active + + @property + def priority(self) -> bool: + """Retrieve the provider priority order in database.""" + return self._collection_provider.priority + + @property + def instance(self) -> Provider: + """Retrieve the database instance of bdc_catalog.models.Provider.""" + return self._db_provider + + @property + def provider_name(self) -> str: + """Retrieve the provider name.""" + return self._db_provider.name + + def download(self, *args, **kwargs): + """Download data from remote provider.""" + return self._provider.download(*args, **kwargs) + + def search(self, *args, **kwargs): + """Search for dataset in the provider.""" + # TODO: Apply adapter in the results here?? + return self._provider.search(*args, **kwargs) + + +class CollectorExtension: + """Define the flask extension of BDC-Collectors. + + You can initialize this extension as following:: + + app = Flask(__name__) + ext = CollectorExtension(app) + + This extension use the `Python Entry points specification `_ + for load data providers dynamically. + By default, we use the entrypoint `bdc_collectors.providers` as defined in `setup.py`:: + + entry_points={ + 'bdc_collectors.providers': [ + 'google = bdc_collectors.google', + 'usgs = bdc_collectors.usgs', + 'onda = bdc_collectors.onda', + 'scihub = bdc_collectors.scihub' + ], + }, + + Each provider is hold in the property `state` and may be accessed using:: + + from flask import current_app + + ext = current_app.extensions['bdc:collector'] + + ext.get_provider('providerName') + + Notes: + Make sure to initialize the CollectorExtension before. 
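+
+        The method ``get_provider`` returns ``None`` when the requested
+        provider is not registered in the extension state.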
+ + We also the a command line `bdc-collectors` which provides a way to + consume those providers in terminal:: + + bdc-collectors --help + """ + + state: CollectorState + + def __init__(self, app: Flask, **kwargs): + """Create a instance of extension.""" + self.state = CollectorState() + + if app: + self.init_app(app, **kwargs) + + def init_app(self, app: Flask, **kwargs): + """Initialize the BDC-Collector extension, loading supported providers dynamically.""" + from .cli import cli + + extension_name = 'bdc:collector' + + if extension_name in app.extensions: + warnings.warn(f'The module {extension_name} was already initialized before.') + return + + self.init_providers(**kwargs) + + app.extensions[extension_name] = self + app.cli.add_command(cli, 'bdc-collector') + + def init_providers(self, entry_point: str = 'bdc_collectors.providers', **kwargs): + """Load the supported providers from setup.py entry_point.""" + if entry_point: + for base_entry in pkg_resources.iter_entry_points(entry_point): + provider = base_entry.load() + + if hasattr(provider, 'init_provider') and \ + callable(provider.init_provider): + entry = provider.init_provider() + + for provider_name, provider in entry.items(): + self.state.add_provider(provider_name, provider) + + def get_provider(self, provider: str) -> Type[BaseProvider]: + """Retrieve a provider class.""" + return self.state.get_provider(provider) + + def get_provider_order(self, collection: Collection, include_inactive=False, **kwargs) -> List[DataCollector]: + """Retrieve a list of providers which the bdc_catalog.models.Collection is associated. + + Notes: + This method requires the initialization of extension `bdc_catalog.ext.BDCCatalog`. + + With a given collection, it seeks in `bdc_catalog.models.Provider` + and `bdc_catalog.models.CollectionsProviders` association and then + look for provider supported in the entry point `bdc_collectors.providers`. + + Args: + collection - A collection instance + include_inactive - List also the inactive providers. Default=False + **kwargs - Extra parameters to pass to the Provider instance. + + Returns: + A list of DataCollector, ordered by priority. + """ + where = [] + + if not include_inactive: + where.append(CollectionsProviders.active.is_(True)) + + collection_providers = db.session\ + .query(Provider, CollectionsProviders) \ + .filter( + CollectionsProviders.collection_id == collection.id, + Provider.id == CollectionsProviders.provider_id, + *where + ) \ + .order_by(CollectionsProviders.priority.asc()) \ + .all() + + result = [] + + for collection_provider in collection_providers: + provider_name = collection_provider.Provider.name + + provider_class = self.state.get_provider(provider_name) + + if provider_class is None: + logging.warning(f'The collection requires the provider {provider_name} but it is not supported.') + continue + + result.append(DataCollector(collection_provider.Provider, provider_class, collection_provider, **kwargs)) + + return result + + def list_providers(self) -> List[str]: + """Retrieve a list of supported providers.""" + return list(self.state.providers.keys()) diff --git a/bdc_collectors/google/__init__.py b/bdc_collectors/google/__init__.py new file mode 100644 index 0000000..fb9ce29 --- /dev/null +++ b/bdc_collectors/google/__init__.py @@ -0,0 +1,159 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. 
+# + +"""Defines the structures for Google Provider access.""" + +import logging +import os +import shutil +from pathlib import Path + +from google.cloud import storage + +from ..base import BaseProvider +from ..scihub.sentinel2 import Sentinel2 +from ..usgs.landsat5 import Landsat5 +from ..usgs.landsat7 import Landsat7 +from ..usgs.landsat8 import Landsat8 +from ..utils import working_directory +from .landsat import GoogleLandsat +from .sentinel import GoogleSentinel + + +def init_provider(): + """Register the provider Google.""" + # TODO: Register in bdc_catalog.models.Provider + + return dict( + Google=Google + ) + + +class Google(BaseProvider): + """Google provider definition. + + This providers consumes the `Google Public Data Sets `_ + + Currently, we support both `Sentinel-2` and `Landsat` products. + + Notes: + This provider requires `GOOGLE_APPLICATION_CREDENTIALS` to work properly. + Make sure to set in terminal or pass as variable in constructor. + """ + + storage_client: storage.Client + + def __init__(self, **kwargs): + """Create instance of Google Provider.""" + credentials = kwargs.get('GOOGLE_APPLICATION_CREDENTIALS') + + if credentials is None: + credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS') + + if credentials is None: + raise RuntimeError('The Google Provider requires env GOOGLE_APPLICATION_CREDENTIALS') + + os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials + + self.storage_client = storage.Client() + + # Attaching collections to be accessed and get local directory structure + self.collections['LANDSAT_5'] = Landsat5 + self.collections['LANDSAT_7'] = Landsat7 + self.collections['LANDSAT_8'] = Landsat8 + self.collections['LANDSAT_8'] = Landsat8 + self.collections['Sentinel-2'] = Sentinel2 + + def search(self, query, *args, **kwargs): + """Search for data set in Google Provider. + + Currently, it is not supported yet, since requires to download large `.csv` to check. + + TODO: Implement way to download and keep up to dated the `.csv` file. + """ + # TODO: read .csv??? + raise RuntimeError('Search is not supported for this provider') + + def download(self, scene_id: str, *args, **kwargs): + """Download scene from Google buckets.""" + try: + # Creates a GCS Client + storage_client = storage.Client() + + destination = kwargs.get('output') + + data_handler = guess_scene_parser(scene_id) + + bucket = storage_client.bucket(data_handler.bucket) + + blob_name = Path(data_handler.get_url()) + + folder = data_handler.folder + + blobs = list(bucket.list_blobs(prefix=str(blob_name))) + + if len(blobs) == 0: + raise RuntimeError('Scene {} not found on Google Cloud Storage.'.format(scene_id)) + + downloaded_files = [] + + for blob in blobs: + blob_path = Path(blob.name) + + if blob.name.endswith(f'{folder}_$folder$'): + continue + + blob_relative = blob_path.relative_to(blob_name) + + target_path = Path(destination) / folder / str(blob_relative) + target_path.parent.mkdir(parents=True, exist_ok=True) + + if str(blob_path).endswith('$folder$'): + continue + + blob.download_to_filename(str(target_path)) + + data_handler.apply_processing(target_path) + + downloaded_files.append(str(target_path)) + + return data_handler.process(downloaded_files, destination) + except Exception as e: + logging.error(f'Could not download from Google {scene_id} - {str(e)}') + + +def guess_scene_parser(scene_id): + """Try to identify a parser for Scene Id. + + Raises: + RuntimeError when cant parse scene_id. 
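+
+    Notes:
+        The parsers ``GoogleLandsat`` and ``GoogleSentinel`` are tried in
+        order; the first one that accepts the scene identifier is used.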
+
+
+def guess_scene_parser(scene_id):
+    """Try to identify a parser for the given scene id.
+
+    Raises:
+        RuntimeError when the scene_id cannot be parsed.
+
+    Args:
+        scene_id - Scene id product
+
+    Returns:
+        A Google Data Set
+    """
+    parsers = [GoogleLandsat, GoogleSentinel]
+
+    found = None
+
+    for parser in parsers:
+        try:
+            found = parser(scene_id)
+            break
+        except RuntimeError:
+            continue
+
+    if found is None:
+        raise RuntimeError(f'Cannot guess a parser for scene {scene_id}')
+
+    return found diff --git a/bdc_collectors/google/landsat.py b/bdc_collectors/google/landsat.py new file mode 100644 index 0000000..b7e23d6 --- /dev/null +++ b/bdc_collectors/google/landsat.py @@ -0,0 +1,77 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Define the data set of Google for Landsat products."""
+
+import shutil
+import tarfile
+from pathlib import Path
+
+import rasterio
+
+from ..usgs.base import BaseLandsat
+from ..utils import working_directory
+
+
+class GoogleLandsat(BaseLandsat):
+    """Define the Google data set for Landsat products."""
+
+    bucket = 'gcp-public-data-landsat'
+
+    def __init__(self, scene_id: str):
+        """Create the GoogleLandsat instance."""
+        self.parser = self.parser_class(scene_id)
+
+    @property
+    def folder(self):
+        """Retrieve base folder of Landsat."""
+        return self.parser.scene_id
+
+    def get_url(self) -> str:
+        """Get the relative URL path in the Landsat bucket."""
+        source = self.parser.source()
+        tile = self.parser.tile_id()
+        scene_id = self.parser.scene_id
+
+        return f'{source}/01/{tile[:3]}/{tile[3:]}/{scene_id}'
+
+    def apply_processing(self, file_path: Path):
+        """Apply a function in post download processing.
+
+        This function removes the compression of TIFF files so that the
+        result is similar to a USGS scene.
+        """
+        if file_path.suffix.lower() == '.tif':
+            with rasterio.open(str(file_path), 'r') as source_data_set:
+                profile = source_data_set.profile
+                raster = source_data_set.read(1)
+
+            profile.pop('compress', '')
+            profile.update(dict(
+                tiled=False
+            ))
+
+            with rasterio.open(str(file_path), 'w', **profile) as target_data_set:
+                target_data_set.write_band(1, raster)
+
+    def process(self, downloaded_files: list, output: str) -> str:
+        """Compress the downloaded files into scene.tar.gz."""
+        compressed_file_path = Path(output) / f'{self.parser.scene_id}.tar.gz'
+
+        with tarfile.open(compressed_file_path, 'w:gz') as compressed_file:
+            relative = str(Path(output) / self.parser.scene_id)
+            with working_directory(relative):
+                for f in downloaded_files:
+                    compressed_file.add(str(Path(f).relative_to(relative)))
+
+        shutil.rmtree(str(Path(output) / self.parser.scene_id))
+
+        return str(compressed_file_path)
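A short usage sketch for the Google provider (illustrative; it assumes a valid service-account key referenced by GOOGLE_APPLICATION_CREDENTIALS and a scene that exists in the public buckets):

    import os
    from flask import Flask
    from bdc_collectors import CollectorExtension

    app = Flask(__name__)
    ext = CollectorExtension(app)

    provider_class = ext.get_provider('Google')
    provider = provider_class(GOOGLE_APPLICATION_CREDENTIALS=os.getenv('GOOGLE_APPLICATION_CREDENTIALS'))
    # Downloads the scene and returns the path of the packed file (.tar.gz for Landsat)
    provider.download('LC08_L1TP_223064_20200831_20200906_01_T1', output='/tmp')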
diff --git a/bdc_collectors/google/sentinel.py b/bdc_collectors/google/sentinel.py new file mode 100644 index 0000000..66f833e --- /dev/null +++ b/bdc_collectors/google/sentinel.py @@ -0,0 +1,58 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details.
+#
+
+"""Define the data set of Google for Sentinel products."""
+
+import shutil
+from pathlib import Path
+
+from ..scihub.parser import Sentinel2Scene
+from ..utils import working_directory
+
+
+class GoogleSentinel:
+    """Define the Google data set for Sentinel products."""
+
+    bucket = 'gcp-public-data-sentinel-2'
+
+    def __init__(self, scene_id: str):
+        """Create the GoogleSentinel instance."""
+        self.parser = Sentinel2Scene(scene_id)
+        self.keep_folder = True
+
+    @property
+    def folder(self):
+        """Retrieve base folder of Sentinel."""
+        return f'{self.parser.scene_id}.SAFE'
+
+    def get_url(self) -> str:
+        """Get the relative URL path in the Sentinel bucket."""
+        source = self.parser.source()
+        tile = self.parser.tile_id()
+        scene_id = self.parser.scene_id
+
+        # TODO: Add support to download L2. We should just append L2 when MSIL2A found.
+
+        return f'tiles/{tile[:2]}/{tile[2]}/{tile[-2:]}/{scene_id}.SAFE'
+
+    def apply_processing(self, file_path):
+        """Apply a function in post download processing. No processing is required for Sentinel files."""
+        pass
+
+    def process(self, downloaded_files: list, output: str) -> str:
+        """Compress the downloaded files into scene.zip."""
+        with working_directory(output):
+            file_name = shutil.make_archive(
+                base_dir=self.folder,
+                format='zip',
+                base_name=self.parser.scene_id
+            )
+        # Remove .SAFE folder
+        shutil.rmtree(str(Path(output) / self.folder))
+
+        return str(Path(output) / file_name) diff --git a/bdc_collectors/onda/__init__.py b/bdc_collectors/onda/__init__.py new file mode 100644 index 0000000..7f1bd4a --- /dev/null +++ b/bdc_collectors/onda/__init__.py @@ -0,0 +1,121 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Defines the structures for ONDA Catalogue."""
+
+import concurrent.futures
+from typing import List
+
+from ..base import BaseProvider, SceneResult
+from ..exceptions import DataOfflineError
+from .api import Api
+
+
+def init_provider():
+    """Register the ONDA provider."""
+    # TODO: Register in bdc_catalog.models.Provider
+
+    return dict(
+        ONDA=ONDA
+    )
+
+
+class ONDA(BaseProvider):
+    """ONDA Catalog provider.
+
+    This provider consumes the `ONDA Open Catalogue <https://catalogue.onda-dias.eu/catalogue/>`_.
+
+    Notes:
+        This provider requires `username` and `password`.
+        You can create an account at `ONDA Registration `_
+    """
+
+    def __init__(self, **kwargs):
+        """Create an instance of ONDA provider."""
+        if 'username' not in kwargs or 'password' not in kwargs:
+            raise RuntimeError('Missing "username"/"password" for ONDA provider.')
+
+        self.api = Api(kwargs['username'], kwargs['password'], progress=kwargs.get('progress', True))
+        self.kwargs = kwargs
+
+    def search(self, query, **kwargs):
+        """Search for a data set in the ONDA Provider.
+
+        This operation is not supported yet.
+        """
+        # TODO: Implement search using https://www.onda-dias.eu/cms/knowledge-base/odata-querying-all-the-entities-of-the-onda-odata-api/
+        raise RuntimeError('The method search is not supported yet.')
+
+    def download(self, scene_id: str, output: str, **kwargs):
+        """Download scene from ONDA catalogue API.
+
+        Raises:
+            DataOfflineError when scene is not available/offline.
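+
+        Example:
+            An illustrative sketch, assuming a registered ONDA account and an online scene::
+
+                provider = ONDA(username='user@email.com', password='password')
+                file_name = provider.download('S2B_MSIL1C_20170919T140039_N0205_R067_T22MCB_20170919T140040', output='/tmp')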
+ """ + meta = self.api.search_by_scene_id(scene_id) + + if meta['offline']: + self.api.order(meta['id']) + + raise DataOfflineError(scene_id) + + file_name = self.api.download(scene_id, output) + + return file_name + + def download_all(self, scenes: List[SceneResult], output: str, **kwargs): + """Bulk download from ONDA provider. + + Args: + scenes - List of SceneResult to download (Use SciHub to search and pass result here) + output - Directory to save + **kwargs - Optional parameters. You can also set ``max_workers``, which is 2 by default. + """ + max_workers = kwargs.pop('max_workers', 2) + + success = [] + scheduled = [] + failed = [] + + for scene in scenes: + try: + meta = self.api.search_by_scene_id(scene.scene_id) + + scene.update(meta) + except RuntimeError: + failed.append(scene) + + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + tasks = [] + + for scene in scenes: + tasks.append( + executor.submit(self._submit_download, scene, output) + ) + + for task in concurrent.futures.as_completed(tasks): + if not task.exception() and task.result(): + success.append(task.result()) + elif task.exception(): + exception = task.exception() + if isinstance(exception, DataOfflineError): + scheduled.append(exception.scene_id) + + return success, scheduled, failed + + def _submit_download(self, scene: SceneResult, output): + """Download function used by ThreadExecutor.""" + if scene['offline']: + self.api.order(scene['id']) + + raise DataOfflineError(scene.scene_id) + + scene['path'] = self.api.download(scene.scene_id, output) + + return scene diff --git a/bdc_collectors/onda/api.py b/bdc_collectors/onda/api.py new file mode 100644 index 0000000..187876b --- /dev/null +++ b/bdc_collectors/onda/api.py @@ -0,0 +1,101 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +"""Simple implementation of ONDA Catalogue.""" + +from pathlib import Path + +import requests + +from ..utils import download_stream + + +class Api: + """Define a simple abstraction of ONDA catalog.""" + + URL = 'https://catalogue.onda-dias.eu/dias-catalogue/Products' + + username: str + password: str + + def __init__(self, username=None, password=None, progress=True): + """Create catalog instance.""" + self.username = username + self.password = password + self.progress = progress + + def order(self, product_id): + """Order an offline product to ONDA Catalogue.""" + base_uri = '%s({})/Ens.Order' % self.URL + + auth = self.username, self.password + + headers = { + 'Content-Type': 'application/json' + } + + req = requests.post(base_uri.format(product_id), timeout=90, auth=auth, headers=headers) + + req.raise_for_status() + + def download(self, scene_id: str, destination: str) -> str: + """Try to download scene from ONDA Provider. + + Raises: + Exception when scene is offline + RuntimeError when scene not found. + + Notes: + The scene may not be available. In this case, you must order + using "Api.order()". Make sure to set credentials. + + By default, when scene is offline, it will throw Exception. 
+
+        Args:
+            destination: Path to store file
+        """
+        base_uri = '%s({})/$value' % self.URL
+
+        meta = self.search_by_scene_id(scene_id)
+        product_id = meta['id']
+
+        auth = self.username, self.password
+
+        destination = Path(str(destination)) / '{}.zip'.format(scene_id)
+
+        req = requests.get(base_uri.format(product_id), stream=True, timeout=90, auth=auth)
+
+        req.raise_for_status()
+
+        download_stream(destination, req, progress=self.progress)
+
+        return str(destination)
+
+    def search(self, search, fmt='json') -> dict:
+        """Search on ONDA Catalog."""
+        query = {
+            '$search': search,
+            '$format': fmt
+        }
+
+        req = requests.get(self.URL, params=query, timeout=90)
+
+        req.raise_for_status()
+
+        content = req.json()
+
+        return content
+
+    def search_by_scene_id(self, scene_id: str) -> dict:
+        """Search on ONDA Catalogue for Sentinel 2 by scene_id."""
+        results = self.search('"name:{}.zip"'.format(scene_id))
+
+        if len(results['value']) == 0:
+            raise RuntimeError('{} not found.'.format(scene_id))
+
+        return results['value'][0] diff --git a/bdc_collectors/scihub/__init__.py b/bdc_collectors/scihub/__init__.py new file mode 100644 index 0000000..8bcaa52 --- /dev/null +++ b/bdc_collectors/scihub/__init__.py @@ -0,0 +1,167 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Define the implementation of Sentinel Provider."""
+
+from datetime import datetime
+from typing import List
+
+from sentinelsat import SentinelAPI, SentinelAPILTAError
+from shapely.geometry import box
+
+from ..base import BaseProvider, SceneResult
+from ..exceptions import DataOfflineError, DownloadError
+from .clients import UserClients
+from .sentinel2 import Sentinel1, Sentinel2
+
+
+def init_provider():
+    """Register sentinel provider."""
+    # TODO: Register in bdc_catalog.models.Provider
+    return dict(
+        SciHub=SciHub
+    )
+
+
+def _get_date_time(date) -> datetime:
+    """Get a datetime object from entry."""
+    if isinstance(date, datetime):
+        return date
+
+    return datetime.strptime(date, '%Y-%m-%d')
+
+
+class SciHub(BaseProvider):
+    """Define a simple implementation of Sentinel api.
+
+    This module uses `sentinel-sat` to search and to download files from Copernicus.
+
+    TODO: Document how to download multiple files using multiple accounts.
+    """
+
+    def __init__(self, *users, **kwargs):
+        """Create sentinel api instance."""
+        users_context = list(users)
+
+        show_progress = kwargs.get('progress', False)
+        parallel = kwargs.get('parallel', False)
+
+        self.progress = show_progress
+
+        if not users:
+            if 'username' not in kwargs or 'password' not in kwargs:
+                raise RuntimeError('Missing "username"/"password" for SciHub provider.')
+
+            auth = kwargs
+
+            self.parallel = parallel
+
+            if parallel:
+                users_context.append(auth)
+
+            self.kwargs = kwargs
+        else:
+            self.parallel = True
+            auth = users[0]
+
+        self.api = SentinelAPI(auth['username'], auth['password'], show_progressbars=show_progress)
+
+        if self.parallel:
+            self.clients = UserClients(users_context)
+
+        self.collections['Sentinel-1'] = Sentinel1
+        self.collections['GRD'] = Sentinel1
+        self.collections['Sentinel-2'] = Sentinel2
+        self.collections['S2MSI1C'] = Sentinel2
+        self.collections['S2MSI2A'] = Sentinel2
+
+    def search(self, query, **kwargs):
+        """Search for products on Sentinel provider.
+
+        Args:
+            query - Product type (e.g. ``S2MSI1C``)
+            **kwargs - Optional parameters (start_date/end_date/cloud_cover, etc)
+        """
+        bbox = kwargs.pop('bbox', None)
+
+        product_type = query
+
+        # TODO: Support downloading other Sentinel products
+        platform = kwargs.pop('platform', None) or 'Sentinel-2'
+
+        cloud_cover = kwargs.pop('cloud_cover', None)
+
+        options = kwargs.copy()
+        options['platformname'] = platform
+        options['producttype'] = product_type
+
+        if bbox:
+            envelope = box(*bbox)
+            options['area'] = envelope.wkt
+
+        if 'start_date' in kwargs and 'end_date' in kwargs:
+            start_date = _get_date_time(options.pop('start_date'))
+            end_date = _get_date_time(options.pop('end_date'))
+
+            options['date'] = start_date, end_date
+
+        if platform == 'Sentinel-2' and cloud_cover:
+            options['cloudcoverpercentage'] = (0, cloud_cover)
+
+        scenes = self.api.query(**options)
+
+        return [
+            SceneResult(scenes[scene]['title'], scenes[scene].get('cloudcoverpercentage'), **scenes[scene])
+            for scene in scenes
+        ]
+
+    def download(self, scene_id: str, output: str, **kwargs) -> str:
+        """Try to download data from Copernicus.
+
+        Raises:
+            DownloadError when scene not found.
+            DataOfflineError when scene is not available/offline.
+        """
+        meta = self.api.query(filename=f'{scene_id}*')
+
+        if len(meta) == 0:
+            raise DownloadError(f'Scene id {scene_id} not found.')
+
+        api = self.api
+
+        # When parallel support is set, get an available client from Redis
+        if self.parallel:
+            client = self.clients.get_user()
+
+            api = SentinelAPI(client.username, client.password, show_progressbars=self.progress)
+
+        uuid = list(meta)[0]
+
+        entry = api.download(uuid, output)
+
+        if not entry['Online']:
+            raise DataOfflineError(scene_id)
+
+        return entry['path']
+
+    def download_all(self, scenes: List[SceneResult], output: str, **kwargs):
+        """Download multiple scenes from Sentinel-Sat API.
+
+        Args:
+            scenes - List of scenes found by search method.
+            output - Output directory
+            **kwargs - Other parameters passed along to sentinel-sat.
+        """
+        uuid_scenes_map = {item['uuid']: item.scene_id for item in scenes}
+
+        try:
+            res = self.api.download_all(uuid_scenes_map, directory_path=output, **kwargs)
+
+            return res
+        except SentinelAPILTAError as e:
+            raise DownloadError(f'Error in Sentinel LongTermArchive - {str(e)}')
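An illustrative search-and-download sketch for this provider (assumes valid SciHub credentials; the bounding box and dates are arbitrary examples):

    provider = SciHub(username='user@email.com', password='password', progress=True)

    scenes = provider.search(
        'S2MSI1C',
        platform='Sentinel-2',
        bbox=(-54, -12, -50, -10),
        start_date='2020-01-01',
        end_date='2020-01-31',
        cloud_cover=100
    )

    if scenes:
        provider.download(scenes[0].scene_id, output='/tmp')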
diff --git a/bdc_collectors/scihub/base.py b/bdc_collectors/scihub/base.py new file mode 100644 index 0000000..22ee46d --- /dev/null +++ b/bdc_collectors/scihub/base.py @@ -0,0 +1,55 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details.
+#
+
+"""Defines the base structure of SciHub api."""
+
+from pathlib import Path
+
+from bdc_catalog.models import Collection
+from flask import current_app
+
+from ..base import BaseCollection
+from .parser import Sentinel2Scene
+
+
+class SentinelCollection(BaseCollection):
+    """Define the base collection schema for Sentinel products."""
+
+    parser_class = Sentinel2Scene
+
+    def compressed_file(self, collection, prefix=None):
+        """Retrieve path to the compressed scene (.zip) on local storage."""
+        if prefix is None:
+            prefix = current_app.config.get('DATA_DIR')
+
+        year_month = self.parser.sensing_date().strftime('%Y-%m')
+
+        source = self.parser.source()
+
+        sensor = self.parser.fragments[1][:3]
+
+        folder = '{}_{}'.format(source[:2], sensor)
+
+        scene_path = Path(prefix or '') / 'Repository/Archive' / folder / year_month
+
+        return scene_path / '{}.zip'.format(self.parser.scene_id)
+
+    def path(self, collection: Collection, prefix=None) -> Path:
+        """Retrieve the relative path to the Collection on Brazil Data Cube cluster."""
+        if prefix is None:
+            prefix = current_app.config.get('DATA_DIR')
+
+        sensing_date = self.parser.sensing_date()
+
+        year_month = sensing_date.strftime('%Y-%m')
+
+        scene_path = Path(prefix or '') / 'Repository/Archive' / collection.name / year_month
+
+        scene_path = scene_path / self.parser.scene_id
+
+        return scene_path diff --git a/bdc_collectors/scihub/clients.py b/bdc_collectors/scihub/clients.py new file mode 100644 index 0000000..787407f --- /dev/null +++ b/bdc_collectors/scihub/clients.py @@ -0,0 +1,130 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Describe an abstraction for Sentinel data access on Copernicus."""
+
+import json
+import logging
+import os
+import time
+from typing import List
+
+from flask import current_app
+
+
+class AtomicUser:
+    """An abstraction of an atomic user. You must use it as a context manager.
+
+    Make sure to control the access to the shared resource.
+
+    Whenever an instance goes out of scope, it automatically releases the user
+    back to the Redis cache.
+    """
+
+    def __init__(self, username, password, ref):
+        """Build an atomic user."""
+        self.username = username
+        self.password = password
+        self._released = False
+        self.ref = ref
+
+    def __repr__(self):
+        """Retrieve string representation of Atomic User."""
+        return 'AtomicUser({}, released={})'.format(self.username, self._released)
+
+    def __enter__(self):
+        """Open atomic user context."""
+        return self
+
+    def __del__(self):
+        """Release the atomic user when the object is destroyed."""
+        self.release()
+
+    def release(self):
+        """Release atomic user from redis."""
+        if not self._released:
+            logging.debug('Release {}'.format(self.username))
+            self.ref.done(self.username)
+
+            self._released = True
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit context. Release the user from redis client."""
+        self.release()
+
+
+class UserClients:
+    """Global user client for Sentinel Accounts."""
+
+    def __init__(self, users: List[dict], redis_url=None, lock_name='user-clients'):
+        """Build user clients interface."""
+        import redis
+
+        for user in users:
+            user['count'] = 0
+
+        self._key = 'bdc_collection_builder:users'
+
+        if redis_url is None:
+            redis_url = current_app.config.get('REDIS_URL', os.getenv('REDIS_URL'))
+
+        if redis_url is None:
+            raise RuntimeError('Parallel support requires a Redis instance. Make sure to export REDIS_URL.')
+
+        self._cache = redis.Redis.from_url(redis_url)
+        self._lock = self._cache.lock(lock_name)
+        self.users = users
+
+    @property
+    def users(self):
+        """Retrieve all users from the Redis cache."""
+        return json.loads(self._cache.get(self._key))
+
+    @users.setter
+    def users(self, obj):
+        """Update users."""
+        self._cache.set(self._key, json.dumps(obj))
+
+    def use(self):
+        """Try to lock an atomic user."""
+        with self._lock:
+            users = self.users
+
+            for user in users:
+                if user['count'] < 2:
+                    logging.debug('User {} - {}'.format(user['username'], user['count']))
+                    user['count'] += 1
+
+                    self.users = users
+
+                    return AtomicUser(user['username'], user['password'], self)
+            return None
+
+    def done(self, username):
+        """Release atomic user."""
+        with self._lock:
+            users = self.users
+
+            for user in users:
+                if user['username'] == username:
+                    user['count'] -= 1
+
+            self.users = users
+
+    def get_user(self):
+        """Wait until an available user can be acquired for download."""
+        user = None
+
+        while user is None:
+            user = self.use()
+
+            if user is None:
+                logging.info('Waiting for available user to download...')
+                time.sleep(5)
+
+        return user diff --git a/bdc_collectors/scihub/parser.py b/bdc_collectors/scihub/parser.py new file mode 100644 index 0000000..99c0fe5 --- /dev/null +++ b/bdc_collectors/scihub/parser.py @@ -0,0 +1,92 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Defines the parsers of Sentinel scene identifiers."""
+
+from datetime import datetime
+from typing import List
+
+from ..base import SceneParser
+
+
+class Sentinel2Scene(SceneParser):
+    """Define the parser of Sentinel-2 Scene identifiers."""
+
+    fragments: List[str]
+
+    def __init__(self, scene_id: str):
+        """Create the parser Sentinel2Scene."""
+        super().__init__(scene_id)
+
+        fragments = scene_id.split('_')
+
+        if len(fragments) != 7 or fragments[0] not in ('S2A', 'S2B'):
+            raise RuntimeError(f'Invalid sentinel scene {scene_id}')
+
+        self.fragments = fragments
+
+    def tile_id(self):
+        """Retrieve the tile id value."""
+        return self.fragments[5][1:]
+
+    def sensing_date(self):
+        """Retrieve the scene sensing date."""
+        return datetime.strptime(self.fragments[2], '%Y%m%dT%H%M%S')
+
+    def processing_date(self):
+        """Retrieve the scene processing date."""
+        return datetime.strptime(self.fragments[-1], '%Y%m%dT%H%M%S')
+
+    def satellite(self):
+        """Retrieve the Sentinel satellite - 2A/2B."""
+        part = self.fragments[0]
+
+        return part[-2:]
+
+    def source(self):
+        """Retrieve the scene first parameter (S2A/S2B)."""
+        return self.fragments[0]
+
+
+class Sentinel1Scene(SceneParser):
+    """Define the parser of Sentinel-1 Scene identifiers."""
+
+    fragments: List[str]
+
+    def __init__(self, scene_id: str):
+        """Create the parser SentinelScene."""
+        super().__init__(scene_id)
+
+        fragments = scene_id.split('_')
+
+        if len(fragments) != 9 or fragments[0] not in ('S1A', 'S1B'):
+            raise RuntimeError(f'Invalid sentinel scene {scene_id}')
+
+        self.fragments = fragments
+
+    def tile_id(self):
+        """Retrieve the tile id value."""
+        return self.fragments[6]
+
+    def sensing_date(self):
+        """Retrieve the scene sensing date."""
+        return datetime.strptime(self.fragments[4], '%Y%m%dT%H%M%S')
+
+    def processing_date(self):
+        """Retrieve the scene processing date."""
+        return datetime.strptime(self.fragments[5], '%Y%m%dT%H%M%S')
+
+    def satellite(self):
+        """Retrieve the Sentinel satellite - 1A/1B."""
+        part = self.fragments[0]
+
+        return part[-2:]
+
+    def source(self):
+        """Retrieve the scene first parameter (S1A/S1B)."""
+        return self.fragments[0] diff --git a/bdc_collectors/scihub/sentinel2.py b/bdc_collectors/scihub/sentinel2.py new file mode 100644 index 0000000..b8afdcc --- /dev/null +++ b/bdc_collectors/scihub/sentinel2.py @@ -0,0 +1,22 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Defines the structure for Collections on remote SciHub server."""
+
+from .base import SentinelCollection
+from .parser import Sentinel1Scene
+
+
+class Sentinel1(SentinelCollection):
+    """Simple abstraction for Sentinel-1."""
+
+    parser_class = Sentinel1Scene
+
+
+class Sentinel2(SentinelCollection):
+    """Simple abstraction for Sentinel-2.""" diff --git a/bdc_collectors/scripts/load_creodias_provider.sql b/bdc_collectors/scripts/load_creodias_provider.sql new file mode 100644 index 0000000..4cc032f --- /dev/null +++ b/bdc_collectors/scripts/load_creodias_provider.sql @@ -0,0 +1,5 @@ +INSERT INTO bdc.providers (name, description, uri, credentials) + VALUES ('CREODIAS', 'CREODIAS is a seamless environment that brings processing to Earth Observation data (EODATA - EO DATA Free Archive). Our platform contains online most of Copernicus Sentinel satellites data and Services, Envisat and ESA/Landsat data and other EODATA. Its design allows Third Party Users to prototype and build their own value-added services and products.', + 'https://creodias.eu/what-is-creodias', + '{"username": "user@email.com", "password": "password"}') + ON CONFLICT DO NOTHING; \ No newline at end of file diff --git a/bdc_collectors/scripts/load_google_provider.sql b/bdc_collectors/scripts/load_google_provider.sql new file mode 100644 index 0000000..a6ec690 --- /dev/null +++ b/bdc_collectors/scripts/load_google_provider.sql @@ -0,0 +1,5 @@ +INSERT INTO bdc.providers (name, description, uri, credentials) + VALUES ('Google', 'Google Cloud Storage of Public data sets - Landsat and Sentinel', + 'https://cloud.google.com/storage/docs/public-datasets', + '{"GOOGLE_APPLICATION_CREDENTIALS": ""}') + ON CONFLICT DO NOTHING; \ No newline at end of file diff --git a/bdc_collectors/scripts/load_onda_provider.sql b/bdc_collectors/scripts/load_onda_provider.sql new file mode 100644 index 0000000..b00800a --- /dev/null +++ b/bdc_collectors/scripts/load_onda_provider.sql @@ -0,0 +1,5 @@ +INSERT INTO bdc.providers (name, description, uri, credentials) + VALUES ('ONDA', 'ONDA DIAS provides free and open access to geospatial data and information, including full availability of Copernicus data.', + 'https://catalogue.onda-dias.eu/catalogue/', + '{"username": "user@email.com", "password": "password"}') + ON CONFLICT DO NOTHING; \ No newline at end of file diff --git a/bdc_collectors/scripts/load_scihub_provider.sql b/bdc_collectors/scripts/load_scihub_provider.sql new file mode 100644 index 0000000..cba05f8 --- /dev/null +++ b/bdc_collectors/scripts/load_scihub_provider.sql @@ -0,0 +1,5 @@ +INSERT INTO bdc.providers (name, description, uri, credentials) + VALUES ('SciHub', '', + 'https://scihub.copernicus.eu/dhus', + '{"username": "user@email.com", "password": "password"}') + ON CONFLICT DO NOTHING; \ No newline at end of file
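For reference, a quick sketch of the scene parsers defined above (the scene identifier is taken from the examples shipped with this package):

    from bdc_collectors.scihub.parser import Sentinel2Scene

    scene = Sentinel2Scene('S2B_MSIL1C_20170919T140039_N0205_R067_T22MCB_20170919T140040')
    scene.tile_id()       # '22MCB'
    scene.sensing_date()  # datetime(2017, 9, 19, 14, 0, 39)
    scene.satellite()     # '2B'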
diff --git a/bdc_collectors/scripts/load_usgs_provider.sql b/bdc_collectors/scripts/load_usgs_provider.sql new file mode 100644 index 0000000..6f88704 --- /dev/null +++ b/bdc_collectors/scripts/load_usgs_provider.sql @@ -0,0 +1,5 @@ +INSERT INTO bdc.providers (name, description, uri, credentials) + VALUES ('USGS', 'The USGS Earth Explorer data portal is your one stop shop for obtaining geo-spatial datasets from our extensive collections', + 'https://earthexplorer.usgs.gov/', + '{"username": "user@email.com", "password": "password"}') + ON CONFLICT DO NOTHING; \ No newline at end of file diff --git a/bdc_collectors/usgs/__init__.py b/bdc_collectors/usgs/__init__.py new file mode 100644 index 0000000..34359ff --- /dev/null +++ b/bdc_collectors/usgs/__init__.py @@ -0,0 +1,137 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Define the structures for USGS Earth Explorer Provider access."""
+
+import logging
+from typing import List
+
+from landsatxplore.api import API
+from landsatxplore.earthexplorer import EarthExplorer
+from landsatxplore.exceptions import EarthExplorerError
+
+from ..base import BaseProvider, SceneResult
+from ..exceptions import DownloadError
+from .landsat5 import Landsat5
+from .landsat7 import Landsat7
+from .landsat8 import Landsat8
+
+
+def init_provider():
+    """Register the USGS provider."""
+    # TODO: Register in bdc_catalog.models.Provider
+
+    return dict(
+        USGS=USGS
+    )
+
+
+class USGS(BaseProvider):
+    """Define the USGS provider.
+
+    This provider consumes the `USGS EarthExplorer <https://earthexplorer.usgs.gov/>`_ catalog.
+    """
+
+    api: API
+
+    def __init__(self, **kwargs):
+        """Create instance of USGS provider."""
+        self.collections['LANDSAT_TM_C1'] = Landsat5
+        self.collections['LANDSAT_ETM_C1'] = Landsat7
+        self.collections['LANDSAT_8_C1'] = Landsat8
+
+        lazy = kwargs.get('lazy')
+
+        if 'username' not in kwargs or 'password' not in kwargs:
+            raise RuntimeError('Missing "username"/"password" for USGS provider.')
+
+        self.kwargs = kwargs
+
+        if lazy:
+            self.api = None
+        else:
+            self.api = API(self.kwargs['username'], self.kwargs['password'])
+
+    def _api(self):
+        """Create the API instance lazily."""
+        if self.api is None:
+            self.api = API(self.kwargs['username'], self.kwargs['password'])
+
+    def __del__(self):
+        """Logout from USGS on exit."""
+        if self.api:
+            self.api.logout()
+
+    def search(self, query, *args, **kwargs) -> List[SceneResult]:
+        """Search for a data set in the USGS catalog."""
+        self._api()
+
+        options = dict(
+            max_cloud_cover=kwargs.get('cloud_cover', 100),
+            start_date=kwargs.get('start_date'),
+            end_date=kwargs.get('end_date'),
+            max_results=kwargs.get('max_results', 50000)
+        )
+
+        if 'bbox' in kwargs:
+            bbox = kwargs['bbox']
+            # w,s,e,n => s,w,n,e due to bug https://github.com/yannforget/landsatxplore/blob/master/landsatxplore/datamodels.py#L49
+            options['bbox'] = [bbox[1], bbox[0], bbox[3], bbox[2]]
+
+        results = self.api.search(query, **options)
+
+        valid_scene = self._valid_scene
+
+        if 'validate' in kwargs:
+            valid_scene = kwargs['validate']
+
+            if not callable(valid_scene):
+                raise ValueError(f'Invalid validate. Expected a callable(scene:dict), but got {valid_scene}')
+
+        return [
+            SceneResult(scene['displayId'], scene['cloudCover'], link=scene['downloadUrl'], **scene)
+            for scene in results if valid_scene(scene)
+        ]
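An illustrative search sketch for this provider (assumes valid EarthExplorer credentials; dates and bounding box are arbitrary):

    provider = USGS(username='user@email.com', password='password')

    scenes = provider.search(
        'LANDSAT_8_C1',
        start_date='2020-01-01',
        end_date='2020-01-31',
        bbox=(-54, -12, -50, -10),
        cloud_cover=50
    )

    provider.download(scenes[0].scene_id, output='/tmp')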
+
+    def _valid_scene(self, scene: dict) -> bool:
+        """Filter out invalid scenes.
+
+        Sometimes the USGS catalog returns invalid scene_ids; this function filters them out.
+        """
+        if scene['displayId'].endswith('RT') or scene['displayId'].startswith('LO08'):
+            return False
+
+        xmin, ymin, xmax, ymax = [float(value) for value in scene['sceneBounds'].split(',')]
+
+        # TODO: Check data integrity
+        # Sometimes the USGS responds with invalid bounding box scenes while searching in the EarthExplorer Catalog.
+        # w=-60.87065, n=-10.18204, e=-57.66829, s=-12.18696
+        # The expected scenes are:
+        # 228067, 228068, 228069, 229067, 229068, 229069, 230067, 230068, 230069.
+        # However, an invalid scene will be found (074068, 074067).
+        if xmin - xmax < -3:
+            logging.warning(f'Scene {scene["displayId"]} inconsistent.')
+            return False
+
+        return True
+
+    def download(self, scene_id: str, *args, **kwargs):
+        """Download Landsat product from USGS."""
+        self._api()
+
+        destination = kwargs.get('output')
+
+        explorer = EarthExplorer(self.kwargs['username'], self.kwargs['password'])
+
+        try:
+            file_name = explorer.download(scene_id, destination)
+        except EarthExplorerError as e:
+            raise DownloadError(str(e))
+
+        return file_name diff --git a/bdc_collectors/usgs/base.py b/bdc_collectors/usgs/base.py new file mode 100644 index 0000000..eface55 --- /dev/null +++ b/bdc_collectors/usgs/base.py @@ -0,0 +1,91 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Base definitions for USGS catalog."""
+
+from pathlib import Path
+
+from bdc_catalog.models import Collection
+from flask import current_app
+
+from ..base import BaseCollection
+from .parser import LandsatScene
+
+
+class BaseLandsat(BaseCollection):
+    """Define base Landsat Collection."""
+
+    parser_class = LandsatScene
+
+    assets = ['MTL.txt', 'ANG.txt']
+
+    def get_files(self, collection, path=None, prefix=None):
+        """List all files from Landsat."""
+        # TODO: Use parameter path instead
+        path = self.path(collection, prefix)
+
+        extra = [path / f'{self.parser.scene_id}_{asset}' for asset in self.assets]
+
+        files = [f for f in path.iterdir() if f.is_file() and f.suffix.lower() == '.tif']
+
+        return files + extra
+
+    def guess_landsat(self, scene_id):
+        """Try to guess which Landsat collection the given scene_id belongs to."""
+        # Import here to avoid circular imports with the landsat modules
+        from .landsat5 import Landsat5
+        from .landsat7 import Landsat7
+        from .landsat8 import Landsat8
+
+        parser = self.parser_class(scene_id)
+
+        satellite = int(parser.satellite())
+
+        product = None
+
+        if satellite in (4, 5,):
+            product = Landsat5
+        elif satellite == 7:
+            product = Landsat7
+        elif satellite == 8:
+            product = Landsat8
+        else:
+            raise RuntimeError('Invalid landsat')
+
+        return product()
+
+    def path(self, collection: Collection, prefix=None) -> Path:
+        """Retrieve the relative path to the Collection on Brazil Data Cube cluster.
+
+        Example:
+            >>> collection = Collection.query().filter(Collection.name == 'LC8_DN').first_or_404()
+            >>> landsat_parser = LandsatScene('LC08_L1TP_223064_20200831_20200906_01_T1')
+            >>> scene = BaseCollection(landsat_parser, collection=collection)
+            >>> print(str(scene.path(prefix='/gfs')))
            ... '/gfs/Repository/Archive/LC8_DN/2020-08/223064'
+        """
+        if prefix is None:
+            prefix = current_app.config.get('DATA_DIR')
+
+        sensing_date = self.parser.sensing_date()
+
+        year_month = sensing_date.strftime('%Y-%m')
+
+        scene_path = Path(prefix or '') / 'Repository/Archive' / collection.name / year_month / self.parser.tile_id()
+
+        return scene_path
+
+    def compressed_file(self, collection, prefix=None):
+        """Retrieve path to the compressed scene .tar.gz."""
+        if prefix is None:
+            prefix = current_app.config.get('DATA_DIR')
+
+        year_month = self.parser.sensing_date().strftime('%Y-%m')
+
+        product_version = int(self.parser.satellite())
+
+        folder = '{}{}'.format(self.parser.source()[:2], product_version)
+
+        scene_path = Path(prefix or '') / 'Repository/Archive' / folder / year_month / self.parser.tile_id()
+
+        return scene_path / '{}.tar.gz'.format(self.parser.scene_id) diff --git a/bdc_collectors/usgs/landsat5.py b/bdc_collectors/usgs/landsat5.py new file mode 100644 index 0000000..760da84 --- /dev/null +++ b/bdc_collectors/usgs/landsat5.py @@ -0,0 +1,37 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Define the supported Landsat-5 collections in USGS Earth Explorer catalog."""
+
+from .base import BaseLandsat
+
+
+class Landsat5(BaseLandsat):
+    """Simple abstraction for Landsat-5 DN."""
+
+    bands = [
+        'B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF', 'B6.TIF', 'B7.TIF', 'BQA.TIF'
+    ]
+
+    assets = [
+        'MTL.txt',
+        'ANG.txt'
+    ]
+
+
+class Landsat5SR(BaseLandsat):
+    """Simple abstraction for Landsat-5 Surface Reflectance."""
+
+    bands = [
+        'sr_band1.tif', 'sr_band2.tif', 'sr_band3.tif', 'sr_band4.tif', 'sr_band5.tif', 'sr_band6.tif', 'sr_band7.tif', 'sr_cloud_qa.tif'
+    ]
+
+    assets = [
+        'MTL.txt',
+        'ANG.txt'
+    ] diff --git a/bdc_collectors/usgs/landsat7.py b/bdc_collectors/usgs/landsat7.py new file mode 100644 index 0000000..efc5037 --- /dev/null +++ b/bdc_collectors/usgs/landsat7.py @@ -0,0 +1,38 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Define the supported Landsat-7 collections in USGS Earth Explorer catalog."""
+
+from .base import BaseLandsat
+
+
+class Landsat7(BaseLandsat):
+    """Simple abstraction for Landsat-7 DN."""
+
+    bands = [
+        'B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF',
+        'B6_VCID_1.TIF', 'B6_VCID_2.TIF', 'B7.TIF', 'B8.TIF', 'BQA.TIF'
+    ]
+
+    assets = [
+        'MTL.txt',
+        'ANG.txt'
+    ]
+
+
+class Landsat7SR(BaseLandsat):
+    """Simple abstraction for Landsat-7 Surface Reflectance."""
+
+    bands = [
+        'sr_band1.tif', 'sr_band2.tif', 'sr_band3.tif', 'sr_band4.tif', 'sr_band5.tif', 'sr_band6.tif', 'sr_band7.tif', 'sr_cloud_qa.tif'
+    ]
+
+    assets = [
+        'MTL.txt',
+        'ANG.txt'
+    ]
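To make the on-disk layout concrete, a worked walk-through of the helpers above for a Collection 1 scene (the prefix is a placeholder for DATA_DIR):

    # Scene LC08_L1TP_223064_20200831_20200906_01_T1:
    #   folder     = source()[:2] + satellite   ->  'LC' + '8'  ->  'LC8'
    #   year_month = sensing_date() 2020-08-31  ->  '2020-08'
    #   tile_id()                               ->  '223064'
    # compressed_file() therefore resolves to:
    #   <prefix>/Repository/Archive/LC8/2020-08/223064/LC08_L1TP_223064_20200831_20200906_01_T1.tar.gz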
diff --git a/bdc_collectors/usgs/landsat8.py b/bdc_collectors/usgs/landsat8.py new file mode 100644 index 0000000..2368f7a --- /dev/null +++ b/bdc_collectors/usgs/landsat8.py @@ -0,0 +1,39 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details.
+#
+
+"""Define the supported Landsat-8 collections in USGS Earth Explorer catalog."""
+
+from .base import BaseLandsat
+
+
+class Landsat8(BaseLandsat):
+    """Simple abstraction for Landsat-8 DN."""
+
+    bands = [
+        'B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF',
+        'B6.TIF', 'B7.TIF', 'B8.TIF', 'B9.TIF', 'B10.TIF',
+        'B11.TIF', 'BQA.TIF'
+    ]
+
+    assets = [
+        'MTL.txt',
+        'ANG.txt'
+    ]
+
+
+class Landsat8SR(BaseLandsat):
+    """Simple abstraction for Landsat-8 Surface Reflectance."""
+
+    bands = [
+        'sr_band1.tif', 'sr_band2.tif', 'sr_band3.tif', 'sr_band4.tif', 'sr_band5.tif', 'sr_band6.tif', 'sr_band7.tif', 'sr_cloud_qa.tif',
+    ]
+
+    assets = [
+        'MTL.txt',
+        'ANG.txt'
+    ] diff --git a/bdc_collectors/usgs/parser.py b/bdc_collectors/usgs/parser.py new file mode 100644 index 0000000..5127d0a --- /dev/null +++ b/bdc_collectors/usgs/parser.py @@ -0,0 +1,53 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Defines parsers for USGS catalog."""
+
+from datetime import datetime
+from typing import List
+
+from ..base import SceneParser as _SceneParser
+
+
+class LandsatScene(_SceneParser):
+    """Define the parser of Landsat Scene identifiers."""
+
+    fragments: List[str]
+
+    def __init__(self, scene_id: str):
+        """Create LandsatScene parser."""
+        super().__init__(scene_id)
+
+        fragments = scene_id.split('_')
+
+        if len(fragments) != 7 or fragments[0] not in ('LC08', 'LO08', 'LE07', 'LT05'):
+            raise RuntimeError(f'Invalid Landsat scene {scene_id}')
+
+        self.fragments = fragments
+
+    def tile_id(self):
+        """Retrieve the WRS2 path row."""
+        return self.fragments[2]
+
+    def sensing_date(self):
+        """Retrieve the scene sensing date."""
+        return datetime.strptime(self.fragments[3], '%Y%m%d')
+
+    def processing_date(self):
+        """Retrieve the scene processing date."""
+        return datetime.strptime(self.fragments[4], '%Y%m%d')
+
+    def satellite(self):
+        """Retrieve the Landsat satellite value (05,07,08...)."""
+        part = self.fragments[0]
+
+        return part[-2:]
+
+    def source(self):
+        """Retrieve first parameter of scene_id (LC08, etc.)."""
+        return self.fragments[0] diff --git a/bdc_collectors/utils.py b/bdc_collectors/utils.py new file mode 100644 index 0000000..e460631 --- /dev/null +++ b/bdc_collectors/utils.py @@ -0,0 +1,94 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Define the BDC-Collector utilities used along the package."""
+
+import contextlib
+import logging
+import os
+
+import requests
+from tqdm import tqdm
+
+from .exceptions import DownloadError
+
+
+@contextlib.contextmanager
+def working_directory(path):
+    """Change working directory and return to the previous one on exit.
+
+    Raises:
+        FileNotFoundError when it is not possible to change to the provided directory.
+
+    Args:
+        path (str): Directory to change
+
+    Returns:
+        str Path to the changed directory
+
+    Example:
+        >>> import os
+        >>> from tempfile import gettempdir
+        >>> TEMP_DIR = gettempdir()
+        >>> @working_directory(TEMP_DIR)
+        ... def create_file(filename):
+        ...     # Create file in Temporary folder
+        ...     print('Current dir: {}'.format(os.getcwd()))
+        ...     with open(filename, 'w') as f:
+        ...
f.write('Hello World')
+    """
+    owd = os.getcwd()
+    logging.debug("Changing working dir from %s to %s", owd, path)
+    try:
+        os.chdir(path)
+        yield path
+    finally:
+        logging.debug("Back to working dir %s", owd)
+        os.chdir(owd)
+
+
+def download_stream(file_path: str, response: requests.Response, chunk_size=1024*64, progress=False, offset=0, total_size=None):
+    """Download request stream data to disk.
+
+    Args:
+        file_path - Absolute file path to save
+        response - HTTP Response object
+        chunk_size - Block size to write, 64 KiB by default
+        progress - Display a progress bar
+        offset - Byte offset to resume a partial download
+        total_size - Expected file size, when known
+    """
+    parent = os.path.dirname(file_path)
+
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+
+    if not total_size:
+        total_size = int(response.headers.get('Content-Length', 0))
+
+    file_name = os.path.basename(file_path)
+
+    progress_bar = tqdm(
+        desc=file_name,
+        total=total_size,
+        unit="B",
+        unit_scale=True,
+        disable=not progress,
+        initial=offset
+    )
+
+    mode = 'a+b' if offset else 'wb'
+
+    # May throw exception for read-only directory
+    with response:
+        with open(file_path, mode) as stream:
+            for chunk in response.iter_content(chunk_size):
+                stream.write(chunk)
+                progress_bar.update(len(chunk))
+
+    progress_bar.close()
+
+    file_size = os.stat(file_path).st_size
+
+    # Only validate the file size when the server reported a Content-Length
+    if total_size and file_size != total_size:
+        os.remove(file_path)
+        raise DownloadError(f'Download file is corrupt. Expected {total_size} bytes, got {file_size}') diff --git a/bdc_collectors/version.py b/bdc_collectors/version.py new file mode 100644 index 0000000..286b827 --- /dev/null +++ b/bdc_collectors/version.py @@ -0,0 +1,11 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# +
+"""Version information for BDC-Collectors."""
+
+__version__ = '0.2.0' \ No newline at end of file diff --git a/docs/sphinx/Makefile b/docs/sphinx/Makefile new file mode 100644 index 0000000..8f98d2c --- /dev/null +++ b/docs/sphinx/Makefile @@ -0,0 +1,25 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2019-2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# +
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/sphinx/api.rst b/docs/sphinx/api.rst new file mode 100644 index 0000000..8227dd7 --- /dev/null +++ b/docs/sphinx/api.rst @@ -0,0 +1,33 @@ +.. + This file is part of BDC-Collectors. + Copyright (C) 2019-2020 INPE. + + BDC-Collectors is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + + +API Docs
+========
+
+
+.. automodule:: bdc_collectors.base
+   :members:
+
+.. automodule:: bdc_collectors.ext
+   :members:
+
+
+.. currentmodule:: bdc_collectors
+
+
+.. automodule:: bdc_collectors.scihub
+   :members:
+
+.. automodule:: bdc_collectors.google
+   :members:
+
+.. automodule:: bdc_collectors.onda
+   :members:
+
+..
automodule:: bdc_collectors.creodias + :members: \ No newline at end of file diff --git a/docs/sphinx/conf.py b/docs/sphinx/conf.py new file mode 100644 index 0000000..f445c9a --- /dev/null +++ b/docs/sphinx/conf.py @@ -0,0 +1,119 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +"""Configuration file for the BDC-Collectors extension's documentation. + +The documentation system is based on Sphinx. If you want to know +more about the options to be used for configuration, please, see: +- https://www.sphinx-doc.org/en/master/usage/configuration.html +""" + +import bdc_collectors +import sphinx_rtd_theme + +# -- Project information ----------------------------------------------------- + +project = 'BDC-Collectors' +copyright = '2019-2020, INPE' +author = 'Brazil Data Cube Team' +release = bdc_collectors.__version__ + +# -- General configuration --------------------------------------------------- + +# Enabled Sphinx extensions. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.napoleon', + 'sphinx.ext.todo', + 'sphinx_copybutton', + 'sphinx_rtd_theme', + # 'sphinx_tabs.tabs', +] + +# Paths that contain templates, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [ + '_build', + 'Thumbs.db', + '.DS_Store' +] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. +html_theme = 'sphinx_rtd_theme' + +html_theme_options = { + 'canonical_url': 'https://brazil-data-cube.github.io/', + 'analytics_id': 'XXXXXXXXXX', + 'logo_only': False, + 'display_version': True, + 'prev_next_buttons_location': 'both', + 'style_external_links': True, + #'vcs_pageview_mode': 'edit', + #'github_url': 'https://github.com/brazil-data-cube/docs-bdc', + 'style_nav_header_background': '#2980B9', + 'collapse_navigation': True, + 'sticky_navigation': False, + 'navigation_depth': 3, + 'includehidden': True, + 'titles_only': False +} + +#html_theme_path = '' + +#html_style = '' + +html_title = 'BDC-Collectors' + +html_context = { + 'display_github': False, + 'github_user': 'brazil-data-cube', + 'github_repo': 'bdc-collectors', + 'last_updated': False, + #'commit': False, +} + +html_show_sourcelink = False + +html_logo = './img/logo-bdc.png' + +html_favicon = './img/favicon.ico' + +#html_static_path = ['_static'] + +html_css_files = [ ] + +html_last_updated_fmt = '%b %d, %Y' + +html_show_sphinx = False + +html_search_language = 'en' + +numfig = True + +numfig_format = { + 'figure': 'Figure %s -', + 'table': 'Table %s -', + 'code-block': 'Code snippet %s -', + 'section': 'Section %s.' 
+} + +#def setup(app): +# app.add_stylesheet('bdc-db.css') + + +#todo_include_todos = True +#todo_emit_warnings = True +master_doc = 'index' diff --git a/docs/sphinx/img/favicon.ico b/docs/sphinx/img/favicon.ico new file mode 100644 index 0000000..19fcc62 Binary files /dev/null and b/docs/sphinx/img/favicon.ico differ diff --git a/docs/sphinx/img/logo-bdc.png b/docs/sphinx/img/logo-bdc.png new file mode 100644 index 0000000..ba47081 Binary files /dev/null and b/docs/sphinx/img/logo-bdc.png differ diff --git a/docs/sphinx/index.rst b/docs/sphinx/index.rst new file mode 100644 index 0000000..a0983da --- /dev/null +++ b/docs/sphinx/index.rst @@ -0,0 +1,39 @@ +.. + This file is part of BDC-Collectors. + Copyright (C) 2019-2020 INPE. + + BDC-Collectors is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + +.. include:: ../../README.rst + :end-before: Installation + + +.. toctree:: + :hidden: + + self + + +.. toctree:: + :maxdepth: 1 + :caption: Documentation + + installation + usage + api + + +.. toctree:: + :maxdepth: 2 + :caption: Additional Notes + + license + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/sphinx/installation.rst b/docs/sphinx/installation.rst new file mode 100644 index 0000000..14a6f80 --- /dev/null +++ b/docs/sphinx/installation.rst @@ -0,0 +1,9 @@ +.. + This file is part of BDC-Collectors. + Copyright (C) 2019-2020 INPE. + + BDC-Collectors is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + + +.. include:: ../../INSTALL.rst \ No newline at end of file diff --git a/docs/sphinx/license.rst b/docs/sphinx/license.rst new file mode 100644 index 0000000..717a2ff --- /dev/null +++ b/docs/sphinx/license.rst @@ -0,0 +1,20 @@ +.. + This file is part of BDC-Collectors. + Copyright (C) 2019-2020 INPE. + + BDC-Collectors is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + + +License +======= + + +.. include:: ../../LICENSE + + +Contact +======= + + +`Brazil Data Cube Team `_ \ No newline at end of file diff --git a/docs/sphinx/make.bat b/docs/sphinx/make.bat new file mode 100644 index 0000000..922152e --- /dev/null +++ b/docs/sphinx/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/sphinx/usage.rst b/docs/sphinx/usage.rst new file mode 100644 index 0000000..eb35526 --- /dev/null +++ b/docs/sphinx/usage.rst @@ -0,0 +1,9 @@ +.. + This file is part of BDC-Collectors. + Copyright (C) 2019-2020 INPE. 
+
+    BDC-Collectors is free software; you can redistribute it and/or modify it
+    under the terms of the MIT License; see LICENSE file for more details.
+
+
+.. include:: ../../USAGE.rst \ No newline at end of file diff --git a/examples/Dockerfile b/examples/Dockerfile new file mode 100644 index 0000000..7fadca7 --- /dev/null +++ b/examples/Dockerfile @@ -0,0 +1,30 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+FROM python:3.8.6
+
+ENV START_DATE ''
+ENV END_DATE ''
+ENV DELTA_DAYS ''
+ENV DATA_DIR ''
+ENV SHP_DATA_DIR ''
+ENV S2_GRID_NAME ''
+ENV BDC_CREODIAS_USER ''
+ENV BDC_CREODIAS_PASSWORD ''
+
+COPY . /app
+
+WORKDIR /app
+
+RUN pip3 install -U pip && \
+    pip3 install -U setuptools && \
+    pip3 install wheel && \
+    pip3 install -e .[all] && \
+    pip3 install fiona
+
+CMD ["python3", "examples/download_sentinel_tile.py"] \ No newline at end of file diff --git a/examples/creodias.py b/examples/creodias.py new file mode 100644 index 0000000..32f65a8 --- /dev/null +++ b/examples/creodias.py @@ -0,0 +1,38 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Download data from CREODIAS."""
+
+import os
+
+from flask import Flask
+from bdc_collectors import CollectorExtension
+
+
+app = Flask(__name__)
+
+ext = CollectorExtension(app)
+
+provider = ext.get_provider('CREODIAS')(username=os.getenv('BDC_USER', 'user@email.com'), password=os.getenv('BDC_PASSWORD', 'pass'))
+
+SCENES = [
+    "S2B_MSIL1C_20170919T140039_N0205_R067_T22MCB_20170919T140040",
+    "S2B_MSIL1C_20170906T135109_N0205_R024_T22MCB_20170906T135105",
+    "S2B_MSIL1C_20170926T135059_N0205_R024_T22MDB_20170926T135300",
+    "S2B_MSIL1C_20170909T140259_N0205_R067_T22MBB_20170909T140259",
+    "S2B_MSIL1C_20170919T140039_N0205_R067_T22MBB_20170919T140040",
+    "S2A_MSIL1C_20170911T135111_N0205_R024_T22MCB_20170911T135110",
+    "S2A_MSIL1C_20170904T140051_N0205_R067_T22MBB_20170904T140051",
+    "S2B_MSIL1C_20170909T140259_N0205_R067_T22MCB_20170909T140259",
+    "S2B_MSIL1C_20170926T135059_N0205_R024_T22MCB_20170926T135300",
+    "S2A_MSIL1C_20170924T140051_N0205_R067_T22MCB_20170924T140106",
+    "S2B_MSIL1C_20170906T135109_N0205_R024_T22MDB_20170906T135105",
+]
+
+for scene in SCENES:
+    res = provider.download(scene, output=os.getenv('DATA_DIR', '/tmp')) diff --git a/examples/download_sentinel_tile.py b/examples/download_sentinel_tile.py new file mode 100644 index 0000000..aca8662 --- /dev/null +++ b/examples/download_sentinel_tile.py @@ -0,0 +1,154 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +#
+
+"""Download an entire Sentinel-2 tile from a remote server.
+
+This example tries to download data from SciHub. When it fails, it
+falls back to CREODIAS (requires BDC_CREODIAS_USER and BDC_CREODIAS_PASSWORD to be set).
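+
+The script is driven by environment variables (values illustrative)::
+
+    export BDC_USER=user@email.com
+    export BDC_PASSWORD=password
+    export DATA_DIR=/data
+    export SHP_DATA_DIR=./shp
+    export START_DATE=2019-08-01
+    export END_DATE=2019-08-31
+    python3 examples/download_sentinel_tile.py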
+""" + +import logging +import os +import time +from datetime import datetime, timedelta +from logging.handlers import TimedRotatingFileHandler + +import fiona + +from bdc_collectors import CollectorExtension +from bdc_collectors.scihub.base import Sentinel2Scene +from bdc_collectors.exceptions import DownloadError +from flask import Flask + + +def setup_logger(): + logger = logging.getLogger(__name__) + logger.setLevel(logging.INFO) + + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(formatter) + + handler = TimedRotatingFileHandler( + 'application.log', + when='D', + backupCount=30 + ) + handler.suffix = '%Y%m%d%H%M' + handler.setFormatter(formatter) + + logger.addHandler(stream_handler) + logger.addHandler(handler) + + return logger + +logger = setup_logger() + +S2_GRID_DIR = os.getenv('SHP_DATA_DIR', './shp') +S2_GRID_NAME = os.getenv('SHP_FILENAME', 'grade_mgrs_s2_brasil.shp') +S2_GRID_FILE_NAME = os.path.join(S2_GRID_DIR, S2_GRID_NAME) + +if not os.path.exists(S2_GRID_FILE_NAME): + raise IOError('Sentinel grid shapefile not found') + +# Sentinel +with fiona.open(S2_GRID_FILE_NAME) as dataset: + TILES = [tile['properties']['name'] for tile in dataset.values()] + +START_DATE = datetime.strptime(os.getenv('START_DATE', '2019-08-01'), '%Y-%m-%d') +END_DATE = datetime.strptime(os.getenv('END_DATE', '2019-08-31'), '%Y-%m-%d') +DELTA_DAYS = timedelta(days=int(os.getenv('DELTA_DAYS', 10))) +DATA_DIR = os.getenv('DATA_DIR', '/data') +current_date = START_DATE +USER = os.getenv('BDC_USER', 'user') +PASSWORD = os.getenv('BDC_PASSWORD', 'password') + +# Flask +app = Flask(__name__) +ext = CollectorExtension(app) + +# SciHub Copernicus - https://scihub.copernicus.eu/dhus/#/home +sentinel = ext.get_provider('SciHub')(username=USER, password=PASSWORD, progress=True) + +creodias = None + +if os.getenv('BDC_CREODIAS_USER') and os.getenv('BDC_CREODIAS_PASSWORD'): + user = os.getenv('BDC_CREODIAS_USER') + passwd = os.getenv('BDC_CREODIAS_PASSWORD') + + creodias = ext.get_provider('CREODIAS')(username=user, password=passwd, progress=True) + + +logger.info(f'Download is starting') + +while current_date < END_DATE: + for tile in TILES: + tile_path = os.path.join(DATA_DIR, f'{tile}') + + os.makedirs(tile_path, exist_ok=True) + + try: + result = sentinel.search( + query='S2MSI1C', + platform='Sentinel-2', + date=(current_date, current_date + DELTA_DAYS), + cloudcoverpercentage=(0, 100), + filename=f'*{tile}*' + ) + + time.sleep(6) + + logger.info(f'Download: {tile} - {current_date}/{current_date + DELTA_DAYS}') + + uuid_scene_map = {item['uuid']: item for item in result} + + if len(uuid_scene_map) == 0: + logger.warning(f'No result for {current_date} - {tile}') + continue + + try: + downloaded, scheduled, failed = sentinel.download_all(result, output=tile_path, lta_retry_delay=30) + except: + downloaded = scheduled = {} + logger.error(f'Error in sentinel-sat {list(uuid_scene_map.keys())}') + # Look for local filed already downloaded and then check file integrity. + # TODO: Should we change .incomplete of CREODIAS to avoid byte conflict? 
error_map = sentinel.api.check_files(ids=list(uuid_scene_map.keys()), directory=tile_path)
+
+            # Map invalid files by scene
+            failed = {v[0]['id']: uuid_scene_map[v[0]['id']] for k, v in error_map.items()}
+
+            if scheduled or failed:
+                logger.info(f'{len(scheduled)} were scheduled by SciHub LTA and {len(failed)} failed.')
+                if creodias:
+                    total_errors = list(scheduled.keys()) + list(failed.keys())
+
+                    # Adapt to SceneResult
+                    scenes = [uuid_scene_map[uuid] for uuid in total_errors]
+
+                    downloaded = []
+                    failed = []
+                    for scene in scenes:
+                        try:
+                            creodias.download(scene.scene_id, output=tile_path)
+                            downloaded.append(scene)
+                        except Exception:
+                            failed.append(scene)
+
+                    logger.info(f'{len(downloaded)} were downloaded and {len(failed)} failed on creodias.')
+
+                    # Download from creodias - In parallel like sentinel-sat api
+                    # downloaded, scheduled, failed = creodias.download_all(scenes, output=tile_path, max_workers=2)
+
+        except DownloadError as e:
+            logger.error(str(e))
+        except Exception as e:
+            logger.error(f'Exception - {e}', exc_info=True)
+
+    current_date += DELTA_DAYS diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..c39cf1e --- /dev/null +++ b/pytest.ini @@ -0,0 +1,11 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2019-2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# +
+[pytest]
+addopts = --color=auto --cov=bdc_collectors --cov-report=term-missing
+testpaths = tests \ No newline at end of file diff --git a/run-tests.sh b/run-tests.sh new file mode 100755 index 0000000..fc46917 --- /dev/null +++ b/run-tests.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# +# This file is part of BDC-Collectors. +# Copyright (C) 2019-2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# +
+pydocstyle bdc_collectors tests setup.py && \
+isort bdc_collectors tests setup.py --check-only --diff --skip-glob "bdc_collectors/alembic/*" && \
+check-manifest --ignore ".travis-*" --ignore ".readthedocs.*" && \
+# sphinx-build -qnW --color -b doctest docs/sphinx/ docs/sphinx/_build/doctest && \
+pytest \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..9af823a --- /dev/null +++ b/setup.cfg @@ -0,0 +1,15 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2019-2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# +
+[aliases]
+test = pytest
+
+[build_sphinx]
+source-dir = docs/sphinx/
+build-dir = docs/sphinx/_build
+all_files = 1 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..8db2f41 --- /dev/null +++ b/setup.py @@ -0,0 +1,114 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details.
+# + +"""Setup for BDC-Collectors.""" + +import os + +from setuptools import find_packages, setup + +readme = open('README.rst').read() + +history = open('CHANGES.rst').read() + +docs_require = [ + 'Sphinx>=2.2', + 'sphinx_rtd_theme', + 'sphinx-copybutton', + 'sphinx-tabs', +] + +tests_require = [ + 'coverage>=4.5', + 'coveralls>=1.8', + 'pytest>=5.2', + 'pytest-cov>=2.8', + 'pytest-pep8>=1.0', + 'pydocstyle>=4.0', + 'isort>4.3', + 'check-manifest>=0.40', + 'requests-mock>=1.7' +] + +extras_require = { + 'docs': docs_require, + 'tests': tests_require, +} + +extras_require['all'] = [req for _, reqs in extras_require.items() for req in reqs] + +setup_requires = [ + 'pytest-runner>=5.2', +] + +install_requires = [ + 'bdc-catalog @ git+git://github.com/brazil-data-cube/bdc-catalog@v0.6.2', + 'Flask>=1.1.0', + 'google-cloud-storage>=1.28,<2', + 'landsatxplore>=0.6,<1', + 'rasterio>=1.1,<1.2', + 'redis>=3.5,<4', + 'sentinelsat>=0.14,<1', + 'Shapely>=1.7,<2', + 'stac.py>=0.9', + 'tqdm>=4.50' +] + +packages = find_packages() + +g = {} +with open(os.path.join('bdc_collectors', 'version.py'), 'rt') as fp: + exec(fp.read(), g) + version = g['__version__'] + +setup( + name='bdc-collectors', + version=version, + description=__doc__, + long_description=readme + '\n\n' + history, + keywords=['database', 'postgresql'], + license='MIT', + author='Brazil Data Cube Team', + author_email='brazildatacube@inpe.br', + url='https://github.com/brazil-data-cube/bdc-collectors', + packages=packages, + zip_safe=False, + include_package_data=True, + platforms='any', + entry_points={ + 'console_scripts': [ + 'bdc-collector = bdc_collectors.cli:cli' + ], + 'bdc_collectors.providers': [ + 'creodias = bdc_collectors.creodias', + 'earth_search = bdc_collectors.earth_search', + 'google = bdc_collectors.google', + 'usgs = bdc_collectors.usgs', + 'onda = bdc_collectors.onda', + 'scihub = bdc_collectors.scihub' + ], + 'bdc_db.scripts': [ + 'bdc_collectors = bdc_collectors.scripts' + ] + }, + extras_require=extras_require, + install_requires=install_requires, + setup_requires=setup_requires, + tests_require=tests_require, + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Environment :: Web Environment', + 'Intended Audience :: Education', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3.7', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Scientific/Engineering :: GIS', + ], +) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..49c17bb --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,31 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. 
+# + +"""Pytest fixtures.""" + +import pytest +from flask import Flask + +from bdc_collectors import create_app + + +@pytest.fixture(scope='class') +def flask(): + """Fixture to create Flask App.""" + _app = Flask('test') + + yield _app + + +@pytest.fixture(scope='class') +def app(): + """Fixture to create Flask app and configure BDC-Collectors Extension.""" + _app = create_app() + + with _app.app_context(): + yield _app diff --git a/tests/demo_provider/__init__.py b/tests/demo_provider/__init__.py new file mode 100644 index 0000000..2925f0f --- /dev/null +++ b/tests/demo_provider/__init__.py @@ -0,0 +1,29 @@ +# +# This file is part of BDC-Collectors. +# Copyright (C) 2020 INPE. +# +# BDC-Collectors is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +"""Demo package to emulate external Provider.""" + +from bdc_collectors.base import BaseProvider, SceneResult + + +def init_provider(): + """Register the DEMO provider.""" + return dict( + DEMO=DEMO + ) + + +class DEMO(BaseProvider): + """Define a simple abstraction of provider DEMO.""" + + def search(self, query, *args, **kwargs): + """Search for scenes.""" + return [SceneResult('theid', 100)] + + def download(self, scene_id: str, *args, **kwargs) -> str: + """Pass.""" \ No newline at end of file diff --git a/tests/jsons/scihub-sentinel-2.json b/tests/jsons/scihub-sentinel-2.json new file mode 100644 index 0000000..bb37832 --- /dev/null +++ b/tests/jsons/scihub-sentinel-2.json @@ -0,0 +1,682 @@ +[ + { + "title": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LCM_20200129T153004", + "link": [ + { + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('2afdcdb9-f697-429c-9210-8c50b5dfc4d9')/$value" + }, + { + "rel": "alternative", + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('2afdcdb9-f697-429c-9210-8c50b5dfc4d9')/" + }, + { + "rel": "icon", + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('2afdcdb9-f697-429c-9210-8c50b5dfc4d9')/Products('Quicklook')/$value" + } + ], + "id": "2afdcdb9-f697-429c-9210-8c50b5dfc4d9", + "summary": "Date: 2020-01-29T13:46:41.024Z, Instrument: MSI, Mode: , Satellite: Sentinel-2, Size: 698.07 MB", + "date": [ + { + "name": "datatakesensingstart", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "beginposition", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "endposition", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "ingestiondate", + "content": "2020-01-29T17:04:25.455Z" + } + ], + "int": [ + { + "name": "orbitnumber", + "content": "24045" + }, + { + "name": "relativeorbitnumber", + "content": "24" + } + ], + "double": { + "name": "cloudcoverpercentage", + "content": "53.6758" + }, + "str": [ + { + "name": "sensoroperationalmode", + "content": "INS-NOBS" + }, + { + "name": "gmlfootprint", + "content": "\n \n \n -11.8744213137028,-51.828186 -12.01004845598258,-51.858215 -12.158635741504792,-51.89102 -12.30708703160243,-51.92383 -12.455344953369442,-51.956787 -12.603549663595215,-51.989655 -12.750174880597198,-52.022125 -12.746030391294726,-52.842163 -11.753598992978919,-52.835297 -11.758300454469065,-51.82785 -11.8744213137028,-51.828186\n \n \n" + }, + { + "name": "footprint", + "content": "MULTIPOLYGON (((-52.022125 -12.750174880597198, -51.989655 -12.603549663595215, -51.956787 -12.455344953369442, -51.92383 -12.30708703160243, -51.89102 -12.158635741504792, -51.858215 -12.01004845598258, -51.828186 -11.8744213137028, -51.82785 -11.758300454469065, 
-52.835297 -11.753598992978919, -52.842163 -12.746030391294726, -52.022125 -12.750174880597198)))" + }, + { + "name": "tileid", + "content": "22LCM" + }, + { + "name": "hv_order_tileid", + "content": "LM22C" + }, + { + "name": "format", + "content": "SAFE" + }, + { + "name": "processingbaseline", + "content": "02.08" + }, + { + "name": "platformname", + "content": "Sentinel-2" + }, + { + "name": "filename", + "content": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LCM_20200129T153004.SAFE" + }, + { + "name": "instrumentname", + "content": "Multi-Spectral Instrument" + }, + { + "name": "instrumentshortname", + "content": "MSI" + }, + { + "name": "size", + "content": "698.07 MB" + }, + { + "name": "s2datatakeid", + "content": "GS2A_20200129T134641_024045_N02.08" + }, + { + "name": "producttype", + "content": "S2MSI1C" + }, + { + "name": "platformidentifier", + "content": "2015-028A" + }, + { + "name": "orbitdirection", + "content": "DESCENDING" + }, + { + "name": "platformserialidentifier", + "content": "Sentinel-2A" + }, + { + "name": "processinglevel", + "content": "Level-1C" + }, + { + "name": "identifier", + "content": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LCM_20200129T153004" + }, + { + "name": "level1cpdiidentifier", + "content": "S2A_OPER_MSI_L1C_TL_SGS__20200129T153004_A024045_T22LCM_N02.08" + }, + { + "name": "uuid", + "content": "2afdcdb9-f697-429c-9210-8c50b5dfc4d9" + } + ] + }, + { + "title": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LCN_20200129T153004", + "link": [ + { + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('b5b9e963-412b-4021-acfd-ec3a765c4e2b')/$value" + }, + { + "rel": "alternative", + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('b5b9e963-412b-4021-acfd-ec3a765c4e2b')/" + }, + { + "rel": "icon", + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('b5b9e963-412b-4021-acfd-ec3a765c4e2b')/Products('Quicklook')/$value" + } + ], + "id": "b5b9e963-412b-4021-acfd-ec3a765c4e2b", + "summary": "Date: 2020-01-29T13:46:41.024Z, Instrument: MSI, Mode: , Satellite: Sentinel-2, Size: 718.34 MB", + "date": [ + { + "name": "datatakesensingstart", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "beginposition", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "endposition", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "ingestiondate", + "content": "2020-01-29T17:04:22.143Z" + } + ], + "int": [ + { + "name": "orbitnumber", + "content": "24045" + }, + { + "name": "relativeorbitnumber", + "content": "24" + } + ], + "double": { + "name": "cloudcoverpercentage", + "content": "77.4568" + }, + "str": [ + { + "name": "sensoroperationalmode", + "content": "INS-NOBS" + }, + { + "name": "gmlfootprint", + "content": "\n \n \n -10.850043609987958,-52.82956 -10.854374836688937,-51.825256 -11.84727927337389,-51.828094 -11.842541243727453,-52.835876 -10.850043609987958,-52.82956\n \n \n" + }, + { + "name": "footprint", + "content": "MULTIPOLYGON (((-51.828094 -11.84727927337389, -51.825256 -10.854374836688937, -52.82956 -10.850043609987958, -52.835876 -11.842541243727453, -51.828094 -11.84727927337389)))" + }, + { + "name": "tileid", + "content": "22LCN" + }, + { + "name": "hv_order_tileid", + "content": "LN22C" + }, + { + "name": "format", + "content": "SAFE" + }, + { + "name": "processingbaseline", + "content": "02.08" + }, + { + "name": "platformname", + "content": "Sentinel-2" + }, + { + "name": "filename", + "content": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LCN_20200129T153004.SAFE" + }, + { + "name": 
"instrumentname", + "content": "Multi-Spectral Instrument" + }, + { + "name": "instrumentshortname", + "content": "MSI" + }, + { + "name": "size", + "content": "718.34 MB" + }, + { + "name": "s2datatakeid", + "content": "GS2A_20200129T134641_024045_N02.08" + }, + { + "name": "producttype", + "content": "S2MSI1C" + }, + { + "name": "platformidentifier", + "content": "2015-028A" + }, + { + "name": "orbitdirection", + "content": "DESCENDING" + }, + { + "name": "platformserialidentifier", + "content": "Sentinel-2A" + }, + { + "name": "processinglevel", + "content": "Level-1C" + }, + { + "name": "identifier", + "content": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LCN_20200129T153004" + }, + { + "name": "level1cpdiidentifier", + "content": "S2A_OPER_MSI_L1C_TL_SGS__20200129T153004_A024045_T22LCN_N02.08" + }, + { + "name": "uuid", + "content": "b5b9e963-412b-4021-acfd-ec3a765c4e2b" + } + ] + }, + { + "title": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LBQ_20200129T153004", + "link": [ + { + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('49e5de73-8d8e-4322-b1ac-bfdc990365d1')/$value" + }, + { + "rel": "alternative", + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('49e5de73-8d8e-4322-b1ac-bfdc990365d1')/" + }, + { + "rel": "icon", + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('49e5de73-8d8e-4322-b1ac-bfdc990365d1')/Products('Quicklook')/$value" + } + ], + "id": "49e5de73-8d8e-4322-b1ac-bfdc990365d1", + "summary": "Date: 2020-01-29T13:46:41.024Z, Instrument: MSI, Mode: , Satellite: Sentinel-2, Size: 579.72 MB", + "date": [ + { + "name": "datatakesensingstart", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "beginposition", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "endposition", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "ingestiondate", + "content": "2020-01-29T17:04:21.007Z" + } + ], + "int": [ + { + "name": "orbitnumber", + "content": "24045" + }, + { + "name": "relativeorbitnumber", + "content": "24" + } + ], + "double": { + "name": "cloudcoverpercentage", + "content": "99.621" + }, + "str": [ + { + "name": "sensoroperationalmode", + "content": "INS-NOBS" + }, + { + "name": "gmlfootprint", + "content": "\n \n \n -9.036045362835539,-53.72888 -9.04211607613231,-52.73056 -10.034779818934492,-52.735596 -10.028030036603742,-53.736786 -9.036045362835539,-53.72888\n \n \n" + }, + { + "name": "footprint", + "content": "MULTIPOLYGON (((-52.735596 -10.034779818934492, -52.73056 -9.04211607613231, -53.72888 -9.036045362835539, -53.736786 -10.028030036603742, -52.735596 -10.034779818934492)))" + }, + { + "name": "tileid", + "content": "22LBQ" + }, + { + "name": "hv_order_tileid", + "content": "LQ22B" + }, + { + "name": "format", + "content": "SAFE" + }, + { + "name": "processingbaseline", + "content": "02.08" + }, + { + "name": "platformname", + "content": "Sentinel-2" + }, + { + "name": "filename", + "content": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LBQ_20200129T153004.SAFE" + }, + { + "name": "instrumentname", + "content": "Multi-Spectral Instrument" + }, + { + "name": "instrumentshortname", + "content": "MSI" + }, + { + "name": "size", + "content": "579.72 MB" + }, + { + "name": "s2datatakeid", + "content": "GS2A_20200129T134641_024045_N02.08" + }, + { + "name": "producttype", + "content": "S2MSI1C" + }, + { + "name": "platformidentifier", + "content": "2015-028A" + }, + { + "name": "orbitdirection", + "content": "DESCENDING" + }, + { + "name": "platformserialidentifier", + "content": "Sentinel-2A" + }, + 
{ + "name": "processinglevel", + "content": "Level-1C" + }, + { + "name": "identifier", + "content": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LBQ_20200129T153004" + }, + { + "name": "level1cpdiidentifier", + "content": "S2A_OPER_MSI_L1C_TL_SGS__20200129T153004_A024045_T22LBQ_N02.08" + }, + { + "name": "uuid", + "content": "49e5de73-8d8e-4322-b1ac-bfdc990365d1" + } + ] + }, + { + "title": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LDQ_20200129T153004", + "link": [ + { + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('e02b26ba-b924-4d43-ba2b-50118a7f0f39')/$value" + }, + { + "rel": "alternative", + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('e02b26ba-b924-4d43-ba2b-50118a7f0f39')/" + }, + { + "rel": "icon", + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('e02b26ba-b924-4d43-ba2b-50118a7f0f39')/Products('Quicklook')/$value" + } + ], + "id": "e02b26ba-b924-4d43-ba2b-50118a7f0f39", + "summary": "Date: 2020-01-29T13:46:41.024Z, Instrument: MSI, Mode: , Satellite: Sentinel-2, Size: 464.33 MB", + "date": [ + { + "name": "datatakesensingstart", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "beginposition", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "endposition", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "ingestiondate", + "content": "2020-01-29T17:04:05.933Z" + } + ], + "int": [ + { + "name": "orbitnumber", + "content": "24045" + }, + { + "name": "relativeorbitnumber", + "content": "24" + } + ], + "double": { + "name": "cloudcoverpercentage", + "content": "68.4646" + }, + "str": [ + { + "name": "sensoroperationalmode", + "content": "INS-NOBS" + }, + { + "name": "gmlfootprint", + "content": "\n \n \n -9.045860660703191,-51.20508 -9.188198371545266,-51.23633 -9.336641376279744,-51.269073 -9.485180930664972,-51.301697 -9.633670617204393,-51.334503 -9.78228037102006,-51.367065 -9.930925719142737,-51.399628 -10.03867301309183,-51.42325 -10.038064846577397,-51.912872 -9.045070601654063,-51.910248 -9.045860660703191,-51.20508\n \n \n" + }, + { + "name": "footprint", + "content": "MULTIPOLYGON (((-51.42325 -10.03867301309183, -51.399628 -9.930925719142737, -51.367065 -9.78228037102006, -51.334503 -9.633670617204393, -51.301697 -9.485180930664972, -51.269073 -9.336641376279744, -51.23633 -9.188198371545266, -51.20508 -9.045860660703191, -51.910248 -9.045070601654063, -51.912872 -10.038064846577397, -51.42325 -10.03867301309183)))" + }, + { + "name": "tileid", + "content": "22LDQ" + }, + { + "name": "hv_order_tileid", + "content": "LQ22D" + }, + { + "name": "format", + "content": "SAFE" + }, + { + "name": "processingbaseline", + "content": "02.08" + }, + { + "name": "platformname", + "content": "Sentinel-2" + }, + { + "name": "filename", + "content": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LDQ_20200129T153004.SAFE" + }, + { + "name": "instrumentname", + "content": "Multi-Spectral Instrument" + }, + { + "name": "instrumentshortname", + "content": "MSI" + }, + { + "name": "size", + "content": "464.33 MB" + }, + { + "name": "s2datatakeid", + "content": "GS2A_20200129T134641_024045_N02.08" + }, + { + "name": "producttype", + "content": "S2MSI1C" + }, + { + "name": "platformidentifier", + "content": "2015-028A" + }, + { + "name": "orbitdirection", + "content": "DESCENDING" + }, + { + "name": "platformserialidentifier", + "content": "Sentinel-2A" + }, + { + "name": "processinglevel", + "content": "Level-1C" + }, + { + "name": "identifier", + "content": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LDQ_20200129T153004" 
+ }, + { + "name": "level1cpdiidentifier", + "content": "S2A_OPER_MSI_L1C_TL_SGS__20200129T153004_A024045_T22LDQ_N02.08" + }, + { + "name": "uuid", + "content": "e02b26ba-b924-4d43-ba2b-50118a7f0f39" + } + ] + }, + { + "title": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LCP_20200129T153004", + "link": [ + { + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('fd41ec65-dd9c-4024-ae63-12364b3913e8')/$value" + }, + { + "rel": "alternative", + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('fd41ec65-dd9c-4024-ae63-12364b3913e8')/" + }, + { + "rel": "icon", + "href": "https://scihub.copernicus.eu/apihub/odata/v1/Products('fd41ec65-dd9c-4024-ae63-12364b3913e8')/Products('Quicklook')/$value" + } + ], + "id": "fd41ec65-dd9c-4024-ae63-12364b3913e8", + "summary": "Date: 2020-01-29T13:46:41.024Z, Instrument: MSI, Mode: , Satellite: Sentinel-2, Size: 678.26 MB", + "date": [ + { + "name": "datatakesensingstart", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "beginposition", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "endposition", + "content": "2020-01-29T13:46:41.024Z" + }, + { + "name": "ingestiondate", + "content": "2020-01-29T17:03:45.07Z" + } + ], + "int": [ + { + "name": "orbitnumber", + "content": "24045" + }, + { + "name": "relativeorbitnumber", + "content": "24" + } + ], + "double": { + "name": "cloudcoverpercentage", + "content": "90.9345" + }, + "str": [ + { + "name": "sensoroperationalmode", + "content": "INS-NOBS" + }, + { + "name": "gmlfootprint", + "content": "\n \n \n -9.945890159770508,-52.82431 -9.949853059397606,-51.822876 -10.942816569269826,-51.8255 -10.93844921618605,-52.83011 -9.945890159770508,-52.82431\n \n \n" + }, + { + "name": "footprint", + "content": "MULTIPOLYGON (((-51.8255 -10.942816569269826, -51.822876 -9.949853059397606, -52.82431 -9.945890159770508, -52.83011 -10.93844921618605, -51.8255 -10.942816569269826)))" + }, + { + "name": "tileid", + "content": "22LCP" + }, + { + "name": "hv_order_tileid", + "content": "LP22C" + }, + { + "name": "format", + "content": "SAFE" + }, + { + "name": "processingbaseline", + "content": "02.08" + }, + { + "name": "platformname", + "content": "Sentinel-2" + }, + { + "name": "filename", + "content": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LCP_20200129T153004.SAFE" + }, + { + "name": "instrumentname", + "content": "Multi-Spectral Instrument" + }, + { + "name": "instrumentshortname", + "content": "MSI" + }, + { + "name": "size", + "content": "678.26 MB" + }, + { + "name": "s2datatakeid", + "content": "GS2A_20200129T134641_024045_N02.08" + }, + { + "name": "producttype", + "content": "S2MSI1C" + }, + { + "name": "platformidentifier", + "content": "2015-028A" + }, + { + "name": "orbitdirection", + "content": "DESCENDING" + }, + { + "name": "platformserialidentifier", + "content": "Sentinel-2A" + }, + { + "name": "processinglevel", + "content": "Level-1C" + }, + { + "name": "identifier", + "content": "S2A_MSIL1C_20200129T134641_N0208_R024_T22LCP_20200129T153004" + }, + { + "name": "level1cpdiidentifier", + "content": "S2A_OPER_MSI_L1C_TL_SGS__20200129T153004_A024045_T22LCP_N02.08" + }, + { + "name": "uuid", + "content": "fd41ec65-dd9c-4024-ae63-12364b3913e8" + } + ] + } +] \ No newline at end of file diff --git a/tests/jsons/usgs-landsat-8.json b/tests/jsons/usgs-landsat-8.json new file mode 100644 index 0000000..49c3b55 --- /dev/null +++ b/tests/jsons/usgs-landsat-8.json @@ -0,0 +1,232 @@ +[ + { + "acquisitionDate": "2020-01-02", + "startTime": "2020-01-02", + "endTime": 
"2020-01-02", + "spatialFootprint": { + "type": "Polygon", + "coordinates": [ + [ + [ + -53.85356, + -10.80827 + ], + [ + -52.1449, + -11.17345 + ], + [ + -51.7735, + -9.43737 + ], + [ + -53.47286, + -9.07603 + ], + [ + -53.85356, + -10.80827 + ] + ] + ] + }, + "sceneBounds": "-53.85356,-11.17345,-51.7735,-9.07603", + "browseUrl": "https://ims.cr.usgs.gov/browse/landsat_8_c1/2020/225/067/LC08_L1GT_225067_20200102_20200113_01_T2.jpg", + "dataAccessUrl": "https://earthexplorer.usgs.gov/metadata/full/landsat_8_c1/LC82250672020002LGN00/", + "downloadUrl": "https://earthexplorer.usgs.gov/download/external/options/landsat_8_c1/LC82250672020002LGN00/M2M/", + "entityId": "LC82250672020002LGN00", + "displayId": "LC08_L1GT_225067_20200102_20200113_01_T2", + "cloudCover": "85.09", + "metadataUrl": "https://earthexplorer.usgs.gov/metadata/full/landsat_8_c1/LC82250672020002LGN00/?responseType=viewXml", + "fgdcMetadataUrl": "https://earthexplorer.usgs.gov/metadata/fgdc/landsat_8_c1/LC82250672020002LGN00/", + "modifiedDate": "2020-01-02 13:34:07", + "orderUrl": null, + "bulkOrdered": false, + "ordered": false, + "summary": "ID: LC08_L1GT_225067_20200102_20200113_01_T2, Acquisition Date: 2020-01-02, Path: 225, Row: 67" + }, + { + "acquisitionDate": "2020-01-02", + "startTime": "2020-01-02", + "endTime": "2020-01-02", + "spatialFootprint": { + "type": "Polygon", + "coordinates": [ + [ + [ + -54.17401, + -12.2528 + ], + [ + -52.45584, + -12.6188 + ], + [ + -52.08267, + -10.88331 + ], + [ + -53.79014, + -10.52125 + ], + [ + -54.17401, + -12.2528 + ] + ] + ] + }, + "sceneBounds": "-54.17401,-12.6188,-52.08267,-10.52125", + "browseUrl": "https://ims.cr.usgs.gov/browse/landsat_8_c1/2020/225/068/LC08_L1TP_225068_20200102_20200113_01_T2.jpg", + "dataAccessUrl": "https://earthexplorer.usgs.gov/metadata/full/landsat_8_c1/LC82250682020002LGN00/", + "downloadUrl": "https://earthexplorer.usgs.gov/download/external/options/landsat_8_c1/LC82250682020002LGN00/M2M/", + "entityId": "LC82250682020002LGN00", + "displayId": "LC08_L1TP_225068_20200102_20200113_01_T2", + "cloudCover": "71.70", + "metadataUrl": "https://earthexplorer.usgs.gov/metadata/full/landsat_8_c1/LC82250682020002LGN00/?responseType=viewXml", + "fgdcMetadataUrl": "https://earthexplorer.usgs.gov/metadata/fgdc/landsat_8_c1/LC82250682020002LGN00/", + "modifiedDate": "2020-01-02 13:34:07", + "orderUrl": null, + "bulkOrdered": false, + "ordered": false, + "summary": "ID: LC08_L1TP_225068_20200102_20200113_01_T2, Acquisition Date: 2020-01-02, Path: 225, Row: 68" + }, + { + "acquisitionDate": "2020-01-02", + "startTime": "2020-01-02", + "endTime": "2020-01-02", + "spatialFootprint": { + "type": "Polygon", + "coordinates": [ + [ + [ + -54.49737, + -13.69661 + ], + [ + -52.76837, + -14.06356 + ], + [ + -52.39321, + -12.3286 + ], + [ + -54.11006, + -11.9657 + ], + [ + -54.49737, + -13.69661 + ] + ] + ] + }, + "sceneBounds": "-54.49737,-14.06356,-52.39321,-11.9657", + "browseUrl": "https://ims.cr.usgs.gov/browse/landsat_8_c1/2020/225/069/LC08_L1TP_225069_20200102_20200113_01_T2.jpg", + "dataAccessUrl": "https://earthexplorer.usgs.gov/metadata/full/landsat_8_c1/LC82250692020002LGN00/", + "downloadUrl": "https://earthexplorer.usgs.gov/download/external/options/landsat_8_c1/LC82250692020002LGN00/M2M/", + "entityId": "LC82250692020002LGN00", + "displayId": "LC08_L1TP_225069_20200102_20200113_01_T2", + "cloudCover": "77.22", + "metadataUrl": "https://earthexplorer.usgs.gov/metadata/full/landsat_8_c1/LC82250692020002LGN00/?responseType=viewXml", + "fgdcMetadataUrl": 
"https://earthexplorer.usgs.gov/metadata/fgdc/landsat_8_c1/LC82250692020002LGN00/", + "modifiedDate": "2020-01-02 13:34:07", + "orderUrl": null, + "bulkOrdered": false, + "ordered": false, + "summary": "ID: LC08_L1TP_225069_20200102_20200113_01_T2, Acquisition Date: 2020-01-02, Path: 225, Row: 69" + }, + { + "acquisitionDate": "2020-01-04", + "startTime": "2020-01-04", + "endTime": "2020-01-04", + "spatialFootprint": { + "type": "Polygon", + "coordinates": [ + [ + [ + -50.76421, + -10.80856 + ], + [ + -49.05534, + -11.17381 + ], + [ + -48.68391, + -9.43778 + ], + [ + -50.38349, + -9.07637 + ], + [ + -50.76421, + -10.80856 + ] + ] + ] + }, + "sceneBounds": "-50.76421,-11.17381,-48.68391,-9.07637", + "browseUrl": "https://ims.cr.usgs.gov/browse/landsat_8_c1/2020/223/067/LC08_L1GT_223067_20200104_20200113_01_T2.jpg", + "dataAccessUrl": "https://earthexplorer.usgs.gov/metadata/full/landsat_8_c1/LC82230672020004LGN00/", + "downloadUrl": "https://earthexplorer.usgs.gov/download/external/options/landsat_8_c1/LC82230672020004LGN00/M2M/", + "entityId": "LC82230672020004LGN00", + "displayId": "LC08_L1GT_223067_20200104_20200113_01_T2", + "cloudCover": "99.94", + "metadataUrl": "https://earthexplorer.usgs.gov/metadata/full/landsat_8_c1/LC82230672020004LGN00/?responseType=viewXml", + "fgdcMetadataUrl": "https://earthexplorer.usgs.gov/metadata/fgdc/landsat_8_c1/LC82230672020004LGN00/", + "modifiedDate": "2020-01-04 13:22:33", + "orderUrl": null, + "bulkOrdered": false, + "ordered": false, + "summary": "ID: LC08_L1GT_223067_20200104_20200113_01_T2, Acquisition Date: 2020-01-04, Path: 223, Row: 67" + }, + { + "acquisitionDate": "2020-01-04", + "startTime": "2020-01-04", + "endTime": "2020-01-04", + "spatialFootprint": { + "type": "Polygon", + "coordinates": [ + [ + [ + -51.0846, + -12.25287 + ], + [ + -49.36616, + -12.61894 + ], + [ + -48.99304, + -10.88344 + ], + [ + -50.70078, + -10.5213 + ], + [ + -51.0846, + -12.25287 + ] + ] + ] + }, + "sceneBounds": "-51.0846,-12.61894,-48.99304,-10.5213", + "browseUrl": "https://ims.cr.usgs.gov/browse/landsat_8_c1/2020/223/068/LC08_L1GT_223068_20200104_20200113_01_T2.jpg", + "dataAccessUrl": "https://earthexplorer.usgs.gov/metadata/full/landsat_8_c1/LC82230682020004LGN00/", + "downloadUrl": "https://earthexplorer.usgs.gov/download/external/options/landsat_8_c1/LC82230682020004LGN00/M2M/", + "entityId": "LC82230682020004LGN00", + "displayId": "LC08_L1GT_223068_20200104_20200113_01_T2", + "cloudCover": "99.99", + "metadataUrl": "https://earthexplorer.usgs.gov/metadata/full/landsat_8_c1/LC82230682020004LGN00/?responseType=viewXml", + "fgdcMetadataUrl": "https://earthexplorer.usgs.gov/metadata/fgdc/landsat_8_c1/LC82230682020004LGN00/", + "modifiedDate": "2020-01-04 13:22:33", + "orderUrl": null, + "bulkOrdered": false, + "ordered": false, + "summary": "ID: LC08_L1GT_223068_20200104_20200113_01_T2, Acquisition Date: 2020-01-04, Path: 223, Row: 68" + } +] \ No newline at end of file diff --git a/tests/test_ext.py b/tests/test_ext.py new file mode 100644 index 0000000..132fd24 --- /dev/null +++ b/tests/test_ext.py @@ -0,0 +1,81 @@ +import json +import re +from typing import Type +from unittest import mock + +import pytest +from pkg_resources import EntryPoint, resource_string +from werkzeug.utils import import_string + +from bdc_collectors.base import BaseProvider, SceneResult +from bdc_collectors.ext import CollectorExtension + + +def _provider(app, name='USGS') -> Type[BaseProvider]: + ext = _extension(app) + + return ext.get_provider(name) + + +def _extension(app) -> 
CollectorExtension: + return app.extensions['bdc:collector'] + + +class MockEntryPoint(EntryPoint): + def load(self): + if self.name == 'importfail': + raise ImportError() + else: + return import_string(self.name) + + +def _mock_entry_points(name): + data = { + 'bdc_collectors.providers': [ + MockEntryPoint('demo_provider', 'demo_provider'), + ], + } + names = data.keys() if name is None else [name] + for key in names: + for entry_point in data.get(key, []): + yield entry_point + + +class FakeProvider(BaseProvider): + def search(self, query, *args, **kwargs): + return [SceneResult('theid', 100)] + + def download(self, scene_id: str, *args, **kwargs) -> str: + """Pass""" + + +class TestCollectorExtension: + def test_get_provider(self, app, requests_mock): + provider_class = _provider(app, 'USGS') + + assert provider_class is not None + + def test_add_provider(self, app): + ext = _extension(app) + + ext.state.add_provider('FAKE', FakeProvider) + + assert 'FAKE' in ext.state.providers + + with pytest.raises(AssertionError): + ext.state.add_provider('FAKE', FakeProvider) + + @mock.patch('pkg_resources.iter_entry_points', _mock_entry_points) + def test_load_provider_through_entrypoint(self, flask): + ext = CollectorExtension(flask) + + provider_class = ext.get_provider('DEMO') + + assert provider_class is not None + + def test_list_providers(self, app): + ext = _extension(app) + + providers = ext.list_providers() + + assert len(providers) > 0 \ No newline at end of file diff --git a/tests/test_scihub.py b/tests/test_scihub.py new file mode 100644 index 0000000..7893c64 --- /dev/null +++ b/tests/test_scihub.py @@ -0,0 +1,80 @@ +import json +import re +from typing import Type + +import pytest +from pkg_resources import resource_string + +from bdc_collectors.base import BaseProvider, SceneResult +from bdc_collectors.ext import CollectorExtension + + +def _provider(app, name='SciHub') -> Type[BaseProvider]: + ext: CollectorExtension = app.extensions['bdc:collector'] + + return ext.get_provider(name) + + +base_url = 'https://scihub.copernicus.eu/apihub/' +search_url = base_url + 'search' + + +@pytest.fixture +def requests_mock(requests_mock): + requests_mock.get(re.compile('https://geojson.org/')) + yield requests_mock + + +@pytest.fixture(scope='session') +def catalog_scihub(): + search_s2 = resource_string(__name__, 'jsons/scihub-sentinel-2.json') + + return json.loads(search_s2) + + +class TestSciHub: + def test_missing_credentials(self, app): + provider_class = _provider(app) + + assert provider_class is not None + + with pytest.raises(RuntimeError): + provider_class() + + def test_lazy_scihub_connection(self, app, requests_mock): + provider_class = _provider(app) + + requests_mock.post(base_url, json={'error': [], 'data': {}}, status_code=200, headers={'content-type':'application/json'}) + + provider = provider_class(username='theuser', password='thepassword', lazy=True) + + assert hasattr(provider, 'api') + + def test_search(self, app, requests_mock, catalog_scihub): + provider_class = _provider(app) + + requests_mock.post(base_url, json={'error': [], 'data': {}}, status_code=200, + headers={'content-type': 'application/json'}) + + provider = provider_class(username='theuser', password='thepassword', lazy=True) + + json_result = dict( + feed={ + "opensearch:totalResults": len(catalog_scihub), + "entry": catalog_scihub + } + ) + + requests_mock.post(search_url, json=json_result, status_code=200, headers={'content-type': 'application/json'}) + + res = provider.search('S2MSI1C', 
start_date='2020-01-01', end_date='2020-01-10', + platform='Sentinel-2', cloud_cover=100, bbox=[-54, -12, -52, -10]) + + assert len(res) > 0 + + for scene in res: + assert isinstance(scene, SceneResult) + + # TODO: Implement test download + # def test_download(self): + # pass \ No newline at end of file diff --git a/tests/test_usgs.py b/tests/test_usgs.py new file mode 100644 index 0000000..ec94d2e --- /dev/null +++ b/tests/test_usgs.py @@ -0,0 +1,141 @@ +import json +import re +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Type +from unittest import mock + +import pytest +from pkg_resources import resource_string + +from bdc_collectors.base import BaseProvider, SceneResult +from bdc_collectors.ext import CollectorExtension + + +def _provider(app, name='USGS') -> Type[BaseProvider]: + ext: CollectorExtension = app.extensions['bdc:collector'] + + return ext.get_provider(name) + + +base_url = 'https://earthexplorer.usgs.gov/inventory/json/v/1.4.1/{context}' +mock_login_url = base_url.format(context='login') +mock_logout_url = base_url.format(context='logout') +mock_search_url = base_url.format(context='search') +mock_download_url = 'https://earthexplorer.usgs.gov/download/5e83d0b84df8d8c2/LC82250672020002LGN00/EE/' + +@pytest.fixture +def requests_mock(requests_mock): + requests_mock.get(re.compile('https://geojson.org/')) + yield requests_mock + + +@pytest.fixture(scope='session') +def catalog_usgs(): + search_l8 = resource_string(__name__, 'jsons/usgs-landsat-8.json') + + return json.loads(search_l8) + + +class TestUSGS: + def test_missing_credentials(self, app): + provider_class = _provider(app, 'USGS') + + assert provider_class is not None + + with pytest.raises(RuntimeError): + provider_class() + + def test_lazy_usgs_connection(self, app, requests_mock): + provider_class = _provider(app, 'USGS') + + requests_mock.post(mock_login_url, json={'error': [], 'data': {}}, status_code=200, headers={'content-type':'application/json'}) + + provider = provider_class(username='theuser', password='thepassword', lazy=True) + + assert hasattr(provider, 'api') and provider.api is None + + def test_provider_search(self, app, requests_mock, catalog_usgs): + provider_class = _provider(app, 'USGS') + + assert provider_class is not None + + requests_mock.post(mock_login_url, json={'error': [], 'data': {}}, status_code=200, headers={'content-type':'application/json'}) + + provider = provider_class(username='theuser', password='thepassword') + + assert requests_mock.called + + bbox = [-54, -12, -50, -10] + + requests_mock.get(mock_search_url, json={'error': [], 'data': dict(results=catalog_usgs)}, status_code=200, headers={'content-type':'application/json'}) + + res = provider.search('LANDSAT_8_C1', start_date='2020-01-01', end_date='2020-01-31', bbox=bbox) + + for found in res: + assert isinstance(found, SceneResult) + + # Mock logout url since it is attached to destructor + requests_mock.get(mock_logout_url, json={'error': [], 'data': {}}, status_code=200, headers={'content-type':'application/json'}) + + def test_search_and_custom_validate(self, app, requests_mock, catalog_usgs): + provider_class = _provider(app, 'USGS') + + requests_mock.post(mock_login_url, json={'error': [], 'data': {}}, status_code=200, headers={'content-type':'application/json'}) + + provider = provider_class(username='theuser', password='thepassword') + + bbox = [-54, -12, -50, -10] + + requests_mock.get(mock_search_url, json={'error': [], 'data': dict(results=catalog_usgs)}, status_code=200, 
headers={'content-type':'application/json'}) + + def _custom_validate(scene: dict) -> bool: + return scene['displayId'].endswith('T1') # Only T1 files + + res = provider.search('LANDSAT_8_C1', start_date='2020-01-01', + end_date='2020-01-31', bbox=bbox, validate=_custom_validate) + + assert len(res) == 0 + + invalid_validate = 'invalid_validate' + + with pytest.raises(ValueError): + provider.search('LANDSAT_8_C1', start_date='2020-01-01', + end_date='2020-01-31', bbox=bbox, validate=invalid_validate) + + # Mock logout url since it is attached to destructor + requests_mock.get(mock_logout_url, json={'error': [], 'data': {}}, status_code=200, headers={'content-type':'application/json'}) + + @mock.patch('landsatxplore.earthexplorer.EarthExplorer.login') + def test_download(self, mock_ee, app, requests_mock, catalog_usgs): + provider_class = _provider(app, 'USGS') + + requests_mock.post(mock_login_url, json={'error': [], 'data': {}}, status_code=200, + headers={'content-type': 'application/json'}) + + provider = provider_class(username='theuser', password='thepassword') + + bbox = [-54, -12, -50, -10] + + requests_mock.get(mock_search_url, json={'error': [], 'data': dict(results=catalog_usgs)}, status_code=200, + headers={'content-type': 'application/json'}) + + res = provider.search('LANDSAT_8_C1', start_date='2020-01-01', end_date='2020-01-31', bbox=bbox) + + requests_mock.get(mock_download_url, content=b'', status_code=200, + headers={ + 'content-type': 'application/gzip', + 'Content-Length': '0', + 'Content-Disposition': f'{res[0].scene_id}.tar.gz' + }) + + with TemporaryDirectory() as tmp: + destination = provider.download(res[0]['entityId'], output=tmp) + + path = Path(destination) + assert path.name == f'{res[0].scene_id}.tar.gz' + assert path.exists() and path.stat().st_size == 0 + + # Mock logout url since it is attached to destructor + requests_mock.get(mock_logout_url, json={'error': [], 'data': {}}, status_code=200, + headers={'content-type': 'application/json'}) diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..a6deb58 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,65 @@ +import os +import re +from tempfile import TemporaryDirectory + +import pytest +import requests + +from bdc_collectors.exceptions import DownloadError +from bdc_collectors.utils import download_stream, working_directory + + +@pytest.fixture +def requests_mock(requests_mock): + requests_mock.get(re.compile('https://geojson.org/')) + yield requests_mock + + +mock_url = 'http://localhost' + + +def test_download_stream(requests_mock): + requests_mock.get(mock_url, content=b'1', + status_code=200, + headers={ + 'content-type': 'application/gzip', + 'Content-Length': '1', + }) + + resp = requests.get(mock_url, stream=True) + + with TemporaryDirectory() as tmp: + out = os.path.join(tmp, 'file') + + download_stream(out, resp) + + assert os.path.exists(out) and os.stat(out).st_size == 1 + + +def test_remove_file_corrupt_download_stream(requests_mock): + requests_mock.get(mock_url, content=b'', + status_code=200, + headers={ + 'content-type': 'application/gzip', + 'Content-Length': '1', + }) + + resp = requests.get(mock_url, stream=True) + + with TemporaryDirectory() as tmp: + out = os.path.join(tmp, 'file') + + with pytest.raises(DownloadError): + download_stream(out, resp) + + assert not os.path.exists(out) + + +def test_change_work_dir(): + old = os.getcwd() + + with TemporaryDirectory() as tmp: + with working_directory(tmp): + assert os.getcwd() == tmp + + assert os.getcwd() 
== old