diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d75651197..bfbde2287 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -3,9 +3,10 @@ Include a brief description of the changes being proposed, and why they are nece ### Closes issues (optional) - Closes Issue #000 -- Closes Issue #000 -### Checklist (replace `[ ]` with `[x]` to check off) +### Checklist +(Replace `[ ]` with `[x]` to check off) + - [ ] Notebook created using the [DEA-notebooks template](https://github.com/GeoscienceAustralia/dea-notebooks/tree/develop) - [ ] Remove any unused Python packages from `Load packages` - [ ] Remove any unused/empty code cells @@ -15,5 +16,6 @@ Include a brief description of the changes being proposed, and why they are nece - [ ] Clear all outputs, run notebook from start to finish, and save the notebook in the state where all cells have been sequentially evaluated - [ ] Test notebook on both the `NCI` and `DEA Sandbox` (flag if not working as part of PR and ask for help to solve if needed) - [ ] If applicable, update the `Notebook currently compatible with the NCI|DEA Sandbox environment only` line below the notebook title to reflect the environments the notebook is compatible with +- [ ] Check for any spelling mistakes using the DEA Sandbox's built-in spellchecker (double click on markdown cells then right-click on pink highlighted words). 
For example: - +![sandbox_spellchecker](https://github.com/GeoscienceAustralia/dea-notebooks/assets/17680388/c5e5848b-fd54-4eb5-aae9-29838761f2af) diff --git a/.github/workflows/configs/spellcheck_config.yaml b/.github/workflows/configs/spellcheck_config.yaml deleted file mode 100644 index f2a46844d..000000000 --- a/.github/workflows/configs/spellcheck_config.yaml +++ /dev/null @@ -1,19 +0,0 @@ -matrix: -- name: Markdown - sources: - - '**/*.md' - - '**/*.rst' - default_encoding: utf-8 - aspell: - lang: en - dictionary: - wordlists: - - .github/workflows/configs/spellcheck_wordlist.txt - encoding: utf-8 - pipeline: - - pyspelling.filters.markdown: - - pyspelling.filters.html: - comments: false - ignores: - - code - - pre diff --git a/.github/workflows/configs/spellcheck_wordlist.txt b/.github/workflows/configs/spellcheck_wordlist.txt deleted file mode 100644 index 7d6fb50a3..000000000 --- a/.github/workflows/configs/spellcheck_wordlist.txt +++ /dev/null @@ -1,216 +0,0 @@ -Abhik -Abreu -actioncheckout -AEO -Ai -Alam -Almeida -ANU -apache -ard -autosummary -bandindices -Biogeosciences -bom -Brovey -changefilmstrips -Cherukuru -Choo -compag -conda -creativecommons -crophealth -CRSs -CSVs -Customizable -da -DaSilva -Dask -dask -datahandling -dataset -datasets -De -de -dea -deacoastlines -DEA's -decadal -Deckker -Dieback -DOI -doi -Downes -Drosdowsky -Drylands -Dunefield -dx -ecss -El -else's -EO -ESA -Estuarine -Eucalypt -Fanson -faq -Fäth -fbf -ffgc -Fickas -Flinders -Förtsch -Freebairn -Gb -Geoinformation -geojson -geomedian -Geomorphic -Geopython -Geoscience -GeoscienceAustralia -Geospatial -geospatial -GeoTIFF -GeoTIFFs -GIS -gis -GitHub -github -Github's -Grayson -graysoncooke -gridded -Guerschman -Hendon -Hesp -Hillman -Hsin -html -http -https -Hutley -hyperparameter -Ierodiaconou -IGARSS -imageexport -img -interpretable -intertidal -io -ipynb -jpg -Jupyter -Karim -Kemmerer -Krause -Kuhnert -Kuo -landcover -Landforms -Lawes -Lehmann -Lun -Lymburner -Marvanek 
-maxdepth -McPhail -mdpi -Metternicht -miningrehab -Mujahid -Müller -Multidecadal -Nanson -NDVI -NDWI -NetCDFs -Newey -Niño -NSW -numpy -ODC -Opdyke -opendatacube -opensource -Otte -Paget -pansharpening -parallelization -pdf -Penton -phenology -Photogrammetry -Pillans -Polygonise -postgrad -pre -Preprint -Pucino -PyPI -pypi -PySpelling -pyspelling -Ransley -rasterising -Rasterize -rasters -README -realtime -Reflectance -Renzullo -Reprojecting -RG -RGB -rgb -Ronan -rse -RSGISLIB -rst -Sagar -Salles -scalability -scalable -Schuldt -Sengupta -shapefile -shapefiles -Shendryk -Sixsmith -spatio -STAC -stackexchange -Streeton -submodule -Subpixel -subpixel -Sunglint -svg -sys -tandfonline -Teng -Thiel -Ticehurst -timeseries -toctree -titlesonly -toolchain -Tsai -Tseng -UAV -ubuntu -Ullmann -Usback -USGS's -Vaze -vectorising -vectorize -Waterbodies -waterbodies -waterbody -widgetconstructors -WR -www -xarray -yaml -yml diff --git a/.github/workflows/spellcheck.yaml b/.github/workflows/spellcheck.yaml deleted file mode 100644 index 881d47928..000000000 --- a/.github/workflows/spellcheck.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: Spell check -on: - pull_request: - branches: - - 'develop' - push: - branches: - - 'develop' - - workflow_dispatch: - -jobs: - pyspelling: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: rojopolis/spellcheck-github-actions@0.29.0 - name: Spellcheck - with: - config_path: .github/workflows/configs/spellcheck_config.yaml \ No newline at end of file diff --git a/.github/workflows/test_notebooks.yml b/.github/workflows/test_notebooks.yml index af20b9402..045d60f09 100644 --- a/.github/workflows/test_notebooks.yml +++ b/.github/workflows/test_notebooks.yml @@ -36,7 +36,9 @@ jobs: aws-region: ap-southeast-2 - name: Copy tide modelling files with the AWS CLI - run: aws s3 sync s3://dea-non-public-data/tide_models/tide_models tide_models + run: | + aws s3 sync s3://dea-non-public-data/tide_models/tide_models/fes2014 
tide_models/fes2014 + aws s3 sync s3://dea-non-public-data/tide_models/tide_models/hamtide tide_models/hamtide - name: Login to Amazon ECR Private id: login-ecr diff --git a/Beginners_guide/README.rst b/Beginners_guide/README.rst index 62eaa1025..9eab47ca8 100644 --- a/Beginners_guide/README.rst +++ b/Beginners_guide/README.rst @@ -5,7 +5,6 @@ The Beginners Guide contains introductory notebooks aimed at introducing Jupyter .. toctree:: :maxdepth: 1 - :caption: Beginner's Guide 01_Jupyter_notebooks.ipynb 02_DEA.ipynb @@ -25,7 +24,6 @@ Once you have you have completed the beginner tutorials, join advanced users in .. toctree:: :maxdepth: 2 - :caption: Guided Tutorial Guided_tutorial.ipynb diff --git a/DEA_products/DEA_Wetlands_Insight_Tool.ipynb b/DEA_products/DEA_Wetlands_Insight_Tool.ipynb index 529795dde..b8b80595a 100755 --- a/DEA_products/DEA_Wetlands_Insight_Tool.ipynb +++ b/DEA_products/DEA_Wetlands_Insight_Tool.ipynb @@ -70,7 +70,7 @@ "\n", "## Related products\n", "* [DEA Fractional Cover (Landsat)](../DEA_products/DEA_Fractional_Cover.ipynb)\n", - "* [DEA Tasseled Cap Indices Percentiles Calendar Year (Landsat)](https://docs.dea.ga.gov.au/data/product/dea-wetness-percentiles-landsat)\n", + "* [DEA Tasseled Cap Indices Percentiles Calendar Year (Landsat)](https://docs.dea.ga.gov.au/data/product/dea-tasseled-cap-percentiles-landsat/)\n", "* [DEA Water Observations (Landsat)](../DEA_products/DEA_Water_Observations.ipynb)" ] }, diff --git a/DEA_products/README.rst b/DEA_products/README.rst index b4641b605..90c875eef 100644 --- a/DEA_products/README.rst +++ b/DEA_products/README.rst @@ -5,7 +5,6 @@ Notebooks introducing DEA's satellite datasets and derived products, including h .. 
toctree:: :maxdepth: 1 - :caption: DEA products DEA_Landsat_Surface_Reflectance.ipynb DEA_Sentinel2_Surface_Reflectance.ipynb diff --git a/How_to_guides/README.rst b/How_to_guides/README.rst index 675dd5dec..ef8d21d10 100644 --- a/How_to_guides/README.rst +++ b/How_to_guides/README.rst @@ -5,7 +5,6 @@ A recipe book of simple code examples demonstrating how to perform common geospa .. toctree:: :maxdepth: 1 - :caption: How to guides Analyse_multiple_polygons.ipynb Animated_timeseries.ipynb diff --git a/How_to_guides/Reprojecting_data.ipynb b/How_to_guides/Reprojecting_data.ipynb index 492a4182b..e45986ae8 100644 --- a/How_to_guides/Reprojecting_data.ipynb +++ b/How_to_guides/Reprojecting_data.ipynb @@ -169,6 +169,7 @@ "Now we have loaded our raster dataset, we can inspect its `GeoBox` object that we will use to allow us to reproject data.\n", "The `GeoBox` can be accessed using the `.geobox` method.\n", "It includes a set of information that together completely define the spatial grid of our data:\n", + "\n", "* The width (e.g. `95`) and height (e.g. `90`) of our data in pixels\n", "* An `Affine` object which defines the spatial resolution (e.g. `-0.0020751667555555255` and `0.0020769114631576106`) and spatial position (e.g. `149.036528545` and `-35.198132594`) of our data\n", "* The coordinate reference system of our data (e.g. 
`+init=epsg:4326`)\n" @@ -779,7 +780,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.13" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/Interactive_apps/Generating_satellite_animations.ipynb b/Interactive_apps/Generating_satellite_animations.ipynb index 382af8271..e2f39e0a5 100644 --- a/Interactive_apps/Generating_satellite_animations.ipynb +++ b/Interactive_apps/Generating_satellite_animations.ipynb @@ -79,7 +79,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -130,6 +129,7 @@ "\n", "The animation tool supports several advanced features. \n", "To access these, click on the `Advanced` tab in the menu to the left of the map to expand it:\n", + "\n", "* `Frame interval`: The frame rate used to animate the satellite data. \n", "Values are in milliseconds - larger values will produce a longer, slower animation.\n", "* `Resolution`: The spatial resolution to load data (in metres). By default, the tool will automatically set the best possible resolution depending on the satellites selected (i.e. 30 m for Landsat, 10 m for Sentinel-2). \n", @@ -251,7 +251,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.13" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/Interactive_apps/README.rst b/Interactive_apps/README.rst index aa004d08c..43dd18d79 100644 --- a/Interactive_apps/README.rst +++ b/Interactive_apps/README.rst @@ -5,7 +5,6 @@ Interactive widgets and apps that require little or no coding to run .. 
toctree:: :maxdepth: 1 - :caption: Interactive apps Change_filmstrips.ipynb Coastal_transects.ipynb diff --git a/README.rst b/README.rst index 5305142b8..fd4ac3bd1 100644 --- a/README.rst +++ b/README.rst @@ -4,8 +4,10 @@ :width: 900 :alt: Digital Earth Australia logo -Digital Earth Australia notebooks and tools repository -###################################################### +DEA Notebooks +############# + +This is the notebooks and tools repository of Digital Earth Australia (DEA). .. image:: https://img.shields.io/badge/DOI-10.26186/145234-0e7fbf.svg :target: https://doi.org/10.26186/145234 @@ -20,21 +22,26 @@ Digital Earth Australia notebooks and tools repository :target: https://github.com/GeoscienceAustralia/dea-notebooks/actions/workflows/test_notebooks.yml :alt: Notebook testing -**Contribute:** We'd love your contribution! DEA Notebooks is an open-source project and welcomes contributions from everyone. Please use this handy `template notebook `_ to construct your contribution! +**Contribute:** We'd love your contribution! DEA Notebooks is an open-source project and welcomes contributions from everyone. + +**License:** The code in this repository is licensed under the `Apache License, Version 2.0 `_. DEA data is licensed under the `Creative Commons by Attribution 4.0 license `_. -**License:** The code in this repository is licensed under the `Apache License, Version 2.0 `_. Digital Earth Australia data is licensed under the `Creative Commons by Attribution 4.0 license `_. +**Documentation:** See the `DEA Notebooks Wiki `_. -**Contact:** If you need assistance with any of the Jupyter Notebooks or Python code in this repository, please post a question on the `Open Data Cube Slack channel `_ or on the `GIS Stack Exchange `_ using the ``open-data-cube`` tag (you can view `previously asked questions here `_). If you would like to report an issue with this notebook, you can `file one on Github `_. 
+**Contact:** For assistance with any of these notebooks and tools, please ask a question on our `Open Data Cube Slack channel `_ or on the `GIS Stack Exchange `_ using the ``open-data-cube`` tag. You can also `report an issue on this repository `_. -**Citing DEA Notebooks:** If you use any of the notebooks, code or tools in this repository in your work, please reference them using the following citation: +**Citation:** If you use this repository in your work, please reference it with the following citation. Krause, C., Dunn, B., Bishop-Taylor, R., Adams, C., Burton, C., Alger, M., Chua, S., Phillips, C., Newey, V., Kouzoubov, K., Leith, A., Ayers, D., Hicks, A., DEA Notebooks contributors 2021. Digital Earth Australia notebooks and tools repository. Geoscience Australia, Canberra. https://doi.org/10.26186/145234 - -We encourage you to check out the other usages of our notebooks, code and tools at our `USAGE `_ page, and to add your paper, training course, creative work or other exciting project there as well! + +We would also appreciate it if you add a citation of your work to our `USAGE `_ page. ---------- -The Digital Earth Australia notebooks and tools repository (``dea-notebooks``) hosts Jupyter Notebooks, Python scripts and workflows for analysing `Digital Earth Australia (DEA) `_ satellite data and derived products. This documentation is designed to provide a guide to getting started with DEA, and to showcase the wide range of geospatial analyses that can be achieved using DEA data and open-source software including `Open Data Cube `_ and `xarray `_. +Introduction +============ + +This repository hosts Jupyter Notebooks, Python scripts and workflows for analysing `Digital Earth Australia (DEA) `_ satellite data and derived products. 
This documentation is designed to provide a guide to getting started with DEA, and to showcase the wide range of geospatial analyses that can be achieved using DEA data and open-source software including `Open Data Cube `_ and `xarray `_. The repository is based around the following directory structure (from simple to increasingly complex applications): @@ -54,31 +61,24 @@ Supporting functions and data for the notebooks are kept in the following direct - `Supplementary_data `_: *Supplementary files required for the analyses above (e.g. images, rasters, shapefiles, training data)* -The Jupyter notebooks on the ``stable`` branch of this repository are used to generate the **Digital Earth Australia User Guide** located at: ``_ - All notebooks in the ``dea-notebooks`` repository contain tags describing their functionality. If you are searching for a specific functionality, use the `Tags Index `_ to search for a suitable example. If there is a functionality that has not been documented that you think should be, please create an 'Issue' in the `dea-notebooks repository. `_ ----------- - -Getting started with DEA Notebooks -================================== - -To get started with using ``dea-notebooks``, `visit the DEA Notebooks Wiki page `_. This page includes guides for getting started on both the `DEA Sandbox `_ and `NCI environments `_. +We encourage you to check out the other usages of our notebooks, code and tools at our `USAGE `_ page. -Once you're set up, there are two main options for interacting with ``dea-notebooks`` and contributing back to the repository: - -* **DEA notebooks using git**: Git is a version-control software designed to help track changes to files and collaborate with multiple users on a project. Using ``git`` is the recommended workflow for working with ``dea-notebooks`` as it makes it easy to stay up to date with the latest versions of functions and code and makes it impossible to lose your work. 
+---------- - * Refer to the repository's `Guide to using DEA Notebooks with git `_ wiki article. +Contributing to DEA Notebooks +============================= -* **DEA notebooks using Github**: Alternatively, the Github website can be used to upload and modify the ``dea-notebooks`` repository directly. This can be a good way to get started with ``dea-notebooks``. +To get started, see either of these articles. - * Refer to the repository's `Guide to DEA Notebooks using the Github website `_ wiki article. +* `Create a DEA Notebook `_ +* `Edit a DEA Notebook `_ ---------- -Contributing to DEA Notebooks -============================= +Git workflow +============ Develop, stable and working branches ------------------------------------ @@ -86,14 +86,14 @@ Develop, stable and working branches The ``dea-notebooks`` repository uses 'branches' to manage individuals' notebooks, and to allow easy publishing of notebooks ready to be shared. There are two main types of branches: * `develop branch `_: The ``develop`` branch is the **default branch** where notebooks are put as they are being prepared to be shared publicly. Notebooks added to this branch will be periodically merged into the ``stable`` branch after testing and evaluation. The ``develop`` branch is protected and requires changes to be approved via a 'pull request' and review checklist before they appear on the branch. -* `stable branch `_: The ``stable`` branch contains DEA's collection of publicly available notebooks. Notebooks added to this branch will become part of the official DEA documentation and are published on the `DEA User Guide `_. The ``stable`` branch is protected, and is periodically updated with new content from the ``develop`` branch via a 'pull request' (for ``develop`` > ``stable`` pull requests, *merge using the 'Create a merge commit' option*). +* `stable branch `_: The ``stable`` branch contains DEA's collection of publicly available notebooks. 
Notebooks added to this branch will become part of the official DEA documentation and are published on the `DEA Knowledge Hub `_. The ``stable`` branch is protected, and is periodically updated with new content from the ``develop`` branch via a 'pull request' (for ``develop`` > ``stable`` pull requests, *merge using the 'Create a merge commit' option*). * `Working branches `_: All other branches in the repository are working spaces for users of ``dea-notebooks``. They have a unique name (typically named after the user, e.g. ``ClaireK``, ``BexDunn``). The notebooks on these branches can be works-in-progress and do not need to be pretty or complete. By using a working branch, it is easy to use scripts and algorithms from ``dea-notebooks`` in your own work or share and collaborate on a working version of a notebook or code. Publishing notebooks to the stable branch ----------------------------------------- -Once you have a notebook that is ready to be published on the ``develop`` branch, you can submit a 'pull request' in the `Pull requests tab at the top of the repository `_. The default pull request template contains a check-list to ensure that all ``stable`` branch Jupyter notebooks are consistent and well-documented so they can be understood by future users, and rendered correctly in the `DEA User Guide `_. Please ensure that as many of these checklist items are complete as possible or leave a comment in the pull request asking for help with any remaining checklist items. +Once you have a notebook that is ready to be published from the ``develop`` branch to the DEA Knowledge Hub, you can submit a 'pull request' in the `Pull requests tab at the top of the repository `_. The default pull request template contains a check-list to ensure that all ``stable`` branch Jupyter notebooks are consistent and well-documented so they can be understood by future users, and rendered correctly in the `DEA Knowledge Hub `_. 
Please ensure that as many of these checklist items are complete as possible or leave a comment in the pull request asking for help with any remaining checklist items. Draft pull requests ^^^^^^^^^^^^^^^^^^^ @@ -123,14 +123,3 @@ If the notebook meets all the checklist requirements, click the green 'Review' b Once the pull request has been approved, you can merge it into the ``develop`` branch. Select the 'Squash and merge' option from the drop-down menu to the right of the green 'merge' button. Once you have merged the new branch in, you need to delete the branch. There is a button on the page that asks you if you would like to delete the now merged branch. Select 'Yes' to delete it. -See also `wiki `_. - -.. toctree:: - :hidden: - - Beginners_guide/README - DEA_products/README - How_to_guides/README - Interactive_apps/README - Real_world_examples/README - Tools/index diff --git a/Real_world_examples/README.rst b/Real_world_examples/README.rst index 025de5112..4ed6fd733 100644 --- a/Real_world_examples/README.rst +++ b/Real_world_examples/README.rst @@ -5,7 +5,6 @@ More complex case study-based workflows demonstrating how DEA can be used to add .. toctree:: :maxdepth: 1 - :caption: Real World Examples Burnt_area_mapping.ipynb Burnt_area_mapping_near_realtime.ipynb diff --git a/Real_world_examples/Scalable_machine_learning/0_README.ipynb b/Real_world_examples/Scalable_machine_learning/0_README.ipynb index b06b5f2d0..bb1302880 100644 --- a/Real_world_examples/Scalable_machine_learning/0_README.ipynb +++ b/Real_world_examples/Scalable_machine_learning/0_README.ipynb @@ -49,7 +49,7 @@ "* There are many online courses that can help you understand the fundamentals of machine learning with python e.g. [edX](https://www.edx.org/course/machine-learning-with-python-a-practical-introduct), [coursera](https://www.coursera.org/learn/machine-learning-with-python). 
\n", "* The [Scikit-learn](https://scikit-learn.org/stable/supervised_learning.html) documentation provides information on the available models and their parameters.\n", "* This [review article](https://www.tandfonline.com/doi/full/10.1080/01431161.2018.1433343) provides a nice overview of machine learning in the context of remote sensing.\n", - "* The stand alone notebook, [Machine_learning_with_ODC](../How_to_guides/Machine_learning_with_ODC.ipynb), in the `Real_world_examples/` folder is a companion piece to these notebooks and provides a more succint (but less descriptive) version of the workflow demonstrated here.\n", + "* The standalone notebook, [Machine_learning_with_ODC](../../How_to_guides/Machine_learning_with_ODC.ipynb), in the `How_to_guides/` folder is a companion piece to these notebooks and provides a more succinct (but less descriptive) version of the workflow demonstrated here.\n", "___\n" ] }, diff --git a/Real_world_examples/Scalable_machine_learning/1_Extract_training_data.ipynb b/Real_world_examples/Scalable_machine_learning/1_Extract_training_data.ipynb index c3814f4aa..3abf7962b 100644 --- a/Real_world_examples/Scalable_machine_learning/1_Extract_training_data.ipynb +++ b/Real_world_examples/Scalable_machine_learning/1_Extract_training_data.ipynb @@ -261,7 +261,7 @@ "\n", "In addition to the `zonal_stats` parameter, we also need to set up a datacube query dictionary for the Open Data Cube query such as `measurements` (the bands to load from the satellite), the `resolution` (the cell size), and the `output_crs` (the output projection). These options will be added to a query dictionary that will be passed into `collect_training_data` using the parameter `collect_training_data(dc_query=query, ...)`. 
The query dictionary will be the only argument in the **feature layer function** which we will define and describe in a moment.\n", "\n", - "> Note: `collect_training_data` also has a number of additional parameters for handling ODC I/O read failures, where polygons that return an excessive number of null values can be resubmitted to the multiprocessing queue. Check out the [docs](../Tools/gen/dea_tools.classification.ipynb) to learn more.\n" + "> Note: `collect_training_data` also has a number of additional parameters for handling ODC I/O read failures, where polygons that return an excessive number of null values can be resubmitted to the multiprocessing queue. Check out the [docs](../../Tools/dea_tools/classification.py) to learn more.\n" ] }, { diff --git a/Real_world_examples/Scalable_machine_learning/4_Classify_satellite_data.ipynb b/Real_world_examples/Scalable_machine_learning/4_Classify_satellite_data.ipynb index c1e6917ec..9ec9ddeaa 100644 --- a/Real_world_examples/Scalable_machine_learning/4_Classify_satellite_data.ipynb +++ b/Real_world_examples/Scalable_machine_learning/4_Classify_satellite_data.ipynb @@ -289,9 +289,9 @@ "source": [ "### Loop through test locations and predict\n", "\n", - "For every location we listed in the `test_locations` dictionary, we calculate the feature layers, and then use the DEA function [predict_xr](../Tools/gen/dea_tools.classification.ipynb#L231) to classify the data.\n", + "For every location we listed in the `test_locations` dictionary, we calculate the feature layers, and then use the DEA function [predict_xr](../../Tools/dea_tools/classification.py) to classify the data.\n", "\n", - "The `predict_xr` function is an xarray wrapper around the sklearn estimator `.predict()` and `.predict_proba()` methods, and relies on [dask-ml](https://ml.dask.org/) [ParallelPostfit](https://ml.dask.org/modules/generated/dask_ml.wrappers.ParallelPostFit.html) to run the predictions with dask. 
`Predict_xr` can compute predictions, prediction probabilites, and return the input feature layers. Read the [documentation](../Tools/gen/dea_tools.classification.ipynb#L239) for more insights into this function's capabilities." + "The `predict_xr` function is an xarray wrapper around the sklearn estimator `.predict()` and `.predict_proba()` methods, and relies on [dask-ml](https://ml.dask.org/) [ParallelPostFit](https://ml.dask.org/modules/generated/dask_ml.wrappers.ParallelPostFit.html) to run the predictions with dask. `predict_xr` can compute predictions, prediction probabilities, and return the input feature layers. Read the [documentation](../../Tools/dea_tools/classification.py) for more insights into this function's capabilities." ] }, { diff --git a/Real_world_examples/Scalable_machine_learning/README.rst b/Real_world_examples/Scalable_machine_learning/README.rst index 0057a441f..02a336fca 100644 --- a/Real_world_examples/Scalable_machine_learning/README.rst +++ b/Real_world_examples/Scalable_machine_learning/README.rst @@ -142,7 +142,6 @@ To begin working through the notebooks in this ``Scalable Supervised Machine Lea .. 
toctree:: :maxdepth: 1 - :caption: Scalable Supervised Machine Learning on the Open Data Cube 1_Extract_training_data.ipynb 2_Inspect_training_data.ipynb diff --git a/Real_world_examples/Water_quality_suspended_matter.ipynb b/Real_world_examples/Water_quality_suspended_matter.ipynb index 4622ed087..2df327e05 100644 --- a/Real_world_examples/Water_quality_suspended_matter.ipynb +++ b/Real_world_examples/Water_quality_suspended_matter.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Monitoring Water Quality\n", + "# Monitoring Water Quality \n", "\n", "* **[Sign up to the DEA Sandbox](https://app.sandbox.dea.ga.gov.au/)** to run this notebook interactively from a browser\n", "* **Compatibility:** Notebook currently compatible with the `DEA Sandbox` environment\n", diff --git a/Tools/dea_tools/datahandling.py b/Tools/dea_tools/datahandling.py index ed6395307..a75fea51a 100644 --- a/Tools/dea_tools/datahandling.py +++ b/Tools/dea_tools/datahandling.py @@ -1,4 +1,4 @@ -## dea_datahandling.py +# dea_datahandling.py """ Loading and manipulating Digital Earth Australia products and data using the Open Data Cube and xarray. @@ -17,31 +17,28 @@ If you would like to report an issue with this script, you can file one on Github (https://github.com/GeoscienceAustralia/dea-notebooks/issues/new). 
-Last modified: June 2023 +Last modified: Jan 2024 """ +import datetime + # Import required packages import os -import zipfile -import datetime -import requests import warnings -import odc.algo -import dask +import zipfile +from collections import Counter + import numpy as np +import odc.algo import pandas as pd -import dask.array as da -import xarray as xr -import skimage.transform +import requests import sklearn.decomposition -from skimage.exposure import match_histograms -from skimage.color import rgb2hsv, hsv2rgb -from random import randint -from collections import Counter +import xarray as xr +from datacube.utils.dates import normalise_dt from odc.algo import mask_cleanup -from datacube.utils import masking from scipy.ndimage import binary_dilation -from datacube.utils.dates import normalise_dt +from skimage.color import hsv2rgb, rgb2hsv +from skimage.exposure import match_histograms def _dc_query_only(**kw): @@ -905,7 +902,7 @@ def nearest( return nearest_array -def parallel_apply(ds, dim, func, *args, **kwargs): +def parallel_apply(ds, dim, func, *args, use_threads=False, **kwargs): """ Applies a custom function in parallel along the dimension of an xarray.Dataset or xarray.DataArray. @@ -929,6 +926,11 @@ def parallel_apply(ds, dim, func, *args, **kwargs): The function that will be applied in parallel to each array along dimension `dim`. The first argument passed to this function should be the array along `dim`. + use_threads : bool, optional + Keyword-only. Whether to use threads instead of processes for parallelisation. + Defaults to False, which means it'll use multi-processing. + In brief, the difference between threads and processes is that threads + share memory, while processes have separate memory. *args : Any number of arguments that will be passed to `func`. **kwargs : @@ -941,13 +943,19 @@ def parallel_apply(ds, dim, func, *args, **kwargs): along the input `dim` dimension. 
""" - from concurrent.futures import ProcessPoolExecutor - from tqdm import tqdm - from itertools import repeat + from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor from functools import partial + from itertools import repeat - with ProcessPoolExecutor() as executor: - + from tqdm import tqdm + + # Use threads or processes + if use_threads: + Executor = ThreadPoolExecutor + else: + Executor = ProcessPoolExecutor + + with Executor() as executor: # Update func to add kwargs func = partial(func, **kwargs) diff --git a/USAGE.rst b/USAGE.rst index 01a9f0339..7df5b184b 100644 --- a/USAGE.rst +++ b/USAGE.rst @@ -29,6 +29,8 @@ Scientific papers - Krause, C.E., Newey, V., Alger, M.J. and Lymburner, L., 2021. Mapping and monitoring the multi-decadal dynamics of Australia’s open waterbodies using Landsat. Remote Sensing, 13(8), p.1437. +- Malan, N., Roughan, M., Hemming, M. et al. Quantifying coastal freshwater extremes during unprecedented rainfall using long timeseries multi-platform salinity observations. Nat Commun 15, 424 (2024). https://doi.org/10.1038/s41467-023-44398-2 + - Nanson, R., Bishop-Taylor, R., Sagar, S., Lymburner, L., (2022). Geomorphic insights into Australia's coastal change using a national dataset derived from the multi-decadal Landsat archive. Estuarine, Coastal and Shelf Science, 265, p.107712. Available: https://doi.org/10.1016/j.ecss.2021.107712 - Pucino, N., Kennedy, D.M., Young, M. and Ierodiaconou, D., 2022. Assessing the accuracy of Sentinel-2 instantaneous subpixel shorelines using synchronous UAV ground truth surveys. Remote Sensing of Environment, 282, p.113293. @@ -55,6 +57,7 @@ Conferences Courses and training -------------------- +- University of New England, 2023. `GISC436 : Remote Sensing and Image analysis `_ - Swinburne University of Technology, Space Technology and Industry Institute, 2023. 
`Earth Observation and Data Analysis Short Course `_ - Australian National University, Centre for Water and Landscape Dynamics `(WALD) `_ under contract for `Geoscience Australia `_, 2022. `Digital Earth Australia for Geospatial Analysts `_. Materials available under Apache 2.0 Licence `here `_ and link to github repository: https://github.com/ANU-WALD/dea_training - Flinders University, 2021, 2022. Remote Sensing for All Disciplines. Undergrad and postgrad course.