Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accept MAST URIs as input to get_cloud_uris() #3193

Merged
merged 5 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,16 @@
New Tools and Services
----------------------


API changes
-----------

mast
^^^^

- Handle a MAST URI string as input for ``Observations.get_cloud_uri`` and a list of MAST URIs as input for
``Observations.get_cloud_uris``. [#3193]

Comment on lines +11 to +16
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: I'll move this section further down, as we keep this section for highlighting potentially breaking API changes rather than additions/enhancements.

simbad
^^^^^^

Expand All @@ -26,6 +33,13 @@ ipac.nexsci.nasa_exoplanet_archive

- Fixed InvalidTableError for DI_STARS_EXEP and TD tables. [#3189]

mast
^^^^

- Bugfix where users are unnecessarily warned about a query limit while fetching products in ``MastMissions.get_product_list``. [#3193]

- Bugfix where ``Observations.get_cloud_uri`` and ``Observations.get_cloud_uris`` fail if the MAST relative path is not found. [#3193]

simbad
^^^^^^

Expand Down
21 changes: 10 additions & 11 deletions astroquery/mast/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@
import boto3
import botocore

self.supported_missions = ["mast:hst/product", "mast:tess/product", "mast:kepler", "mast:galex", "mast:ps1"]
self.supported_missions = ["mast:hst/product", "mast:tess/product", "mast:kepler", "mast:galex", "mast:ps1",
"mast:jwst/product"]

self.boto3 = boto3
self.botocore = botocore
Expand All @@ -77,11 +78,7 @@
response : bool
Is the product from a supported mission.
"""

for mission in self.supported_missions:
if data_product['dataURI'].lower().startswith(mission):
return True
return False
return any(data_product['dataURI'].lower().startswith(mission) for mission in self.supported_missions)

Check warning on line 81 in astroquery/mast/cloud.py

View check run for this annotation

Codecov / codecov/patch

astroquery/mast/cloud.py#L81

Added line #L81 was not covered by tests

def get_cloud_uri(self, data_product, include_bucket=True, full_url=False):
"""
Expand All @@ -92,7 +89,7 @@

Parameters
----------
data_product : `~astropy.table.Row`
data_product : `~astropy.table.Row`, str
Product to be converted into cloud data uri.
include_bucket : bool
Default True. When false returns the path of the file relative to the
Expand All @@ -108,6 +105,8 @@
Cloud URI generated from the data product. If the product cannot be
found in the cloud, None is returned.
"""
# If data_product is a string, convert to a list
data_product = [data_product] if isinstance(data_product, str) else data_product

uri_list = self.get_cloud_uri_list(data_product, include_bucket=include_bucket, full_url=full_url)

Expand All @@ -124,8 +123,8 @@

Parameters
----------
data_products : `~astropy.table.Table`
Table containing products to be converted into cloud data uris.
data_products : `~astropy.table.Table`, list
Table containing products or list of MAST uris to be converted into cloud data uris.
include_bucket : bool
Default True. When false returns the path of the file relative to the
top level cloud storage location.
Expand All @@ -141,8 +140,8 @@
if data_products includes products not found in the cloud.
"""
s3_client = self.boto3.client('s3', config=self.config)

paths = utils.mast_relative_path(data_products["dataURI"])
data_uris = data_products if isinstance(data_products, list) else data_products['dataURI']
paths = utils.mast_relative_path(data_uris)
if isinstance(paths, str): # Handle the case where only one product was requested
paths = [paths]

Expand Down
9 changes: 5 additions & 4 deletions astroquery/mast/missions.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,15 @@ def _parse_result(self, response, *, verbose=False): # Used by the async_to_syn

if self.service == self._search:
results = self._service_api_connection._parse_result(response, verbose, data_key='results')

# Warn if maximum results are returned
if len(results) >= self.limit:
warnings.warn("Maximum results returned, may not include all sources within radius.",
MaxResultsWarning)
elif self.service == self._list_products:
# Results from post_list_products endpoint need to be handled differently
results = Table(response.json()['products'])

if len(results) >= self.limit:
warnings.warn("Maximum results returned, may not include all sources within radius.",
MaxResultsWarning)

return results

def _validate_criteria(self, **criteria):
Expand Down
23 changes: 15 additions & 8 deletions astroquery/mast/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,9 +854,9 @@ def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=Fa

Parameters
----------
data_products : `~astropy.table.Table`
Table containing products to be converted into cloud data uris. If provided, this will supercede
page_size, page, or any keyword arguments passed in as criteria.
data_products : `~astropy.table.Table`, list
Table containing products or list of MAST uris to be converted into cloud data uris.
If provided, this will supersede page_size, page, or any keyword arguments passed in as criteria.
include_bucket : bool
Default True. When False, returns the path of the file relative to the
top level cloud storage location.
Expand Down Expand Up @@ -920,16 +920,23 @@ def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=Fa
# Return list of associated data products
data_products = self.get_product_list(obs)

# Filter product list
data_products = self.filter_products(data_products, mrp_only=mrp_only, extension=extension, **filter_products)
if isinstance(data_products, Table):
# Filter product list
data_products = self.filter_products(data_products, mrp_only=mrp_only, extension=extension,
**filter_products)
else: # data_products is a list of URIs
# Warn if trying to supply filters
if filter_products or extension or mrp_only:
warnings.warn('Filtering is not supported when providing a list of MAST URIs. '
'To apply filters, please provide query criteria or a table of data products '
'as returned by `Observations.get_product_list`', InputWarning)

if not len(data_products):
warnings.warn("No matching products to fetch associated cloud URIs.", NoResultsWarning)
warnings.warn('No matching products to fetch associated cloud URIs.', NoResultsWarning)
return

# Remove duplicate products
data_products = utils.remove_duplicate_products(data_products, 'dataURI')

return self._cloud_connection.get_cloud_uri_list(data_products, include_bucket, full_url)

def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False):
Expand All @@ -941,7 +948,7 @@ def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False):

Parameters
----------
data_product : `~astropy.table.Row`
data_product : `~astropy.table.Row`, str
Product to be converted into cloud data uri.
include_bucket : bool
Default True. When false returns the path of the file relative to the
Expand Down
12 changes: 12 additions & 0 deletions astroquery/mast/tests/data/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,15 @@ To generate `~astroquery.mast.tests.data.mission_products.json`, use the followi
>>> resp = utils._simple_request('https://mast.stsci.edu/search/hst/api/v0.1/list_products', {'dataset_ids': 'Z14Z0104T'})
>>> with open('panstarrs_columns.json', 'w') as file:
... json.dump(resp.json(), file, indent=4) # doctest: +SKIP

To generate `~astroquery.mast.tests.data.mast_relative_path.json`, use the following:

.. doctest-remote-data::

>>> import json
>>> from astroquery.mast import utils
...
>>> resp = utils._simple_request('https://mast.stsci.edu/api/v0.1/path_lookup/',
... {'uri': ['mast:HST/product/u9o40504m_c3m.fits', 'mast:HST/product/does_not_exist.fits']})
>>> with open('mast_relative_path.json', 'w') as file:
... json.dump(resp.json(), file, indent=4) # doctest: +SKIP
10 changes: 10 additions & 0 deletions astroquery/mast/tests/data/mast_relative_path.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"mast:HST/product/u9o40504m_c3m.fits": {
"status_code": 200,
"path": "/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits"
},
"mast:HST/product/does_not_exist.fits": {
"status_code": 404,
"path": null
}
}
96 changes: 95 additions & 1 deletion astroquery/mast/tests/test_mast.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import re
from shutil import copyfile
from unittest.mock import patch

import pytest

Expand All @@ -16,7 +17,8 @@

from astroquery.mast.services import _json_to_table
from astroquery.utils.mocks import MockResponse
from astroquery.exceptions import InvalidQueryError, InputWarning, MaxResultsWarning
from astroquery.exceptions import (InvalidQueryError, InputWarning, MaxResultsWarning, NoResultsWarning,
RemoteServiceError)

from astroquery import mast

Expand Down Expand Up @@ -48,6 +50,7 @@
'Mast.HscMatches.Db.v3': 'matchid.json',
'Mast.HscMatches.Db.v2': 'matchid.json',
'Mast.HscSpectra.Db.All': 'spectra.json',
'mast_relative_path': 'mast_relative_path.json',
'panstarrs': 'panstarrs.json',
'panstarrs_columns': 'panstarrs_columns.json',
'tess_cutout': 'astrocut_107.27_-70.0_5x5.zip',
Expand Down Expand Up @@ -142,6 +145,8 @@ def request_mockreturn(url, params={}):
filename = data_path(DATA_FILES["Mast.Name.Lookup"])
elif 'panstarrs' in url:
filename = data_path(DATA_FILES['panstarrs_columns'])
elif 'path_lookup' in url:
filename = data_path(DATA_FILES['mast_relative_path'])
with open(filename, 'rb') as infile:
content = infile.read()
return MockResponse(content)
Expand Down Expand Up @@ -678,6 +683,95 @@ def test_observations_download_file(patch_post, tmpdir):
assert result == ('COMPLETE', None, None)


@patch('boto3.client')
def test_observations_get_cloud_uri(mock_client, patch_post):
    """Check ``Observations.get_cloud_uri`` for a table row and for a MAST URI string.

    ``boto3.client`` is replaced by a mock, so no real S3 client is created.
    The call must raise ``RemoteServiceError`` until cloud access is enabled;
    afterwards both input forms must resolve to the same ``s3://`` URI.
    """
    pytest.importorskip("boto3")

    source_uri = 'mast:HST/product/u9o40504m_c3m.fits'
    cloud_uri = 's3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits'

    # Without a cloud connection the lookup must be refused
    with pytest.raises(RemoteServiceError):
        mast.Observations.get_cloud_uri(source_uri)

    # Turn on access to the public AWS S3 bucket
    mast.Observations.enable_cloud_dataset()

    # Input given as a single table row
    products = Table({'dataURI': [source_uri]})
    from_row = mast.Observations.get_cloud_uri(products[0])
    assert isinstance(from_row, str)
    assert from_row == cloud_uri

    # Input given as a plain MAST URI string
    from_string = mast.Observations.get_cloud_uri(source_uri)
    assert from_string == cloud_uri

    # Restore the disabled state for tests that expect no cloud connection
    mast.Observations.disable_cloud_dataset()


@patch('boto3.client')
def test_observations_get_cloud_uris(mock_client, patch_post):
    """Check ``Observations.get_cloud_uris`` for table input and for a list of MAST URIs.

    ``boto3.client`` is replaced by a mock, so no real S3 client is created.
    Covers: the error raised before cloud access is enabled, table input,
    list input, the warning issued when filters are combined with list input,
    and the warning issued when a MAST URI cannot be resolved.
    """
    pytest.importorskip("boto3")

    mast_uri = 'mast:HST/product/u9o40504m_c3m.fits'
    expected = 's3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits'

    # Error without cloud connection
    with pytest.raises(RemoteServiceError):
        mast.Observations.get_cloud_uris([mast_uri])

    # Enable access to public AWS S3 bucket
    mast.Observations.enable_cloud_dataset()

    # Get the cloud URIs
    # Table input: pass the table itself so the table code path is exercised
    # (previously the list was passed here, leaving that path untested)
    product = Table()
    product['dataURI'] = [mast_uri]
    uris = mast.Observations.get_cloud_uris(product)
    assert isinstance(uris, list)
    assert len(uris) == 1
    assert uris[0] == expected

    # List input
    uris = mast.Observations.get_cloud_uris([mast_uri])
    assert isinstance(uris, list)
    assert len(uris) == 1
    assert uris[0] == expected

    # Warn if attempting to filter with list input
    with pytest.warns(InputWarning, match='Filtering is not supported'):
        mast.Observations.get_cloud_uris([mast_uri],
                                         extension='png')

    # Warn if not found
    with pytest.warns(NoResultsWarning, match='Failed to retrieve MAST relative path'):
        mast.Observations.get_cloud_uris(['mast:HST/product/does_not_exist.fits'])


@patch('boto3.client')
def test_observations_get_cloud_uris_query(mock_client, patch_post):
    """Check ``Observations.get_cloud_uris`` when driven by query criteria.

    ``boto3.client`` is replaced by a mock, so no real S3 client is created.
    Covers: a criteria query with product filters, the error raised when
    neither products nor criteria are given, and the warning issued when the
    filters leave no products.
    """
    pytest.importorskip("boto3")

    target = 234295610
    c3m_filter = {'productSubGroupDescription': 'C3M'}
    lc_filter = {'productSubGroupDescription': 'LC'}

    # Enable access to the public AWS S3 bucket
    mast.Observations.enable_cloud_dataset()

    # Query criteria and product filters in a single call
    cloud_uris = mast.Observations.get_cloud_uris(target_name=target, filter_products=c3m_filter)
    assert isinstance(cloud_uris, list)

    # Neither data_products nor **criteria supplied -> InvalidQueryError
    with pytest.raises(InvalidQueryError):
        mast.Observations.get_cloud_uris(filter_products=c3m_filter)

    # Filters that match no data products -> NoResultsWarning
    with pytest.warns(NoResultsWarning, match='No matching products'):
        mast.Observations.get_cloud_uris(target_name=target, filter_products=lc_filter)


######################
# CatalogClass tests #
######################
Expand Down
Loading
Loading