Skip to content

Commit

Permalink
Merge pull request #3155 from snbianco/ASB-29334-download-missions
Browse files Browse the repository at this point in the history
Download Products with MastMissions
  • Loading branch information
bsipocz authored Jan 24, 2025
2 parents 68b0f5b + aab812a commit 5a9633c
Show file tree
Hide file tree
Showing 11 changed files with 1,178 additions and 104 deletions.
13 changes: 13 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ jplspec

- minor improvement to lookuptable behavior [#3173,#2901]

mast
^^^^

- Retrieve data products from the Missions-MAST API with ``mast.MastMissions.get_product_list``. Retrieve unique data
products only with ``mast.MastMissions.get_unique_product_list``. [#3155]

- Filter data products retrieved from the Missions-MAST API with ``mast.MastMissions.filter_products``. [#3155]

- Download data products from the Missions-MAST API with ``mast.MastMissions.download_products``.
Download a single data product using ``mast.MastMissions.download_file``. [#3155]

- Get the keyword corresponding to the dataset ID for a specific mission with ``mast.MastMissions.get_dataset_kwd``. [#3155]

mocserver
^^^^^^^^^

Expand Down
424 changes: 401 additions & 23 deletions astroquery/mast/missions.py

Large diffs are not rendered by default.

32 changes: 4 additions & 28 deletions astroquery/mast/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import astropy.units as u
import astropy.coordinates as coord

from astropy.table import Table, Row, unique, vstack
from astropy.table import Table, Row, vstack
from astroquery import log
from astroquery.mast.cloud import CloudAccess

Expand Down Expand Up @@ -816,7 +816,7 @@ def download_products(self, products, *, download_dir=None, flat=False,
products = self.filter_products(products, mrp_only=mrp_only, **filters)

# remove duplicate products
products = self._remove_duplicate_products(products)
products = utils.remove_duplicate_products(products, 'dataURI')

if not len(products):
warnings.warn("No products to download.", NoResultsWarning)
Expand Down Expand Up @@ -928,7 +928,7 @@ def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=Fa
return

# Remove duplicate products
data_products = self._remove_duplicate_products(data_products)
data_products = utils.remove_duplicate_products(data_products, 'dataURI')

return self._cloud_connection.get_cloud_uri_list(data_products, include_bucket, full_url)

Expand Down Expand Up @@ -966,30 +966,6 @@ def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False):
# Query for product URIs
return self._cloud_connection.get_cloud_uri(data_product, include_bucket, full_url)

def _remove_duplicate_products(self, data_products):
    """
    Drop rows whose ``dataURI`` repeats an earlier row's value, keeping one
    row per unique URI.

    Parameters
    ----------
    data_products : `~astropy.table.Table`
        Table of data products that may contain duplicate ``dataURI`` entries.

    Returns
    -------
    unique_products : `~astropy.table.Table`
        Table restricted to rows with unique ``dataURI`` values.
    """
    total = len(data_products)
    # astropy.table.unique keeps the first occurrence for each dataURI
    deduped = unique(data_products, keys="dataURI")
    # Let the user know when rows were dropped, so counts don't surprise them
    if len(deduped) < total:
        log.info(f"{total - len(deduped)} of {total} products were duplicates. "
                 f"Only returning {len(deduped)} unique product(s).")

    return deduped

def get_unique_product_list(self, observations):
"""
Given a "Product Group Id" (column name obsid), returns a list of associated data products with
Expand All @@ -1009,7 +985,7 @@ def get_unique_product_list(self, observations):
Table containing products with unique dataURIs.
"""
products = self.get_product_list(observations)
unique_products = self._remove_duplicate_products(products)
unique_products = utils.remove_duplicate_products(products, 'dataURI')
if len(unique_products) < len(products):
log.info("To return all products, use `Observations.get_product_list`")
return unique_products
Expand Down
76 changes: 57 additions & 19 deletions astroquery/mast/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from astropy.table import Table, MaskedColumn
from astropy.utils.decorators import deprecated_renamed_argument

from .. import log
from ..query import BaseQuery
from ..utils import async_to_sync
from ..utils.class_or_instance import class_or_instance
Expand Down Expand Up @@ -84,7 +85,12 @@ def _json_to_table(json_obj, data_key='data'):
col_data = np.array([x[idx] for x in json_obj[data_key]], dtype=object)
except KeyError:
# it's not a data array, fall back to using column name as it is array of dictionaries
col_data = np.array([x[col_name] for x in json_obj[data_key]], dtype=object)
try:
col_data = np.array([x[col_name] for x in json_obj[data_key]], dtype=object)
except KeyError:
# Skip column names not found in data
log.debug('Column %s was not found in data. Skipping...', col_name)
continue
if ignore_value is not None:
col_data[np.where(np.equal(col_data, None))] = ignore_value

Expand Down Expand Up @@ -112,6 +118,8 @@ class ServiceAPI(BaseQuery):

SERVICE_URL = conf.server
REQUEST_URL = conf.server + "/api/v0.1/"
MISSIONS_DOWNLOAD_URL = conf.server + "/search/"
MAST_DOWNLOAD_URL = conf.server + "/api/v0.1/Download/file"
SERVICES = {}

def __init__(self, session=None):
Expand Down Expand Up @@ -270,27 +278,28 @@ def service_request_async(self, service, params, pagesize=None, page=None, use_j

request_url = self.REQUEST_URL + service_url.format(**compiled_service_args)

# Default headers
headers = {
'User-Agent': self._session.headers['User-Agent'],
'Content-Type': 'application/x-www-form-urlencoded',
'Accept': 'application/json'
}

# Params as a list of tuples to allow for multiple parameters added
catalogs_request = []
if not page:
page = params.pop('page', None)
if not pagesize:
pagesize = params.pop('pagesize', None)
page = page or params.pop('page', None)
pagesize = pagesize or params.pop('pagesize', None)

# Add pagination if specified
if page is not None:
catalogs_request.append(('page', page))
if pagesize is not None:
catalogs_request.append(('pagesize', pagesize))

# Populate parameters based on `use_json`
if not use_json:
# Decompose filters, sort
for prop, value in kwargs.items():
params[prop] = value
# When not using JSON, merge kwargs into params and build query
params.update(kwargs)
catalogs_request.extend(self._build_catalogs_params(params))
else:
headers['Content-Type'] = 'application/json'
Expand All @@ -307,9 +316,10 @@ def service_request_async(self, service, params, pagesize=None, page=None, use_j
catalogs_request = params_dict

# Removing single-element lists. Single values will live on their own (except for `sort_by`)
for key in catalogs_request.keys():
if (key != 'sort_by') & (len(catalogs_request[key]) == 1):
catalogs_request[key] = catalogs_request[key][0]
catalogs_request = {
k: v if k == 'sort_by' or len(v) > 1 else v[0]
for k, v in params_dict.items()
}

# Otherwise, catalogs_request can remain as the original params dict
else:
Expand All @@ -318,6 +328,40 @@ def service_request_async(self, service, params, pagesize=None, page=None, use_j
response = self._request('POST', request_url, data=catalogs_request, headers=headers, use_json=use_json)
return response

@class_or_instance
def missions_request_async(self, service, params):
    """
    Builds and executes an asynchronous query to the MAST Search API.

    Parameters
    ----------
    service : str
        The MAST Search API service to query. Should be present in self.SERVICES.
    params : dict
        JSON object containing service parameters.

    Returns
    -------
    response : list of `~requests.Response`
    """
    # Look up the service's endpoint path from the registered services
    # NOTE(review): an unknown service name surfaces here as an AttributeError
    # on the None config — confirm callers validate the service first.
    config = self.SERVICES.get(service.lower())
    request_url = self.REQUEST_URL + config.get('path')

    # Default headers
    headers = {
        'User-Agent': self._session.headers['User-Agent'],
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }

    # Send the parameters as the JSON request body (no query-string params)
    return self._request(method='POST',
                         url=request_url,
                         params=None,
                         data=params,
                         headers=headers,
                         use_json=True)

def _build_catalogs_params(self, params):
"""
Gathers parameters for Catalogs.MAST usage and translates to valid API syntax tuples
Expand Down Expand Up @@ -387,12 +431,6 @@ def check_catalogs_criteria_params(self, criteria):
response : boolean
Whether the passed dict has at least one criteria parameter
"""
criteria_check = False
non_criteria_params = ["columns", "sort_by", "page_size", "pagesize", "page"]
criteria_keys = criteria.keys()
for key in criteria_keys:
if key not in non_criteria_params:
criteria_check = True
break

return criteria_check
non_criteria_params = ["columns", "sort_by", "page_size", "pagesize", "page"]
return any(key not in non_criteria_params for key in criteria)
11 changes: 11 additions & 0 deletions astroquery/mast/tests/data/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,14 @@ To generate `~astroquery.mast.tests.data.panstarrs_columns.json`, use the follow
>>> resp = utils._simple_request('https://catalogs.mast.stsci.edu/api/v0.1/panstarrs/dr2/mean/metadata.json')
>>> with open('panstarrs_columns.json', 'w') as file:
... json.dump(resp.json(), file, indent=4) # doctest: +SKIP

To generate `~astroquery.mast.tests.data.mission_products.json`, use the following:

.. doctest-remote-data::

>>> import json
>>> from astroquery.mast import utils
...
>>> resp = utils._simple_request('https://mast.stsci.edu/search/hst/api/v0.1/list_products', {'dataset_ids': 'Z14Z0104T'})
>>> with open('mission_products.json', 'w') as file:
... json.dump(resp.json(), file, indent=4) # doctest: +SKIP
79 changes: 79 additions & 0 deletions astroquery/mast/tests/data/mission_products.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
{
"products": [
{
"product_key": "Z14Z0104T_z14z0104t_shf.fits",
"access": "PUBLIC",
"dataset": "Z14Z0104T",
"instrument_name": "HRS ",
"filters": "MIRROR-N2",
"filename": "z14z0104t_shf.fits",
"uri": "Z14Z0104T/z14z0104t_shf.fits",
"authz_primary_identifier": "Z14Z0104T",
"authz_secondary_identifier": "CAL",
"file_suffix": "SHF",
"category": "UNCALIBRATED",
"size": 31680,
"type": "science"
},
{
"product_key": "Z14Z0104T_z14z0104t_trl.fits",
"access": "PUBLIC",
"dataset": "Z14Z0104T",
"instrument_name": "HRS ",
"filters": "MIRROR-N2",
"filename": "z14z0104t_trl.fits",
"uri": "Z14Z0104T/z14z0104t_trl.fits",
"authz_primary_identifier": "Z14Z0104T",
"authz_secondary_identifier": "CAL",
"file_suffix": "TRL",
"category": "AUX",
"size": 17280,
"type": "science"
},
{
"product_key": "Z14Z0104T_z14z0104t_ulf.fits",
"access": "PUBLIC",
"dataset": "Z14Z0104T",
"instrument_name": "HRS ",
"filters": "MIRROR-N2",
"filename": "z14z0104t_ulf.fits",
"uri": "Z14Z0104T/z14z0104t_ulf.fits",
"authz_primary_identifier": "Z14Z0104T",
"authz_secondary_identifier": "CAL",
"file_suffix": "ULF",
"category": "UNCALIBRATED",
"size": 14400,
"type": "science"
},
{
"product_key": "Z14Z0104T_z14z0104t_pdq.fits",
"access": "PUBLIC",
"dataset": "Z14Z0104T",
"instrument_name": "HRS ",
"filters": "MIRROR-N2",
"filename": "z14z0104t_pdq.fits",
"uri": "Z14Z0104T/z14z0104t_pdq.fits",
"authz_primary_identifier": "Z14Z0104T",
"authz_secondary_identifier": "PDQ",
"file_suffix": "PDQ",
"category": "AUX",
"size": 11520,
"type": "science"
},
{
"product_key": "Z14Z0104T_z14z0104x_ocx.fits",
"access": "PUBLIC",
"dataset": "Z14Z0104T",
"instrument_name": "HRS ",
"filters": "MIRROR-N2",
"filename": "z14z0104x_ocx.fits",
"uri": "Z14Z0104T/z14z0104x_ocx.fits",
"authz_primary_identifier": "Z14Z0104X",
"authz_secondary_identifier": "OCX",
"file_suffix": "OCX",
"category": "OTHER",
"size": 11520,
"type": "science"
}
]
}
Loading

0 comments on commit 5a9633c

Please sign in to comment.