From 0a46a03964ec3230e2f1b2abd34c50b0edaa4b5d Mon Sep 17 00:00:00 2001 From: Sam Bianco Date: Mon, 18 Nov 2024 20:31:37 -0500 Subject: [PATCH 1/7] Implementation, remote tests --- astroquery/mast/missions.py | 362 ++++++++++++++++++++-- astroquery/mast/observations.py | 32 +- astroquery/mast/services.py | 72 +++-- astroquery/mast/tests/test_mast_remote.py | 159 ++++++++++ astroquery/mast/utils.py | 26 ++ docs/mast/mast_obsquery.rst | 4 +- 6 files changed, 586 insertions(+), 69 deletions(-) diff --git a/astroquery/mast/missions.py b/astroquery/mast/missions.py index 0d56fcda11..8dd7886584 100644 --- a/astroquery/mast/missions.py +++ b/astroquery/mast/missions.py @@ -7,17 +7,21 @@ """ import difflib -from json import JSONDecodeError import warnings +from json import JSONDecodeError +from pathlib import Path +from urllib.parse import quote -from astropy.table import Table import astropy.units as u import astropy.coordinates as coord -from requests import RequestException +import numpy as np +from astropy.table import Table, Row, Column, vstack +from requests import HTTPError, RequestException +from astroquery import log from astroquery.utils import commons, async_to_sync from astroquery.utils.class_or_instance import class_or_instance -from astroquery.exceptions import InvalidQueryError, MaxResultsWarning +from astroquery.exceptions import InvalidQueryError, MaxResultsWarning, InputWarning, NoResultsWarning from astroquery.mast import utils from astroquery.mast.core import MastQueryWithLogin @@ -34,18 +38,39 @@ class MastMissionsClass(MastQueryWithLogin): Class that allows direct programmatic access to retrieve metadata via the MAST search API for a given mission. 
""" - def __init__(self, *, mission='hst', service='search'): + # Static class variables + _search = 'search' + _list_products = 'list_products' + + def __init__(self, *, mission='hst'): super().__init__() + self.dataset_kwds = { # column keywords corresponding to dataset ID + 'hst': 'sci_data_set_name', + 'jwst': 'fileSetName' + } + + # Service attributes + self.service = self._search # current API service + self.service_dict = {self._search: {'path': 'search'}, + self._list_products: {'path': 'list_products'}} + + # Search attributes self._search_option_fields = ['limit', 'offset', 'sort_by', 'search_key', 'sort_desc', 'select_cols', 'skip_count', 'user_fields'] - self.service = service - self.mission = mission - self.limit = 5000 - self.columns = dict() # Info about columns for each mission + self.mission = mission # current mission + self.limit = 5000 # maximum number of results + self.columns = dict() # columns configuration for each mission - service_dict = {self.service: {'path': self.service, 'args': {}}} - self._service_api_connection.set_service_params(service_dict, f"{self.service}/{self.mission}") + @property + def mission(self): + return self._mission + + @mission.setter + def mission(self, value): + # Need to update the service parameters if the mission is changed + self._mission = value + self._service_api_connection.set_service_params(self.service_dict, f'search/{self.mission}') def _parse_result(self, response, *, verbose=False): # Used by the async_to_sync decorator functionality """ @@ -65,7 +90,12 @@ def _parse_result(self, response, *, verbose=False): # Used by the async_to_syn response : `~astropy.table.Table` """ - results = self._service_api_connection._parse_result(response, verbose, data_key='results') + if self.service == self._search: + results = self._service_api_connection._parse_result(response, verbose, data_key='results') + elif self.service == self._list_products: + # Results from list_products endpoint need to be handled 
differently + results = Table(response.json()['products']) + if len(results) >= self.limit: warnings.warn("Maximum results returned, may not include all sources within radius.", MaxResultsWarning) @@ -105,7 +135,8 @@ def _validate_criteria(self, **criteria): raise InvalidQueryError(error_msg) @class_or_instance - def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offset=0, **criteria): + def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offset=0, + select_cols=None, **criteria): """ Given a sky position and radius, returns a list of matching dataset IDs. @@ -125,6 +156,8 @@ def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offs offset : int Optional and default is 0 the number of records you wish to skip before selecting records. + select_cols: list, None + Default None. Names of columns that will be included in the astropy table **criteria Other mission-specific criteria arguments. All valid filters can be found using `~astroquery.mast.missions.MastMissionsClass.get_column_list` @@ -137,6 +170,7 @@ def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offs """ self.limit = limit + self.service = self._search # Check that criteria arguments are valid self._validate_criteria(**criteria) @@ -147,12 +181,17 @@ def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offs # if radius is just a number we assume degrees radius = coord.Angle(radius, u.arcmin) + # Dataset ID column should always be returned + if select_cols: + select_cols.append(self.dataset_kwds[self.mission]) + # basic params params = {'target': [f"{coordinates.ra.deg} {coordinates.dec.deg}"], 'radius': radius.arcsec, 'radius_units': 'arcseconds', 'limit': limit, - 'offset': offset} + 'offset': offset, + 'select_cols': select_cols} params['conditions'] = [] # adding additional user specified parameters @@ -162,11 +201,11 @@ def query_region_async(self, coordinates, *, radius=3*u.arcmin, 
limit=5000, offs else: params[prop] = value - return self._service_api_connection.service_request_async(self.service, params, use_json=True) + return self._service_api_connection.missions_request_async(self.service, params) @class_or_instance def query_criteria_async(self, *, coordinates=None, objectname=None, radius=3*u.arcmin, - limit=5000, offset=0, select_cols=[], **criteria): + limit=5000, offset=0, select_cols=None, **criteria): """ Given a set of search criteria, returns a list of mission metadata. @@ -188,8 +227,8 @@ def query_criteria_async(self, *, coordinates=None, objectname=None, radius=3*u. offset : int Optional and default is 0. the number of records you wish to skip before selecting records. - select_cols: list - names of columns that will be included in the astropy table + select_cols: list, None + Default None. Names of columns that will be included in the astropy table **criteria Criteria to apply. At least one non-positional criterion must be supplied. Valid criteria are coordinates, objectname, radius (as in @@ -205,16 +244,22 @@ def query_criteria_async(self, *, coordinates=None, objectname=None, radius=3*u. """ self.limit = limit + self.service = self._search # Check that criteria arguments are valid self._validate_criteria(**criteria) + # Parse user input location if objectname or coordinates: coordinates = utils.parse_input_location(coordinates, objectname) # if radius is just a number we assume degrees radius = coord.Angle(radius, u.arcmin) + # Dataset ID column should always be returned + if select_cols: + select_cols.append(self.dataset_kwds[self.mission]) + # build query params = {"limit": self.limit, "offset": offset, 'select_cols': select_cols} if coordinates: @@ -232,10 +277,11 @@ def query_criteria_async(self, *, coordinates=None, objectname=None, radius=3*u. 
else: params[prop] = value - return self._service_api_connection.service_request_async(self.service, params, use_json=True) + return self._service_api_connection.missions_request_async(self.service, params) @class_or_instance - def query_object_async(self, objectname, *, radius=3*u.arcmin, limit=5000, offset=0, **criteria): + def query_object_async(self, objectname, *, radius=3*u.arcmin, limit=5000, offset=0, + select_cols=None, **criteria): """ Given an object name, returns a list of matching rows. @@ -254,6 +300,8 @@ def query_object_async(self, objectname, *, radius=3*u.arcmin, limit=5000, offse offset : int Optional and default is 0. the number of records you wish to skip before selecting records. + select_cols: list, None + Default None. Names of columns that will be included in the astropy table **criteria Other mission-specific criteria arguments. All valid filters can be found using `~astroquery.mast.missions.MastMissionsClass.get_column_list` @@ -267,7 +315,279 @@ def query_object_async(self, objectname, *, radius=3*u.arcmin, limit=5000, offse coordinates = utils.resolve_object(objectname) - return self.query_region_async(coordinates, radius=radius, limit=limit, offset=offset, **criteria) + return self.query_region_async(coordinates, radius=radius, limit=limit, offset=offset, + select_cols=select_cols, **criteria) + + @class_or_instance + def get_product_list_async(self, datasets): + """ + Given a dataset ID or list of dataset IDs, returns a list of associated data products. + + To return unique data products, use ``MastMissions.get_unique_product_list``. + + Parameters + ---------- + datasets : str, list, `~astropy.table.Row`, `~astropy.table.Column`, `~astropy.table.Table` + Row/Table of MastMissions query results (e.g. output from `query_object`) + or single/list of dataset ID(s). 
+ + Returns + ------- + response : list of `~requests.Response` + """ + + self.service = self._list_products + + # Extract dataset IDs based on input type and mission + if isinstance(datasets, Table): + datasets = datasets[self.dataset_kwds[self.mission]] + elif isinstance(datasets, Row): + datasets = np.array([datasets[self.dataset_kwds[self.mission]]]) + elif isinstance(datasets, str) or isinstance(datasets, Column): + datasets = np.array([datasets]) + elif isinstance(datasets, list): + datasets = np.array(datasets) + else: + raise TypeError('Unsupported data type for `datasets`. Expected string, ' + 'list of strings, Astropy row, or Astropy Table.') + + # Filter out empty strings from IDs + datasets = datasets[np.char.strip(datasets) != ''] + if datasets.size == 0: + raise InvalidQueryError("Dataset list is empty, no associated products.") + + # Send async service request + params = {'dataset_ids': ','.join(datasets)} + return self._service_api_connection.missions_request_async(self.service, params) + + def get_unique_product_list(self, datasets): + """ + Given a dataset ID or list of dataset IDs, returns a list of associated data products with unique + URIs. + + Parameters + ---------- + datasets : str, list, `~astropy.table.Row`, `~astropy.table.Column`, `~astropy.table.Table` + Row/Table of MastMissions query results (e.g. output from `query_object`) + or single/list of dataset ID(s). + + Returns + ------- + unique_products : `~astropy.table.Table` + Table containing products with unique URIs. + """ + products = self.get_product_list(datasets) + unique_products = utils.remove_duplicate_products(products, 'filename') + if len(unique_products) < len(products): + log.info("To return all products, use `MastMissions.get_product_list`") + return unique_products + + def filter_products(self, products, *, extension=None, **filters): + """ + Filters an `~astropy.table.Table` of mission data products based on given filters. 
+ Parameters + ---------- + products : `~astropy.table.Table` + Table containing data products to be filtered. + extension : string or array, optional + Default is None. Filters by file extension(s), matching any specified extensions. + **filters : + Column-based filters to be applied. + Each keyword corresponds to a column name in the table, with the argument being one or more + acceptable values for that column. AND logic is applied between filters, OR logic within + each filter set. + For example: type="science", extension=["fits","jpg"] + Returns + ------- + response : `~astropy.table.Table` + Filtered Table of data products. + """ + + # Start with a mask of True for all entries + filter_mask = np.full(len(products), True, dtype=bool) + + # Filter by file extension, if provided + if extension: + extensions = [extension] if isinstance(extension, str) else extension + ext_mask = np.array( + [not isinstance(x, np.ma.core.MaskedConstant) and any(x.endswith(ext) for ext in extensions) + for x in products["filename"]], + dtype=bool + ) + filter_mask &= ext_mask + + # Applying column-based filters + for colname, vals in filters.items(): + if colname not in products.colnames: + warnings.warn(f"Column '{colname}' not found in product table.", InputWarning) + continue + + vals = [vals] if isinstance(vals, str) else vals + col_mask = np.isin(products[colname], vals) + filter_mask &= col_mask + + # Return filtered products + return products[filter_mask] + + def download_file(self, uri, *, local_path=None, cache=True, verbose=True): + """ + Downloads a single file based on the data URI. + Parameters + ---------- + uri : str + The product dataURI + local_path : str + Directory or filename to which the file will be downloaded. Defaults to current working directory. + cache : bool + Default is True. If file is found on disk, it will not be downloaded again. + verbose : bool, optional + Default is True. Whether to show download progress in the console. 
+ Returns + ------- + status: str + Download status message. Either COMPLETE, SKIPPED, or ERROR. + msg : str + An error status message, if any. + url : str + The full URL download path. + """ + + # Construct the full data URL + base_url = self._service_api_connection.MISSIONS_DOWNLOAD_URL + self.mission + '/api/v0.1/retrieve_product' + data_url = base_url + '?product_name=' + uri + escaped_url = base_url + '?product_name=' + quote(uri, safe=':') + + # Determine local file path. Use current directory as default. + filename = Path(uri).name + local_path = Path(local_path or filename) + if not local_path.suffix: # Append filename if local path is directory + local_path = local_path / filename + local_path.parent.mkdir(parents=True, exist_ok=True) + + status = 'COMPLETE' + msg = None + url = None + + try: + # Attempt file download + self._download_file(escaped_url, local_path, cache=cache, continuation=False, verbose=verbose) + + # Check if file exists + if not local_path.is_file() and status != 'SKIPPED': + status = 'ERROR' + msg = 'File was not downloaded' + url = data_url + + except HTTPError as err: + status = 'ERROR' + msg = 'HTTPError: {0}'.format(err) + url = data_url + + return status, msg, url + + def _download_files(self, products, base_dir, *, flat=False, cache=True, verbose=True): + """ + Downloads files listed in an `~astropy.table.Table` of data products to a specified directory. + Parameters + ---------- + products : `~astropy.table.Table` + Table containing products to be downloaded. + base_dir : str + Directory in which files will be downloaded. + flat : bool + Default is False. If True, all files are downloaded directly to `base_dir`, and no subdirectories + will be created. + cache : bool + Default is True. If file is found on disk, it will not be downloaded again. + verbose : bool, optional + Default is True. Whether to show download progress in the console. 
+ Returns + ------- + response : `~astropy.table.Table` + Table containing download results for each data product file. + """ + + manifest_entries = [] + base_dir = Path(base_dir) + + for data_product in products: + # Determine local path for each file + local_path = base_dir / data_product['dataset'] if not flat else base_dir + local_path.mkdir(parents=True, exist_ok=True) + local_file_path = local_path / Path(data_product['filename']).name + + # Download files and record status + status, msg, url = self.download_file(data_product['uri'], + local_path=local_file_path, + cache=cache, + verbose=verbose) + manifest_entries.append([local_file_path, status, msg or '', url or '']) + + # Return manifest as Astropy Table + manifest = Table(rows=manifest_entries, names=('Local Path', 'Status', 'Message', 'URL')) + return manifest + + def download_products(self, products, *, download_dir=None, flat=False, + cache=True, extension=None, verbose=True, **filters): + """ + Download specified data products. + Parameters + ---------- + products : str, list, `~astropy.table.Table` + Either a single or list of dataset IDs (e.g., as input for `get_product_list`), + or a Table of products (e.g., as output from `get_product_list`) + download_dir : str or Path, optional + Directory for file downloads. Defaults to current directory. + flat : bool, optional + Default is False. If False, puts files into the standard + directory structure of "mastDownload///". + If True, places files directly in `download_dir` without subdirectories. + cache : bool, optional + Default is True. If file is found on disc, it will not be downloaded again. + extension : string or list, optional + Default is None. Filter by file extension. + verbose : bool, optional + Default is True. Whether to show download progress in the console. + **filters : + Column-based filters to be applied. + Each keyword corresponds to a column name in the table, with the argument being one or more + acceptable values for that column. 
AND logic is applied between filters, OR logic within + each filter set. + For example: type="science", extension=["fits","jpg"] + Returns + ------- + manifest : `~astropy.table.Table` + A table manifest showing downloaded file locations and statuses. + """ + # Ensure `products` is a Table, collecting products if necessary + if isinstance(products, (str, list)): + products = [products] if isinstance(products, str) else products + products = vstack([self.get_product_list(oid) for oid in products]) + elif isinstance(products, Row): + products = Table(products, masked=True) + + # Apply filters + products = self.filter_products(products, extension=extension, **filters) + + # Remove duplicates + products = utils.remove_duplicate_products(products, 'filename') + + if not len(products): + warnings.warn("No products to download.", NoResultsWarning) + return + + # Set up base directory for downloads + download_dir = Path(download_dir or '.') + base_dir = download_dir if flat else download_dir / 'mastDownload' / self.mission + + # Download files + manifest = self._download_files(products, + base_dir=base_dir, + flat=flat, + cache=cache, + verbose=verbose) + + return manifest @class_or_instance def get_column_list(self): diff --git a/astroquery/mast/observations.py b/astroquery/mast/observations.py index a0e595c6ba..ef1bfd1e49 100644 --- a/astroquery/mast/observations.py +++ b/astroquery/mast/observations.py @@ -19,7 +19,7 @@ import astropy.units as u import astropy.coordinates as coord -from astropy.table import Table, Row, unique, vstack +from astropy.table import Table, Row, vstack from astroquery import log from astroquery.mast.cloud import CloudAccess @@ -816,7 +816,7 @@ def download_products(self, products, *, download_dir=None, flat=False, products = self.filter_products(products, mrp_only=mrp_only, **filters) # remove duplicate products - products = self._remove_duplicate_products(products) + products = utils.remove_duplicate_products(products, 'dataURI') if not 
len(products): warnings.warn("No products to download.", NoResultsWarning) @@ -928,7 +928,7 @@ def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=Fa return # Remove duplicate products - data_products = self._remove_duplicate_products(data_products) + data_products = utils.remove_duplicate_products(data_products, 'dataURI') return self._cloud_connection.get_cloud_uri_list(data_products, include_bucket, full_url) @@ -966,30 +966,6 @@ def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False): # Query for product URIs return self._cloud_connection.get_cloud_uri(data_product, include_bucket, full_url) - def _remove_duplicate_products(self, data_products): - """ - Removes duplicate data products that have the same dataURI. - - Parameters - ---------- - data_products : `~astropy.table.Table` - Table containing products to be checked for duplicates. - - Returns - ------- - unique_products : `~astropy.table.Table` - Table containing products with unique dataURIs. - - """ - number = len(data_products) - unique_products = unique(data_products, keys="dataURI") - number_unique = len(unique_products) - if number_unique < number: - log.info(f"{number - number_unique} of {number} products were duplicates. " - f"Only returning {number_unique} unique product(s).") - - return unique_products - def get_unique_product_list(self, observations): """ Given a "Product Group Id" (column name obsid), returns a list of associated data products with @@ -1009,7 +985,7 @@ def get_unique_product_list(self, observations): Table containing products with unique dataURIs. 
""" products = self.get_product_list(observations) - unique_products = self._remove_duplicate_products(products) + unique_products = utils.remove_duplicate_products(products, 'dataURI') if len(unique_products) < len(products): log.info("To return all products, use `Observations.get_product_list`") return unique_products diff --git a/astroquery/mast/services.py b/astroquery/mast/services.py index 7ca19faaca..8ba79aac43 100644 --- a/astroquery/mast/services.py +++ b/astroquery/mast/services.py @@ -112,6 +112,7 @@ class ServiceAPI(BaseQuery): SERVICE_URL = conf.server REQUEST_URL = conf.server + "/api/v0.1/" + MISSIONS_DOWNLOAD_URL = conf.server + "/search/" SERVICES = {} def __init__(self, session=None): @@ -122,6 +123,8 @@ def __init__(self, session=None): self.TIMEOUT = conf.timeout + self._column_configs = {} # Dict to hold column configurations for services + def set_service_params(self, service_dict, service_name="", server_prefix=False): """ Initialize the request url and available queries for a given service. 
@@ -270,27 +273,28 @@ def service_request_async(self, service, params, pagesize=None, page=None, use_j request_url = self.REQUEST_URL + service_url.format(**compiled_service_args) + # Default headers headers = { 'User-Agent': self._session.headers['User-Agent'], 'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json' } + # Params as a list of tuples to allow for multiple parameters added catalogs_request = [] - if not page: - page = params.pop('page', None) - if not pagesize: - pagesize = params.pop('pagesize', None) + page = page or params.pop('page', None) + pagesize = pagesize or params.pop('pagesize', None) + # Add pagination if specified if page is not None: catalogs_request.append(('page', page)) if pagesize is not None: catalogs_request.append(('pagesize', pagesize)) + # Populate parameters based on `use_json` if not use_json: - # Decompose filters, sort - for prop, value in kwargs.items(): - params[prop] = value + # When not using JSON, merge kwargs into params and build query + params.update(kwargs) catalogs_request.extend(self._build_catalogs_params(params)) else: headers['Content-Type'] = 'application/json' @@ -307,9 +311,10 @@ def service_request_async(self, service, params, pagesize=None, page=None, use_j catalogs_request = params_dict # Removing single-element lists. 
Single values will live on their own (except for `sort_by`) - for key in catalogs_request.keys(): - if (key != 'sort_by') & (len(catalogs_request[key]) == 1): - catalogs_request[key] = catalogs_request[key][0] + catalogs_request = { + k: v if k == 'sort_by' or len(v) > 1 else v[0] + for k, v in params_dict.items() + } # Otherwise, catalogs_request can remain as the original params dict else: @@ -318,6 +323,43 @@ def service_request_async(self, service, params, pagesize=None, page=None, use_j response = self._request('POST', request_url, data=catalogs_request, headers=headers, use_json=use_json) return response + @class_or_instance + def missions_request_async(self, service, params): + """ + Builds and executes an asynchronous query to the MAST Search API. + Parameters + ---------- + service : str + The MAST Search API service to query. Should be present in self.SERVICES. + params : dict + JSON object containing service parameters. + Returns + ------- + response : list of `~requests.Response` + """ + service_config = self.SERVICES.get(service.lower()) + request_url = self.REQUEST_URL + service_config.get('path') + + # Default headers + headers = { + 'User-Agent': self._session.headers['User-Agent'], + 'Content-Type': 'application/json', + 'Accept': 'application/json' + } + + # Determine request method and payload based on service + method = 'POST' if service == 'search' else 'GET' + data, params = (params, None) if method == 'POST' else (None, params) + + # make request + response = self._request(method=method, + url=request_url, + params=params, + data=data, + headers=headers, + use_json=True) + return response + def _build_catalogs_params(self, params): """ Gathers parameters for Catalogs.MAST usage and translates to valid API syntax tuples @@ -387,12 +429,6 @@ def check_catalogs_criteria_params(self, criteria): response : boolean Whether the passed dict has at least one criteria parameter """ - criteria_check = False - non_criteria_params = ["columns", "sort_by", 
"page_size", "pagesize", "page"] - criteria_keys = criteria.keys() - for key in criteria_keys: - if key not in non_criteria_params: - criteria_check = True - break - return criteria_check + non_criteria_params = ["columns", "sort_by", "page_size", "pagesize", "page"] + return any(key not in non_criteria_params for key in criteria) diff --git a/astroquery/mast/tests/test_mast_remote.py b/astroquery/mast/tests/test_mast_remote.py index 08f960c647..b47905cf19 100644 --- a/astroquery/mast/tests/test_mast_remote.py +++ b/astroquery/mast/tests/test_mast_remote.py @@ -154,6 +154,165 @@ def test_missions_query_criteria_invalid_keyword(self): MastMissions.query_criteria(search_position='30 30') assert 'search_pos' in str(err_with_alt.value) + def test_missions_get_product_list_async(self): + datasets = MastMissions.query_object("M4", radius=0.1) + + # Table as input + responses = MastMissions.get_product_list_async(datasets[:3]) + assert isinstance(responses, Response) + + # Row as input + responses = MastMissions.get_product_list_async(datasets[0]) + assert isinstance(responses, Response) + + # String as input + responses = MastMissions.get_product_list_async(datasets[0]['sci_data_set_name']) + assert isinstance(responses, Response) + + # Column as input + responses = MastMissions.get_product_list_async(datasets[:3]['sci_data_set_name']) + assert isinstance(responses, Response) + + # Unsupported data type for datasets + with pytest.raises(TypeError) as err_type: + MastMissions.get_product_list_async(1) + assert 'Unsupported data type' in str(err_type.value) + + # Empty dataset list + with pytest.raises(InvalidQueryError) as err_empty: + MastMissions.get_product_list_async([' ']) + assert 'Dataset list is empty' in str(err_empty.value) + + def test_missions_get_product_list(self): + datasets = MastMissions.query_object("M4", radius=0.1) + test_dataset = datasets[0]['sci_data_set_name'] + multi_dataset = list(datasets[:2]['sci_data_set_name']) + + # Compare Row input and 
string input + result1 = MastMissions.get_product_list(test_dataset) + result2 = MastMissions.get_product_list(datasets[0]) + assert isinstance(result1, Table) + assert len(result1) == len(result2) + assert set(result1['filename']) == set(result2['filename']) + + # Compare Table input and list input + result1 = MastMissions.get_product_list(multi_dataset) + result2 = MastMissions.get_product_list(datasets[:2]) + assert isinstance(result1, Table) + assert len(result1) == len(result2) + assert set(result1['filename']) == set(result2['filename']) + + # Filter datasets based on sci_data_set_name and verify products + filtered = datasets[datasets['sci_data_set_name'] == 'IBKH03020'] + result = MastMissions.get_product_list(filtered) + assert isinstance(result, Table) + assert (result['dataset'] == 'IBKH03020').all() + + def test_missions_get_unique_product_list(self, caplog): + # Check that no rows are filtered out when all products are unique + dataset_ids = ['JBTAA8010'] + products = MastMissions.get_product_list(dataset_ids) + unique_products = MastMissions.get_unique_product_list(dataset_ids) + + # Should have the same length + assert len(products) == len(unique_products) + # No INFO messages should be logged + with caplog.at_level('INFO', logger='astroquery'): + assert caplog.text == '' + + # Check that rows are filtered out when products are not unique + dataset_ids.append('JBTAA8020') + products = MastMissions.get_product_list(dataset_ids) + unique_products = MastMissions.get_unique_product_list(dataset_ids) + + # Unique product list should have fewer rows + assert len(products) > len(unique_products) + # Rows should be unique based on dataURI + assert (unique_products == unique(unique_products, keys='filename')).all() + # Check that INFO messages were logged + with caplog.at_level('INFO', logger='astroquery'): + assert 'products were duplicates' in caplog.text + assert 'To return all products' in caplog.text + + def test_missions_filter_products(self): + # 
Filter by extension + products = MastMissions.get_product_list('W0FX0301T') + filtered = MastMissions.filter_products(products, + extension='jpg') + assert isinstance(filtered, Table) + assert all(filename.endswith('.jpg') for filename in filtered['filename']) + + # Filter by existing column + filtered = MastMissions.filter_products(products, + category='CALIBRATED') + assert isinstance(filtered, Table) + assert all(filtered['category'] == 'CALIBRATED') + + # Filter by non-existing column + with pytest.warns(InputWarning): + filtered = MastMissions.filter_products(products, + invalid=True) + + def test_missions_download_products(self, tmp_path): + def check_filepath(path): + assert path.is_file() + + # Check string input + test_dataset_id = 'Z14Z0104T' + result = MastMissions.download_products(test_dataset_id, + download_dir=tmp_path) + for row in result: + if row['Status'] == 'COMPLETE': + check_filepath(row['Local Path']) + + # Check Row input + datasets = MastMissions.query_object("M4", radius=0.1) + prods = MastMissions.get_product_list(datasets[0])[0] + result = MastMissions.download_products(prods, + download_dir=tmp_path) + check_filepath(result['Local Path'][0]) + + # Warn about no products + with pytest.warns(NoResultsWarning): + result = MastMissions.download_products(test_dataset_id, + extension='jpg', + download_dir=tmp_path) + + def test_missions_download_products_flat(self, tmp_path): + # Download products without creating subdirectories + result = MastMissions.download_products('Z14Z0104T', + flat=True, + download_dir=tmp_path) + for row in result: + if row['Status'] == 'COMPLETE': + assert row['Local Path'].parent == tmp_path + + def test_missions_download_file(self, tmp_path): + def check_result(result, path): + assert result == ('COMPLETE', None, None) + assert path.is_file() + + # Get URI from data product + product = MastMissions.get_product_list('Z14Z0104T')[0] + uri = product['uri'] + filename = Path(uri).name + + # Download with unspecified 
local_path + # Should download to current working directory + result = MastMissions.download_file(uri) + check_result(result, Path(os.getcwd(), filename)) + Path.unlink(filename) # clean up file + + # Download with directory as local_path parameter + local_path = Path(tmp_path, filename) + result = MastMissions.download_file(uri, local_path=tmp_path) + check_result(result, local_path) + + # Download with filename as local_path parameter + local_path_file = Path(tmp_path, 'test.fits') + result = MastMissions.download_file(uri, local_path=local_path_file) + check_result(result, local_path_file) + ################### # MastClass tests # ################### diff --git a/astroquery/mast/utils.py b/astroquery/mast/utils.py index 56bfa5810a..cfe5ccba85 100644 --- a/astroquery/mast/utils.py +++ b/astroquery/mast/utils.py @@ -14,7 +14,9 @@ from urllib import parse import astropy.coordinates as coord +from astropy.table import unique +from .. import log from ..version import version from ..exceptions import ResolverError, InvalidQueryError from ..utils import commons @@ -211,3 +213,27 @@ def _split_list_into_chunks(input_list, chunk_size): """Helper function for `mast_relative_path`.""" for idx in range(0, len(input_list), chunk_size): yield input_list[idx:idx + chunk_size] + + +def remove_duplicate_products(data_products, uri_key): + """ + Removes duplicate data products that have the same data URI. + Parameters + ---------- + data_products : `~astropy.table.Table` + Table containing products to be checked for duplicates. + uri_key : str + Column name representing the URI of a product. + Returns + ------- + unique_products : `~astropy.table.Table` + Table containing products with unique dataURIs. + """ + number = len(data_products) + unique_products = unique(data_products, keys=uri_key) + number_unique = len(unique_products) + if number_unique < number: + log.info(f"{number - number_unique} of {number} products were duplicates. 
" + f"Only returning {number_unique} unique product(s).") + + return unique_products diff --git a/docs/mast/mast_obsquery.rst b/docs/mast/mast_obsquery.rst index c827ed2b6b..abf3ee0e76 100644 --- a/docs/mast/mast_obsquery.rst +++ b/docs/mast/mast_obsquery.rst @@ -294,7 +294,7 @@ To return only unique data products for an observation, use `~astroquery.mast.Ob ... proposal_id=['12062'], ... dataRights='PUBLIC') >>> unique_products = Observations.get_unique_product_list(obs) - INFO: 180 of 370 products were duplicates. Only returning 190 unique product(s). [astroquery.mast.observations] + INFO: 180 of 370 products were duplicates. Only returning 190 unique product(s). [astroquery.mast.utils] INFO: To return all products, use `Observations.get_product_list` [astroquery.mast.observations] >>> print(unique_products[:10]['dataURI']) dataURI @@ -503,7 +503,7 @@ This approach is recommended for code brevity. Query criteria are supplied as ke ... proposal_id=['12062'], ... dataRights='PUBLIC', ... filter_products={'productSubGroupDescription': 'DRZ'}) - INFO: 2 of 4 products were duplicates. Only returning 2 unique product(s). [astroquery.mast.observations] + INFO: 2 of 4 products were duplicates. Only returning 2 unique product(s). 
[astroquery.mast.utils] >>> print(s3_uris) ['s3://stpubdata/hst/public/jbev/jbeveo010/jbeveo010_drz.fits', 's3://stpubdata/hst/public/jbev/jbevet010/jbevet010_drz.fits'] >>> Observations.disable_cloud_dataset() From 79e76f4b0583ccd735975ca5378702a17256c4cf Mon Sep 17 00:00:00 2001 From: Sam Bianco Date: Tue, 19 Nov 2024 15:45:54 -0500 Subject: [PATCH 2/7] More tests, download from HLSPs --- astroquery/mast/missions.py | 30 +++- astroquery/mast/services.py | 9 +- astroquery/mast/tests/data/README.rst | 11 ++ .../mast/tests/data/mission_products.json | 79 ++++++++++ astroquery/mast/tests/test_mast.py | 142 +++++++++++++++++- astroquery/mast/tests/test_mast_remote.py | 28 +++- 6 files changed, 287 insertions(+), 12 deletions(-) create mode 100644 astroquery/mast/tests/data/mission_products.json diff --git a/astroquery/mast/missions.py b/astroquery/mast/missions.py index 8dd7886584..7cccf6d82e 100644 --- a/astroquery/mast/missions.py +++ b/astroquery/mast/missions.py @@ -42,12 +42,19 @@ class MastMissionsClass(MastQueryWithLogin): _search = 'search' _list_products = 'list_products' + # Workaround so that observation_id is returned in ULLYSES queries that do not specify columns + _default_ulysses_cols = ['target_name_ulysses', 'target_classification', 'targ_ra', 'targ_dec', 'host_galaxy_name', + 'spectral_type', 'bmv0_mag', 'u_mag', 'b_mag', 'v_mag', 'gaia_g_mean_mag', 'star_mass', + 'instrument', 'grating', 'filter', 'observation_id'] + def __init__(self, *, mission='hst'): super().__init__() self.dataset_kwds = { # column keywords corresponding to dataset ID 'hst': 'sci_data_set_name', - 'jwst': 'fileSetName' + 'jwst': 'fileSetName', + 'classy': 'Target', + 'ullyses': 'observation_id' } # Service attributes @@ -69,7 +76,7 @@ def mission(self): @mission.setter def mission(self, value): # Need to update the service parameters if the mission is changed - self._mission = value + self._mission = value.lower() 
self._service_api_connection.set_service_params(self.service_dict, f'search/{self.mission}') def _parse_result(self, response, *, verbose=False): # Used by the async_to_sync decorator functionality @@ -184,6 +191,8 @@ def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offs # Dataset ID column should always be returned if select_cols: select_cols.append(self.dataset_kwds[self.mission]) + elif self.mission == 'ullyses': + select_cols = self._default_ulysses_cols # basic params params = {'target': [f"{coordinates.ra.deg} {coordinates.dec.deg}"], @@ -259,6 +268,8 @@ def query_criteria_async(self, *, coordinates=None, objectname=None, radius=3*u. # Dataset ID column should always be returned if select_cols: select_cols.append(self.dataset_kwds[self.mission]) + elif self.mission == 'ullyses': + select_cols = self._default_ulysses_cols # build query params = {"limit": self.limit, "offset": offset, 'select_cols': select_cols} @@ -452,10 +463,17 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True): The full URL download path. """ - # Construct the full data URL - base_url = self._service_api_connection.MISSIONS_DOWNLOAD_URL + self.mission + '/api/v0.1/retrieve_product' - data_url = base_url + '?product_name=' + uri - escaped_url = base_url + '?product_name=' + quote(uri, safe=':') + # Construct the full data URL based on mission + if self.mission in ['hst', 'jwst']: + # HST and JWST have a dedicated endpoint for retrieving products + base_url = self._service_api_connection.MISSIONS_DOWNLOAD_URL + self.mission + '/api/v0.1/retrieve_product' + keyword = 'product_name' + else: + # HLSPs use MAST download URL + base_url = self._service_api_connection.MAST_DOWNLOAD_URL + keyword = 'uri' + data_url = base_url + f'?{keyword}=' + uri + escaped_url = base_url + f'?{keyword}=' + quote(uri, safe='') # Determine local file path. Use current directory as default. 
filename = Path(uri).name diff --git a/astroquery/mast/services.py b/astroquery/mast/services.py index 8ba79aac43..7a20a70baf 100644 --- a/astroquery/mast/services.py +++ b/astroquery/mast/services.py @@ -14,6 +14,7 @@ from astropy.table import Table, MaskedColumn from astropy.utils.decorators import deprecated_renamed_argument +from .. import log from ..query import BaseQuery from ..utils import async_to_sync from ..utils.class_or_instance import class_or_instance @@ -84,7 +85,12 @@ def _json_to_table(json_obj, data_key='data'): col_data = np.array([x[idx] for x in json_obj[data_key]], dtype=object) except KeyError: # it's not a data array, fall back to using column name as it is array of dictionaries - col_data = np.array([x[col_name] for x in json_obj[data_key]], dtype=object) + try: + col_data = np.array([x[col_name] for x in json_obj[data_key]], dtype=object) + except KeyError: + # Skip column names not found in data + log.debug('Column %s was not found in data. Skipping...', col_name) + continue if ignore_value is not None: col_data[np.where(np.equal(col_data, None))] = ignore_value @@ -113,6 +119,7 @@ class ServiceAPI(BaseQuery): SERVICE_URL = conf.server REQUEST_URL = conf.server + "/api/v0.1/" MISSIONS_DOWNLOAD_URL = conf.server + "/search/" + MAST_DOWNLOAD_URL = conf.server + "/api/v0.1/Download/file" SERVICES = {} def __init__(self, session=None): diff --git a/astroquery/mast/tests/data/README.rst b/astroquery/mast/tests/data/README.rst index f15b6a1ad9..42001794db 100644 --- a/astroquery/mast/tests/data/README.rst +++ b/astroquery/mast/tests/data/README.rst @@ -25,3 +25,14 @@ To generate `~astroquery.mast.tests.data.panstarrs_columns.json`, use the follow >>> resp = utils._simple_request('https://catalogs.mast.stsci.edu/api/v0.1/panstarrs/dr2/mean/metadata.json') >>> with open('panstarrs_columns.json', 'w') as file: ... 
json.dump(resp.json(), file, indent=4) # doctest: +SKIP + +To generate `~astroquery.mast.tests.data.mission_products.json`, use the following: + +.. doctest-remote-data:: + + >>> import json + >>> from astroquery.mast import utils + ... + >>> resp = utils._simple_request('https://mast.stsci.edu/search/hst/api/v0.1/list_products', {'dataset_ids': 'Z14Z0104T'}) + >>> with open('mission_products.json', 'w') as file: + ... json.dump(resp.json(), file, indent=4) # doctest: +SKIP diff --git a/astroquery/mast/tests/data/mission_products.json b/astroquery/mast/tests/data/mission_products.json new file mode 100644 index 0000000000..68873d1e1c --- /dev/null +++ b/astroquery/mast/tests/data/mission_products.json @@ -0,0 +1,79 @@ +{ + "products": [ + { + "product_key": "Z14Z0104T_z14z0104t_shf.fits", + "access": "PUBLIC", + "dataset": "Z14Z0104T", + "instrument_name": "HRS ", + "filters": "MIRROR-N2", + "filename": "z14z0104t_shf.fits", + "uri": "Z14Z0104T/z14z0104t_shf.fits", + "authz_primary_identifier": "Z14Z0104T", + "authz_secondary_identifier": "CAL", + "file_suffix": "SHF", + "category": "UNCALIBRATED", + "size": 31680, + "type": "science" + }, + { + "product_key": "Z14Z0104T_z14z0104t_trl.fits", + "access": "PUBLIC", + "dataset": "Z14Z0104T", + "instrument_name": "HRS ", + "filters": "MIRROR-N2", + "filename": "z14z0104t_trl.fits", + "uri": "Z14Z0104T/z14z0104t_trl.fits", + "authz_primary_identifier": "Z14Z0104T", + "authz_secondary_identifier": "CAL", + "file_suffix": "TRL", + "category": "AUX", + "size": 17280, + "type": "science" + }, + { + "product_key": "Z14Z0104T_z14z0104t_ulf.fits", + "access": "PUBLIC", + "dataset": "Z14Z0104T", + "instrument_name": "HRS ", + "filters": "MIRROR-N2", + "filename": "z14z0104t_ulf.fits", + "uri": "Z14Z0104T/z14z0104t_ulf.fits", + "authz_primary_identifier": "Z14Z0104T", + "authz_secondary_identifier": "CAL", + "file_suffix": "ULF", + "category": "UNCALIBRATED", + "size": 14400, + "type": "science" + }, + { + "product_key": 
"Z14Z0104T_z14z0104t_pdq.fits", + "access": "PUBLIC", + "dataset": "Z14Z0104T", + "instrument_name": "HRS ", + "filters": "MIRROR-N2", + "filename": "z14z0104t_pdq.fits", + "uri": "Z14Z0104T/z14z0104t_pdq.fits", + "authz_primary_identifier": "Z14Z0104T", + "authz_secondary_identifier": "PDQ", + "file_suffix": "PDQ", + "category": "AUX", + "size": 11520, + "type": "science" + }, + { + "product_key": "Z14Z0104T_z14z0104x_ocx.fits", + "access": "PUBLIC", + "dataset": "Z14Z0104T", + "instrument_name": "HRS ", + "filters": "MIRROR-N2", + "filename": "z14z0104x_ocx.fits", + "uri": "Z14Z0104T/z14z0104x_ocx.fits", + "authz_primary_identifier": "Z14Z0104X", + "authz_secondary_identifier": "OCX", + "file_suffix": "OCX", + "category": "OTHER", + "size": 11520, + "type": "science" + } + ] +} \ No newline at end of file diff --git a/astroquery/mast/tests/test_mast.py b/astroquery/mast/tests/test_mast.py index 282d9f0648..6616e33f06 100644 --- a/astroquery/mast/tests/test_mast.py +++ b/astroquery/mast/tests/test_mast.py @@ -7,7 +7,7 @@ import pytest -from astropy.table import Table +from astropy.table import Table, unique from astropy.coordinates import SkyCoord from astropy.io import fits @@ -15,7 +15,7 @@ from astroquery.mast.services import _json_to_table from astroquery.utils.mocks import MockResponse -from astroquery.exceptions import InvalidQueryError, InputWarning +from astroquery.exceptions import InvalidQueryError, InputWarning, MaxResultsWarning, NoResultsWarning from astroquery import mast @@ -23,6 +23,7 @@ 'Mast.Name.Lookup': 'resolver.json', 'mission_search_results': 'mission_results.json', 'mission_columns': 'mission_columns.json', + 'mission_products': 'mission_products.json', 'columnsconfig': 'columnsconfig.json', 'ticcolumns': 'ticcolumns.json', 'ticcol_filtered': 'ticcolumns_filtered.json', @@ -72,6 +73,7 @@ def patch_post(request): mp.setattr(mast.Observations, '_download_file', download_mockreturn) mp.setattr(mast.Observations, 'download_file', 
download_mockreturn) mp.setattr(mast.Catalogs, '_download_file', download_mockreturn) + mp.setattr(mast.MastMissions, '_download_file', download_mockreturn) mp.setattr(mast.Tesscut, '_download_file', tesscut_download_mockreturn) mp.setattr(mast.Zcut, '_download_file', zcut_download_mockreturn) @@ -108,7 +110,7 @@ def post_mockreturn(self, method="POST", url=None, data=None, timeout=10, **kwar return [MockResponse(content)] -def service_mockreturn(self, method="POST", url=None, data=None, timeout=10, use_json=False, **kwargs): +def service_mockreturn(self, method="POST", url=None, data=None, params=None, timeout=10, use_json=False, **kwargs): if "panstarrs" in url: filename = data_path(DATA_FILES["panstarrs"]) elif "tesscut" in url: @@ -121,6 +123,8 @@ def service_mockreturn(self, method="POST", url=None, data=None, timeout=10, use filename = data_path(DATA_FILES['z_survey']) else: filename = data_path(DATA_FILES['z_cutout_fit']) + elif use_json and 'list_products' in url: + filename = data_path(DATA_FILES['mission_products']) elif use_json and data['radius'] == 300: filename = data_path(DATA_FILES["mission_incorrect_results"]) elif use_json: @@ -211,7 +215,9 @@ def test_missions_query_object(patch_post): def test_missions_query_region(patch_post): - result = mast.MastMissions.query_region(regionCoords, radius=0.002 * u.deg) + result = mast.MastMissions.query_region(regionCoords, + radius=0.002 * u.deg, + select_cols=['sci_pep_id']) assert isinstance(result, Table) assert len(result) > 0 @@ -242,6 +248,134 @@ def test_missions_query_criteria_async_with_missing_results(patch_post): _json_to_table(json.loads(responses), 'results') +def test_missions_query_criteria(patch_post): + result = mast.MastMissions.query_criteria( + coordinates=regionCoords, + radius=3, + sci_pep_id=12556, + sci_obs_type='SPECTRUM', + sci_instrume='stis,acs,wfc3,cos,fos,foc,nicmos,ghrs', + sci_aec='S', + select_cols=['sci_pep_id', 'sci_instrume'] + ) + assert isinstance(result, Table) + assert 
len(result) > 0 + + # Raise error if non-positional criteria is not supplied + with pytest.raises(InvalidQueryError): + mast.MastMissions.query_criteria( + coordinates=regionCoords, + radius=3 + ) + + # Raise error if invalid criteria is supplied + with pytest.raises(InvalidQueryError): + mast.MastMissions.query_criteria( + coordinates=regionCoords, + invalid=True + ) + + # Maximum results warning + with pytest.warns(MaxResultsWarning): + mast.MastMissions.query_criteria( + coordinates=regionCoords, + sci_aec='S', + limit=1 + ) + + +def test_missions_get_product_list_async(patch_post): + # String input + result = mast.MastMissions.get_product_list_async('Z14Z0104T') + assert isinstance(result, MockResponse) + + # List input + in_datasets = ['Z14Z0104T', 'Z14Z0102T'] + result = mast.MastMissions.get_product_list_async(in_datasets) + assert isinstance(result, MockResponse) + + # Row input + datasets = mast.MastMissions.query_object("M101", radius=".002 deg") + result = mast.MastMissions.get_product_list_async(datasets[:3]) + assert isinstance(result, MockResponse) + + # Table input + result = mast.MastMissions.get_product_list_async(datasets[0]) + assert isinstance(result, MockResponse) + + # Unsupported data type for datasets + with pytest.raises(TypeError) as err_type: + mast.MastMissions.get_product_list_async(1) + assert 'Unsupported data type' in str(err_type.value) + + # Empty dataset list + with pytest.raises(InvalidQueryError) as err_empty: + mast.MastMissions.get_product_list_async([' ']) + assert 'Dataset list is empty' in str(err_empty.value) + + +def test_missions_get_product_list(patch_post): + # String input + result = mast.MastMissions.get_product_list('Z14Z0104T') + assert isinstance(result, Table) + + # List input + in_datasets = ['Z14Z0104T', 'Z14Z0102T'] + result = mast.MastMissions.get_product_list(in_datasets) + assert isinstance(result, Table) + + # Row input + datasets = mast.MastMissions.query_object("M101", radius=".002 deg") + result = 
mast.MastMissions.get_product_list(datasets[:3]) + assert isinstance(result, Table) + + # Table input + result = mast.MastMissions.get_product_list(datasets[0]) + assert isinstance(result, Table) + + +def test_missions_get_unique_product_list(patch_post, caplog): + unique_products = mast.MastMissions.get_unique_product_list('Z14Z0104T') + assert isinstance(unique_products, Table) + assert (unique_products == unique(unique_products, keys='filename')).all() + # No INFO messages should be logged + with caplog.at_level('INFO', logger='astroquery'): + assert caplog.text == '' + + +def test_missions_filter_products(patch_post): + # Filter products list by column + products = mast.MastMissions.get_product_list('Z14Z0104T') + filtered = mast.MastMissions.filter_products(products, + category='CALIBRATED') + assert isinstance(filtered, Table) + assert all(filtered['category'] == 'CALIBRATED') + + # Filter by non-existing column + with pytest.warns(InputWarning): + mast.MastMissions.filter_products(products, + invalid=True) + + +def test_missions_download_products(patch_post, tmp_path): + # Check string input + test_dataset_id = 'Z14Z0104T' + result = mast.MastMissions.download_products(test_dataset_id, + download_dir=tmp_path) + assert isinstance(result, Table) + + # Check Row input + prods = mast.MastMissions.get_product_list('Z14Z0104T') + result = mast.MastMissions.download_products(prods[0], + download_dir=tmp_path) + assert isinstance(result, Table) + + # Warn about no products + with pytest.warns(NoResultsWarning): + result = mast.MastMissions.download_products(test_dataset_id, + extension='jpg', + download_dir=tmp_path) + ################### # MastClass tests # ################### diff --git a/astroquery/mast/tests/test_mast_remote.py b/astroquery/mast/tests/test_mast_remote.py index b47905cf19..a5272c2efb 100644 --- a/astroquery/mast/tests/test_mast_remote.py +++ b/astroquery/mast/tests/test_mast_remote.py @@ -227,7 +227,7 @@ def 
test_missions_get_unique_product_list(self, caplog): # Unique product list should have fewer rows assert len(products) > len(unique_products) - # Rows should be unique based on dataURI + # Rows should be unique based on filename assert (unique_products == unique(unique_products, keys='filename')).all() # Check that INFO messages were logged with caplog.at_level('INFO', logger='astroquery'): @@ -313,6 +313,32 @@ def check_result(result, path): result = MastMissions.download_file(uri, local_path=local_path_file) check_result(result, local_path_file) + @pytest.mark.parametrize("mission, query_params", [ + ('jwst', {'fileSetName': 'jw01189001001_02101_00001'}), + ('classy', {'target': 'J0021+0052'}), + ('ullyses', {'host_galaxy_name': 'WLM', 'select_cols': ['observation_id']}) + ]) + def test_missions_workflow(self, tmp_path, mission, query_params): + # Test workflow with other missions + m = MastMissions(mission=mission) + + # Criteria query + datasets = m.query_criteria(**query_params) + assert isinstance(datasets, Table) + assert len(datasets) + + # Get products + prods = m.get_product_list(datasets[0]) + assert isinstance(prods, Table) + assert len(prods) + + # Download products + result = m.download_products(prods[:3], + download_dir=tmp_path) + for row in result: + if row['Status'] == 'COMPLETE': + assert (row['Local Path']).is_file() + ################### # MastClass tests # ################### From e9b2eddbccb59f022c77e39358f6151540a8ef4e Mon Sep 17 00:00:00 2001 From: Sam Bianco Date: Mon, 25 Nov 2024 16:58:50 -0600 Subject: [PATCH 3/7] Authentication --- astroquery/mast/missions.py | 17 ++++++++++++++--- astroquery/mast/tests/test_mast.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/astroquery/mast/missions.py b/astroquery/mast/missions.py index 7cccf6d82e..af6b4e541e 100644 --- a/astroquery/mast/missions.py +++ b/astroquery/mast/missions.py @@ -47,8 +47,8 @@ class MastMissionsClass(MastQueryWithLogin): 
'spectral_type', 'bmv0_mag', 'u_mag', 'b_mag', 'v_mag', 'gaia_g_mean_mag', 'star_mass', 'instrument', 'grating', 'filter', 'observation_id'] - def __init__(self, *, mission='hst'): - super().__init__() + def __init__(self, *, mission='hst', mast_token=None): + super().__init__(mast_token=mast_token) self.dataset_kwds = { # column keywords corresponding to dataset ID 'hst': 'sci_data_set_name', @@ -497,8 +497,19 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True): url = data_url except HTTPError as err: + if err.response.status_code == 401: + no_auth_msg = f'You are not authorized to download from {data_url}.' + if self._authenticated: + no_auth_msg += ('\nPlease check your authentication token. You can generate a new ' + 'token at https://auth.mast.stsci.edu/token?suggested_name=Astroquery&' + 'suggested_scope=mast:exclusive_access') + else: + no_auth_msg += ('\nPlease authenticate yourself using the `~astroquery.mast.MastMissions.login` ' + 'function or initialize `~astroquery.mast.MastMissions` with an authentication ' + 'token.') + log.warning(no_auth_msg) status = 'ERROR' - msg = 'HTTPError: {0}'.format(err) + msg = f'HTTPError: {err}' url = data_url return status, msg, url diff --git a/astroquery/mast/tests/test_mast.py b/astroquery/mast/tests/test_mast.py index 6616e33f06..caf8333df7 100644 --- a/astroquery/mast/tests/test_mast.py +++ b/astroquery/mast/tests/test_mast.py @@ -12,6 +12,7 @@ from astropy.io import fits import astropy.units as u +from requests import HTTPError, Response from astroquery.mast.services import _json_to_table from astroquery.utils.mocks import MockResponse @@ -147,6 +148,11 @@ def request_mockreturn(url, params={}): def download_mockreturn(*args, **kwargs): + if 'unauthorized' in args[0]: + response = Response() + response.reason = 'Unauthorized' + response.status_code = 401 + raise HTTPError(response=response) return ('COMPLETE', None, None) @@ -376,6 +382,30 @@ def 
test_missions_download_products(patch_post, tmp_path): extension='jpg', download_dir=tmp_path) + +def test_missions_download_no_auth(patch_post, caplog): + # Exclusive access products should not be downloaded if user is not authenticated + # User is not authenticated + uri = 'unauthorized.fits' + result = mast.MastMissions.download_file(uri) + assert result[0] == 'ERROR' + assert 'HTTPError' in result[1] + with caplog.at_level('WARNING', logger='astroquery'): + assert 'You are not authorized to download' in caplog.text + assert 'Please authenticate yourself' in caplog.text + caplog.clear() + + # User is authenticated, but doesn't have proper permissions + test_token = "56a9cf3df4c04052atest43feb87f282" + mast.MastMissions.login(token=test_token) + result = mast.MastMissions.download_file(uri) + assert result[0] == 'ERROR' + assert 'HTTPError' in result[1] + with caplog.at_level('WARNING', logger='astroquery'): + assert 'You are not authorized to download' in caplog.text + assert 'Please check your authentication token' in caplog.text + + ################### # MastClass tests # ################### From 1248c4c6e90ea23cd7e75779a4b0da5c8f3752ce Mon Sep 17 00:00:00 2001 From: Sam Bianco Date: Wed, 18 Dec 2024 12:21:22 -0500 Subject: [PATCH 4/7] post endpoint, documentation --- astroquery/mast/missions.py | 60 ++++--- astroquery/mast/services.py | 7 +- astroquery/mast/tests/test_mast.py | 24 +++ docs/mast/mast_missions.rst | 242 +++++++++++++++++++++++++---- 4 files changed, 281 insertions(+), 52 deletions(-) diff --git a/astroquery/mast/missions.py b/astroquery/mast/missions.py index af6b4e541e..f81ccad0e9 100644 --- a/astroquery/mast/missions.py +++ b/astroquery/mast/missions.py @@ -40,7 +40,7 @@ class MastMissionsClass(MastQueryWithLogin): # Static class variables _search = 'search' - _list_products = 'list_products' + _list_products = 'post_list_products' # Workaround so that observation_id is returned in ULLYSES queries that do not specify columns 
_default_ulysses_cols = ['target_name_ulysses', 'target_classification', 'targ_ra', 'targ_dec', 'host_galaxy_name', @@ -59,8 +59,8 @@ def __init__(self, *, mission='hst', mast_token=None): # Service attributes self.service = self._search # current API service - self.service_dict = {self._search: {'path': 'search'}, - self._list_products: {'path': 'list_products'}} + self.service_dict = {self._search: {'path': self._search}, + self._list_products: {'path': self._list_products}} # Search attributes self._search_option_fields = ['limit', 'offset', 'sort_by', 'search_key', 'sort_desc', 'select_cols', @@ -190,7 +190,7 @@ def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offs # Dataset ID column should always be returned if select_cols: - select_cols.append(self.dataset_kwds[self.mission]) + select_cols.append(self.dataset_kwds.get(self.mission, None)) elif self.mission == 'ullyses': select_cols = self._default_ulysses_cols @@ -267,7 +267,7 @@ def query_criteria_async(self, *, coordinates=None, objectname=None, radius=3*u. 
# Dataset ID column should always be returned if select_cols: - select_cols.append(self.dataset_kwds[self.mission]) + select_cols.append(self.dataset_kwds.get(self.mission, None)) elif self.mission == 'ullyses': select_cols = self._default_ulysses_cols @@ -349,32 +349,38 @@ def get_product_list_async(self, datasets): self.service = self._list_products + if isinstance(datasets, Table) or isinstance(datasets, Row): + dataset_kwd = self.get_dataset_kwd() + if not dataset_kwd: + log.warning('Please input dataset IDs as a string, list of strings, or `~astropy.table.Column`.') + return None + # Extract dataset IDs based on input type and mission if isinstance(datasets, Table): - datasets = datasets[self.dataset_kwds[self.mission]] + datasets = datasets[dataset_kwd].tolist() elif isinstance(datasets, Row): - datasets = np.array([datasets[self.dataset_kwds[self.mission]]]) - elif isinstance(datasets, str) or isinstance(datasets, Column): - datasets = np.array([datasets]) - elif isinstance(datasets, list): - datasets = np.array(datasets) - else: + datasets = [datasets[dataset_kwd]] + elif isinstance(datasets, Column): + datasets = datasets.tolist() + elif isinstance(datasets, str): + datasets = [datasets] + elif not isinstance(datasets, list): raise TypeError('Unsupported data type for `datasets`. 
Expected string, ' - 'list of strings, Astropy row, or Astropy Table.') + 'list of strings, Astropy Row, Astropy Column, or Astropy Table.') # Filter out empty strings from IDs - datasets = datasets[np.char.strip(datasets) != ''] - if datasets.size == 0: + datasets = [item.strip() for item in datasets if item.strip() != '' and item is not None] + if not len(datasets): raise InvalidQueryError("Dataset list is empty, no associated products.") # Send async service request - params = {'dataset_ids': ','.join(datasets)} + params = {'dataset_ids': datasets} return self._service_api_connection.missions_request_async(self.service, params) def get_unique_product_list(self, datasets): """ Given a dataset ID or list of dataset IDs, returns a list of associated data products with unique - URIs. + filenames. Parameters ---------- @@ -443,6 +449,7 @@ def filter_products(self, products, *, extension=None, **filters): def download_file(self, uri, *, local_path=None, cache=True, verbose=True): """ Downloads a single file based on the data URI. + Parameters ---------- uri : str @@ -453,6 +460,7 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True): Default is True. If file is found on disk, it will not be downloaded again. verbose : bool, optional Default is True. Whether to show download progress in the console. 
+ Returns ------- status: str @@ -550,7 +558,7 @@ def _download_files(self, products, base_dir, *, flat=False, cache=True, verbose local_path=local_file_path, cache=cache, verbose=verbose) - manifest_entries.append([local_file_path, status, msg or '', url or '']) + manifest_entries.append([local_file_path, status, msg, url]) # Return manifest as Astropy Table manifest = Table(rows=manifest_entries, names=('Local Path', 'Status', 'Message', 'URL')) @@ -627,7 +635,6 @@ def get_column_list(self): ------- response : `~astropy.table.Table` that contains columns names, types, and descriptions """ - if not self.columns.get(self.mission): try: # Send server request to get column list for current mission @@ -659,5 +666,20 @@ def get_column_list(self): return self.columns[self.mission] + def get_dataset_kwd(self): + """ + Return the Dataset ID keyword for the selected mission. If the keyword is unknown, returns None. + + Returns + ------- + keyword : str or None + Dataset ID keyword or None if unknown. 
+ """ + if self.mission not in self.dataset_kwds: + log.warning('The mission "%s" does not have a known dataset ID keyword.', self.mission) + return None + + return self.dataset_kwds[self.mission] + MastMissions = MastMissionsClass() diff --git a/astroquery/mast/services.py b/astroquery/mast/services.py index 7a20a70baf..b70869a291 100644 --- a/astroquery/mast/services.py +++ b/astroquery/mast/services.py @@ -354,12 +354,9 @@ def missions_request_async(self, service, params): 'Accept': 'application/json' } - # Determine request method and payload based on service - method = 'POST' if service == 'search' else 'GET' - data, params = (params, None) if method == 'POST' else (None, params) - # make request - response = self._request(method=method, + data, params = (params, None) + response = self._request(method='POST', url=request_url, params=params, data=data, diff --git a/astroquery/mast/tests/test_mast.py b/astroquery/mast/tests/test_mast.py index caf8333df7..51a8db4093 100644 --- a/astroquery/mast/tests/test_mast.py +++ b/astroquery/mast/tests/test_mast.py @@ -406,6 +406,30 @@ def test_missions_download_no_auth(patch_post, caplog): assert 'Please check your authentication token' in caplog.text +def test_missions_get_dataset_kwd(patch_post, caplog): + m = mast.MastMissions() + + # Default is HST + assert m.mission == 'hst' + assert m.get_dataset_kwd() == 'sci_data_set_name' + + # Switch to JWST + m.mission = 'JWST' # case-insensitive + assert m.mission == 'jwst' + assert m.get_dataset_kwd() == 'fileSetName' + + # Switch to an HLSP + m.mission = 'Classy' + assert m.mission == 'classy' + assert m.get_dataset_kwd() == 'Target' + + # Switch to an unknown + m.mission = 'Unknown' + assert m.mission == 'unknown' + assert m.get_dataset_kwd() is None + with caplog.at_level('WARNING', logger='astroquery'): + assert 'The mission "unknown" does not have a known dataset ID keyword' in caplog.text + ################### # MastClass tests # ################### diff --git 
a/docs/mast/mast_missions.rst b/docs/mast/mast_missions.rst index 04eb93cda8..41b8c88ec1 100644 --- a/docs/mast/mast_missions.rst +++ b/docs/mast/mast_missions.rst @@ -3,21 +3,24 @@ Mission Searches **************** -Mission-Specific Search Queries -=============================== +The `~astroquery.mast.MastMissionsClass` class allows for search queries based on mission-specific +metadata for a given data collection. This metadata includes header keywords, proposal information, and observational parameters. +The following missions/products are currently available for search: -These queries allow for searches based on mission-specific metadata for a given -data collection. Currently, it provides access to a broad set of Hubble Space -Telescope (HST) and James Webb Space Telescope (JWST) metadata, including header keywords, -proposal information, and observational parameters. +- `Hubble Space Telescope `_ (``'hst'``) -**Note:** This API interface does not yet support data product downloads, only -metadata search access. +- `James Webb Space Telescope `_ (``'jwst'``) + +- `High Level Science Products `_ + + - `COS Legacy Archive Spectroscopic SurveY `_ (``'classy'``) + + - `Hubble UV Legacy Library of Young Stars as Essential Standards `_ (``'ullyses'``) An object of the ``MastMissions`` class is instantiated with a default mission of ``'hst'`` and default service set to ``'search'``. The searchable metadata for Hubble encompasses all information that -was previously accessible through the original HST web search form and is now available in -the current `MAST HST Search Form `__. +was previously accessible through the original HST web search form. The metadata for Hubble and all other +available missions is also available through the `MAST Search UI `_. .. doctest-remote-data:: @@ -28,18 +31,19 @@ the current `MAST HST Search Form `__. 
>>> missions.service 'search' -To search for JWST metadata, a ``MastMissions`` object is instantiated with a value of ``'jwst'`` for ``mission``. -The searchable metadata for Webb encompasses all information that is available in -the current `MAST JWST Search Form `__. +Each ``MastMissions`` object can only make queries and download products from a single mission at a time. This mission can +be modified with the ``mission`` class attribute. This allows users to make queries to multiple missions with the same object. +To search for JWST metadata, the ``mission`` attribute is reassigned to ``'JWST'``. .. doctest-remote-data:: + >>> m = MastMissions() + >>> print(m.mission) + hst + >>> m.mission = 'JWST' + >>> print(m.mission) + jwst - >>> from astroquery.mast import MastMissions - >>> missions = MastMissions(mission='jwst') - >>> missions.mission - 'jwst' - -The ``missions`` object can be used to search metadata by object name, sky position, or other criteria. +The ``missions`` object can be used to search metadata by sky position, object name, or other criteria. When writing queries, keyword arguments can be used to specify output characteristics and filter on values like instrument, exposure type, and principal investigator. The available column names for a mission are returned by the `~astroquery.mast.MastMissionsClass.get_column_list` function. @@ -50,12 +54,29 @@ mission are returned by the `~astroquery.mast.MastMissionsClass.get_column_list` >>> missions = MastMissions(mission='hst') >>> columns = missions.get_column_list() +Keyword arguments can also be used to refine results further. The following parameters are available: + +- ``radius``: For positional searches only. Only return results within a certain distance from an object or set of coordinates. + Default is 3 arcminutes. + +- ``limit``: The maximum number of results to return. Default is 5000. + +- ``offset``: Skip the first ***n*** results. Useful for paging through results. 
+ +- ``sort_by``: A list of field names to sort by. + +- ``sort_desc``: A list of booleans (one for each field specified in ``sort_by``), + describing if each field should be sorted in descending order (``True``) or ascending order (``False``). + +- ``select_cols``: A list of columns to be returned in the response. + + +Mission Positional Queries +=========================== + Metadata queries can be performed on a particular region in the sky. Passing in a set of coordinates to the `~astroquery.mast.MastMissionsClass.query_region` function returns datasets that fall within a -certain radius value of that point. This type of search is also known as a cone search. - -The ``select_cols`` keyword argument specifies a list of columns to be included in the response. -The ``sort_by`` keyword argument specifies a column (or columns) to sort the results by. +certain radius value of that point. This type of search is also known as a cone search. .. doctest-remote-data:: @@ -81,11 +102,18 @@ The ``sort_by`` keyword argument specifies a column (or columns) to sort the res You may notice that the above query returned more columns than were specified in the ``select_cols`` argument. For each mission, certain columns are automatically returned. -* *HST*: For positional searches, the columns ``ang_sep``, ``sci_data_set_name``, and ``search_pos`` +- *HST*: For positional searches, the columns ``sci_data_set_name``, ``search_pos``, and ``ang_sep`` are always included in the query results. For non-positional searches, ``sci_data_set_name`` is always present. -* *JWST*: For every query, the ``ArchiveFileID`` column is always returned. +- *JWST*: For every query, the ``ArchiveFileID`` column is always returned. + +- *CLASSY*: For positional searches, the columns ``search_pos``, ``Target``, and ``ang_sep`` are always included. + For non-positional searches, ``Target`` is always returned. 
+ +- *ULLYSES*: For positional searches, the columns ``search_pos``, ``target_id``, ``names_search``, ``target_name_hlsp``, + ``simbad_link``, ``ang_sep``, and ``plot_preview`` are always included. For non-positional searches, ``target_id``, + ``target_name_hlsp``, ``simbad_link``, and ``observation_id`` are always returned. Searches can also be run on target names with the `~astroquery.mast.MastMissionsClass.query_object` function. @@ -106,10 +134,12 @@ function. 210.80243 54.34875 JD6V01012 ANY 2017-06-15T18:10:12.037000 2017-06-15T18:33:25.983000 1.1541053362381077 PUBLIC 210.80243 54.34875 JD6V01013 ANY 2017-06-15T19:45:30.023000 2017-06-15T20:08:44.063000 1.15442580192948 PUBLIC + +Mission Criteria Queries +========================= + For non-positional metadata queries, use the `~astroquery.mast.MastMissionsClass.query_criteria` -function. For paging through results, the ``offset`` and ``limit`` keyword arguments can be used -to specify the starting record and the number of returned records. The default values for ``offset`` -and ``limit`` are 0 and 5000, respectively. +function. .. doctest-remote-data:: @@ -121,3 +151,159 @@ and ``limit`` are 0 and 5000, respectively. ... # MaxResultsWarning('Maximum results returned, may not include all sources within radius.') >>> len(results) 1000 + +Here are some tips and tricks for writing more advanced queries: + +- To exclude and filter out a certain value from the results, prepend the value with ``!``. + +- To filter by multiple values for a single column, use a string of values delimited by commas. + +- For columns with numeric or date data types, filter using comparison values (``<``, ``>``, ``<=``, ``>=``). 
+ + - ``<``: Return values less than or before the given number/date + + - ``>``: Return values greater than or after the given number/date + + - ``<=``: Return values less than or equal to the given number/date + + - ``>=``: Return values greater than or equal to the given number/date + +- For columns with numeric or date data types, select a range with the syntax ``'#..#'``. + +- Wildcards are special characters used in search patterns to represent one or more unknown characters, + allowing for flexible matching of strings. The wildcard character is ``*`` and it replaces any number + of characters preceding, following, or in between existing characters, depending on its placement. + +.. doctest-remote-data:: + + >>> results = missions.query_criteria(sci_obs_type="IMAGE", + ... sci_instrume="!COS", + ... sci_spec_1234="F150W, F105W, F110W", + ... sci_dec=">0", + ... sci_actual_duration="1000..2000", + ... sci_targname="*GAL*", + ... select_cols=["sci_obs_type", "sci_spec_1234"]) + >>> results[:5] # doctest: +IGNORE_OUTPUT + + sci_data_set_name sci_targname sci_spec_1234 sci_obs_type + ----------------- ----------------------- ------------- ------------ + N9DB0C010 GAL-023031+002317 F110W IMAGE + N4A701010 GAL-CLUS-0026+1653-ARCA F110W IMAGE + N4A704010 GAL-CLUS-0026+1653-ARCA F110W IMAGE + N4A702010 GAL-CLUS-0026+1653-ARCC F110W IMAGE + N4A705010 GAL-CLUS-0026+1653-ARCC F110W IMAGE + +Downloading Data +================ + +Getting Product Lists +---------------------- + +Each observation returned from a MAST query can have one or more associated data products. Given +one or more datasets or dataset IDs, the `~astroquery.mast.MastMissionsClass.get_product_list` function +will return a `~astropy.table.Table` containing the associated data products. + +.. doctest-remote-data:: + >>> datasets = missions.query_criteria(sci_pep_id=12451, + ... sci_instrume='ACS', + ... 
sci_hlsp='>1') + >>> products = missions.get_product_list(datasets[:2]) + >>> print(products[:5]) # doctest: +IGNORE_OUTPUT + product_key access dataset ... category size type + ---------------------------- ------ --------- ... ---------- --------- ------- + JBTAA0010_jbtaa0010_asn.fits PUBLIC JBTAA0010 ... AUX 11520 science + JBTAA0010_jbtaa0010_drz.fits PUBLIC JBTAA0010 ... CALIBRATED 214655040 science + JBTAA0010_jbtaa0010_trl.fits PUBLIC JBTAA0010 ... AUX 630720 science + JBTAA0010_jbtaa0010_drc.fits PUBLIC JBTAA0010 ... CALIBRATED 214657920 science + JBTAA0010_jbtaa0010_log.txt PUBLIC JBTAA0010 ... AUX 204128 science + +The keyword corresponding to the dataset ID varies between missions and can be returned with the +`~astroquery.mast.MastMissionsClass.get_dataset_kwd` method. + +.. doctest-remote-data:: + >>> dataset_id_kwd = missions.get_dataset_kwd() + >>> print(dataset_id_kwd) + sci_data_set_name + >>> products = missions.get_product_list(datasets[:2][dataset_id_kwd]) + +Some products may be associated with multiple datasets, and this table may contain duplicates. +To return a list of products with unique filenames, use the `~astroquery.mast.MastMissionsClass.get_unique_product_list` +function. + +.. doctest-remote-data:: + >>> unique_products = missions.get_unique_product_list(datasets[:2]) # doctest: +IGNORE_OUTPUT + INFO: 16 of 206 products were duplicates. Only returning 190 unique product(s). [astroquery.mast.utils] + INFO: To return all products, use `MastMissions.get_product_list` [astroquery.mast.missions] + +Filtering Data Products +----------------------- + +In many cases, you will not need to download every product that is associated with a dataset. The +`~astroquery.mast.MastMissionsClass.filter_products` function allows for filtering based on file extension (``extension``) +and any other of the product fields. + +The **AND** operation is performed for a list of filters, and the **OR** operation is performed within a filter set. 
+For example, the filter below will return FITS products that are "science" type **and** have a ``file_suffix`` of "ASN" (association +files) **or** "JIF" (job information files). + +.. doctest-remote-data:: + >>> filtered = missions.filter_products(products, + ... extension='fits', + ... type='science', + ... file_suffix=['ASN', 'JIF']) + >>> print(filtered) # doctest: +IGNORE_OUTPUT + product_key access dataset ... category size type + ---------------------------- ------ --------- ... -------------- ----- ------- + JBTAA0010_jbtaa0010_asn.fits PUBLIC JBTAA0010 ... AUX 11520 science + JBTAA0010_jbtaa0010_jif.fits PUBLIC JBTAA0010 ... JITTER/SUPPORT 60480 science + JBTAA0020_jbtaa0020_asn.fits PUBLIC JBTAA0020 ... AUX 11520 science + JBTAA0020_jbtaa0020_jif.fits PUBLIC JBTAA0020 ... JITTER/SUPPORT 60480 science + +Downloading Data Products +------------------------- + +The `~astroquery.mast.MastMissionsClass.download_products` function accepts a table of products like the one above +and will download the products to your local machine. + +By default, products will be downloaded into the current working directory, in a subdirectory called "mastDownload". +The full local filepaths will have the form "mastDownload/<mission>/<Dataset ID>/file." You can change the download +directory using the ``download_dir`` parameter. + +.. doctest-remote-data:: + >>> manifest = missions.download_products(filtered) # doctest: +IGNORE_OUTPUT + Downloading URL https://mast.stsci.edu/search/hst/api/v0.1/retrieve_product?product_name=JBTAA0010%2Fjbtaa0010_asn.fits to mastDownload/hst/JBTAA0010/jbtaa0010_asn.fits ... [Done] + Downloading URL https://mast.stsci.edu/search/hst/api/v0.1/retrieve_product?product_name=JBTAA0010%2Fjbtaa0010_jif.fits to mastDownload/hst/JBTAA0010/jbtaa0010_jif.fits ... [Done] + Downloading URL https://mast.stsci.edu/search/hst/api/v0.1/retrieve_product?product_name=JBTAA0020%2Fjbtaa0020_asn.fits to mastDownload/hst/JBTAA0020/jbtaa0020_asn.fits ... 
[Done] + Downloading URL https://mast.stsci.edu/search/hst/api/v0.1/retrieve_product?product_name=JBTAA0020%2Fjbtaa0020_jif.fits to mastDownload/hst/JBTAA0020/jbtaa0020_jif.fits ... [Done] + >>> print(manifest) # doctest: +IGNORE_OUTPUT + Local Path Status Message URL + --------------------------------------------- -------- ------- ---- + mastDownload/hst/JBTAA0010/jbtaa0010_asn.fits COMPLETE None None + mastDownload/hst/JBTAA0010/jbtaa0010_jif.fits COMPLETE None None + mastDownload/hst/JBTAA0020/jbtaa0020_asn.fits COMPLETE None None + mastDownload/hst/JBTAA0020/jbtaa0020_jif.fits COMPLETE None None + +The function also accepts dataset IDs and product filters as input for a more streamlined workflow. + +.. doctest-remote-data:: + >>> missions.download_products(['JBTAA0010', 'JBTAA0020'], + ... extension='fits', + ... type='science', + ... file_suffix=['ASN', 'JIF']) # doctest: +IGNORE_OUTPUT + Downloading URL https://mast.stsci.edu/search/hst/api/v0.1/retrieve_product?product_name=JBTAA0010%2Fjbtaa0010_asn.fits to mastDownload/hst/JBTAA0010/jbtaa0010_asn.fits ... [Done] + Downloading URL https://mast.stsci.edu/search/hst/api/v0.1/retrieve_product?product_name=JBTAA0010%2Fjbtaa0010_jif.fits to mastDownload/hst/JBTAA0010/jbtaa0010_jif.fits ... [Done] + Downloading URL https://mast.stsci.edu/search/hst/api/v0.1/retrieve_product?product_name=JBTAA0020%2Fjbtaa0020_asn.fits to mastDownload/hst/JBTAA0020/jbtaa0020_asn.fits ... [Done] + Downloading URL https://mast.stsci.edu/search/hst/api/v0.1/retrieve_product?product_name=JBTAA0020%2Fjbtaa0020_jif.fits to mastDownload/hst/JBTAA0020/jbtaa0020_jif.fits ... [Done] + +Downloading a Single File +------------------------- + +To download a single data product file, use the `~astroquery.mast.MastMissionsClass.download_file` function with +a MAST URI as input. The default is to download the file to the current working directory, but +you can specify the download directory or filepath with the ``local_path`` keyword argument. 
+ +.. doctest-remote-data:: + >>> result = missions.download_file('JBTAA0010/jbtaa0010_asn.fits') + Downloading URL https://mast.stsci.edu/search/hst/api/v0.1/retrieve_product?product_name=JBTAA0010%2Fjbtaa0010_asn.fits to jbtaa0010_asn.fits ... [Done] + >>> print(result) + ('COMPLETE', None, None) From ea90a5264e6c9039af2bdc3e27415f6203252b22 Mon Sep 17 00:00:00 2001 From: Sam Bianco Date: Wed, 18 Dec 2024 16:19:18 -0500 Subject: [PATCH 5/7] Changelog, fix other test failures --- CHANGES.rst | 13 +++++++++++++ astroquery/mast/missions.py | 6 ++++++ astroquery/mast/tests/test_mast.py | 6 ------ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 83cd8633de..c47a3a4723 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -19,6 +19,19 @@ jplspec - minor improvement to lookuptable behavior [#3173,#2901] +mast +^^^^ + +- Retrieve data products from the Missions-MAST API with ``mast.MastMissions.get_product_list``. Retrieve unique data + products only with ``mast.MastMissions.get_unique_product_list``. [#3155] + +- Filter data products retrieved from the Missions-MAST API with ``mast.MastMissions.filter_products``. [#3155] + +- Download data products from the Missions-MAST API with ``mast.MastMissions.download_products``. + Download a single data product using ``mast.MastMissions.download_file``. [#3155] + +- Get the keyword corresponding to the dataset ID for a specific mission with ``mast.MastMissions.get_dataset_kwd``. [#3155] + mocserver ^^^^^^^^^ diff --git a/astroquery/mast/missions.py b/astroquery/mast/missions.py index f81ccad0e9..f6f8332038 100644 --- a/astroquery/mast/missions.py +++ b/astroquery/mast/missions.py @@ -402,6 +402,7 @@ def get_unique_product_list(self, datasets): def filter_products(self, products, *, extension=None, **filters): """ Filters an `~astropy.table.Table` of mission data products based on given filters. 
+ Parameters ---------- products : `~astropy.table.Table` @@ -414,6 +415,7 @@ def filter_products(self, products, *, extension=None, **filters): acceptable values for that column. AND logic is applied between filters, OR logic within each filter set. For example: type="science", extension=["fits","jpg"] + Returns ------- response : `~astropy.table.Table` @@ -525,6 +527,7 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True): def _download_files(self, products, base_dir, *, flat=False, cache=True, verbose=True): """ Downloads files listed in an `~astropy.table.Table` of data products to a specified directory. + Parameters ---------- products : `~astropy.table.Table` @@ -538,6 +541,7 @@ def _download_files(self, products, base_dir, *, flat=False, cache=True, verbose Default is True. If file is found on disk, it will not be downloaded again. verbose : bool, optional Default is True. Whether to show download progress in the console. + Returns ------- response : `~astropy.table.Table` @@ -568,6 +572,7 @@ def download_products(self, products, *, download_dir=None, flat=False, cache=True, extension=None, verbose=True, **filters): """ Download specified data products. + Parameters ---------- products : str, list, `~astropy.table.Table` @@ -591,6 +596,7 @@ def download_products(self, products, *, download_dir=None, flat=False, acceptable values for that column. AND logic is applied between filters, OR logic within each filter set. 
For example: type="science", extension=["fits","jpg"] + Returns ------- manifest : `~astropy.table.Table` diff --git a/astroquery/mast/tests/test_mast.py b/astroquery/mast/tests/test_mast.py index 51a8db4093..e9afe30e9f 100644 --- a/astroquery/mast/tests/test_mast.py +++ b/astroquery/mast/tests/test_mast.py @@ -376,12 +376,6 @@ def test_missions_download_products(patch_post, tmp_path): download_dir=tmp_path) assert isinstance(result, Table) - # Warn about no products - with pytest.warns(NoResultsWarning): - result = mast.MastMissions.download_products(test_dataset_id, - extension='jpg', - download_dir=tmp_path) - def test_missions_download_no_auth(patch_post, caplog): # Exclusive access products should not be downloaded if user is not authenticated From db9632e6b39cada33f7b4a11c599bed242be225b Mon Sep 17 00:00:00 2001 From: Sam Bianco Date: Wed, 18 Dec 2024 16:38:43 -0500 Subject: [PATCH 6/7] Style fixes --- astroquery/mast/missions.py | 8 ++++---- astroquery/mast/tests/test_mast.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/astroquery/mast/missions.py b/astroquery/mast/missions.py index f6f8332038..0eb0b604cf 100644 --- a/astroquery/mast/missions.py +++ b/astroquery/mast/missions.py @@ -43,7 +43,7 @@ class MastMissionsClass(MastQueryWithLogin): _list_products = 'post_list_products' # Workaround so that observation_id is returned in ULLYSES queries that do not specify columns - _default_ulysses_cols = ['target_name_ulysses', 'target_classification', 'targ_ra', 'targ_dec', 'host_galaxy_name', + _default_ullyses_cols = ['target_name_ulysses', 'target_classification', 'targ_ra', 'targ_dec', 'host_galaxy_name', 'spectral_type', 'bmv0_mag', 'u_mag', 'b_mag', 'v_mag', 'gaia_g_mean_mag', 'star_mass', 'instrument', 'grating', 'filter', 'observation_id'] @@ -192,7 +192,7 @@ def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offs if select_cols: select_cols.append(self.dataset_kwds.get(self.mission, None)) elif self.mission 
== 'ullyses': - select_cols = self._default_ulysses_cols + select_cols = self._default_ullyses_cols # basic params params = {'target': [f"{coordinates.ra.deg} {coordinates.dec.deg}"], @@ -269,7 +269,7 @@ def query_criteria_async(self, *, coordinates=None, objectname=None, radius=3*u. if select_cols: select_cols.append(self.dataset_kwds.get(self.mission, None)) elif self.mission == 'ullyses': - select_cols = self._default_ulysses_cols + select_cols = self._default_ullyses_cols # build query params = {"limit": self.limit, "offset": offset, 'select_cols': select_cols} @@ -583,7 +583,7 @@ def download_products(self, products, *, download_dir=None, flat=False, flat : bool, optional Default is False. If False, puts files into the standard directory structure of "mastDownload///". - If True, places files directly in `download_dir` without subdirectories. + If True, places files directly in ``download_dir`` without subdirectories. cache : bool, optional Default is True. If file is found on disc, it will not be downloaded again. 
extension : string or list, optional diff --git a/astroquery/mast/tests/test_mast.py b/astroquery/mast/tests/test_mast.py index e9afe30e9f..a82ec63a6d 100644 --- a/astroquery/mast/tests/test_mast.py +++ b/astroquery/mast/tests/test_mast.py @@ -16,7 +16,7 @@ from astroquery.mast.services import _json_to_table from astroquery.utils.mocks import MockResponse -from astroquery.exceptions import InvalidQueryError, InputWarning, MaxResultsWarning, NoResultsWarning +from astroquery.exceptions import InvalidQueryError, InputWarning, MaxResultsWarning from astroquery import mast From aab812a1f6992d322c4d33b65a857cbc25361316 Mon Sep 17 00:00:00 2001 From: Sam Bianco Date: Wed, 18 Dec 2024 19:19:46 -0500 Subject: [PATCH 7/7] clean up --- astroquery/mast/missions.py | 11 ++++++----- astroquery/mast/services.py | 2 -- astroquery/mast/tests/test_mast.py | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/astroquery/mast/missions.py b/astroquery/mast/missions.py index 0eb0b604cf..ffe804a104 100644 --- a/astroquery/mast/missions.py +++ b/astroquery/mast/missions.py @@ -75,8 +75,8 @@ def mission(self): @mission.setter def mission(self, value): - # Need to update the service parameters if the mission is changed - self._mission = value.lower() + # Setter that updates the service parameters if the mission is changed + self._mission = value.lower() # case-insensitive self._service_api_connection.set_service_params(self.service_dict, f'search/{self.mission}') def _parse_result(self, response, *, verbose=False): # Used by the async_to_sync decorator functionality @@ -100,7 +100,7 @@ def _parse_result(self, response, *, verbose=False): # Used by the async_to_syn if self.service == self._search: results = self._service_api_connection._parse_result(response, verbose, data_key='results') elif self.service == self._list_products: - # Results from list_products endpoint need to be handled differently + # Results from post_list_products endpoint need to be handled differently 
results = Table(response.json()['products']) if len(results) >= self.limit: @@ -510,8 +510,9 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True): if err.response.status_code == 401: no_auth_msg = f'You are not authorized to download from {data_url}.' if self._authenticated: - no_auth_msg += ('\nPlease check your authentication token. You can generate a new ' - 'token at https://auth.mast.stsci.edu/token?suggested_name=Astroquery&' + no_auth_msg += ('\nYou do not have access to download this data, or your authentication ' + 'token may be expired. You can generate a new token at ' + 'https://auth.mast.stsci.edu/token?suggested_name=Astroquery&' 'suggested_scope=mast:exclusive_access') else: no_auth_msg += ('\nPlease authenticate yourself using the `~astroquery.mast.MastMissions.login` ' diff --git a/astroquery/mast/services.py b/astroquery/mast/services.py index b70869a291..7753903e26 100644 --- a/astroquery/mast/services.py +++ b/astroquery/mast/services.py @@ -130,8 +130,6 @@ def __init__(self, session=None): self.TIMEOUT = conf.timeout - self._column_configs = {} # Dict to hold column configurations for services - def set_service_params(self, service_dict, service_name="", server_prefix=False): """ Initialize the request url and available queries for a given service. diff --git a/astroquery/mast/tests/test_mast.py b/astroquery/mast/tests/test_mast.py index a82ec63a6d..b5532b48e0 100644 --- a/astroquery/mast/tests/test_mast.py +++ b/astroquery/mast/tests/test_mast.py @@ -397,7 +397,7 @@ def test_missions_download_no_auth(patch_post, caplog): assert 'HTTPError' in result[1] with caplog.at_level('WARNING', logger='astroquery'): assert 'You are not authorized to download' in caplog.text - assert 'Please check your authentication token' in caplog.text + assert 'You do not have access to download this data' in caplog.text def test_missions_get_dataset_kwd(patch_post, caplog):