diff --git a/caom2/caom2/diff.py b/caom2/caom2/diff.py index 62581210..72cd01af 100644 --- a/caom2/caom2/diff.py +++ b/caom2/caom2/diff.py @@ -413,7 +413,7 @@ def _not_equal(rhs, lhs): else: # if only using python 3.5+, use math.isclose, instead of this # description of math.isclose from the python documentation - result = abs(rhs-lhs) <= max(1e-10 * max(abs(rhs), abs(lhs)), 1e-9) + result = abs(rhs-lhs) <= max(1e-12 * max(abs(rhs), abs(lhs)), 1e-11) else: result = rhs == lhs return not result diff --git a/caom2/caom2/tests/data/diff-expected-CAOM-2.3.xml b/caom2/caom2/tests/data/diff-expected-CAOM-2.3.xml index 046d468a..19d65158 100644 --- a/caom2/caom2/tests/data/diff-expected-CAOM-2.3.xml +++ b/caom2/caom2/tests/data/diff-expected-CAOM-2.3.xml @@ -119,11 +119,11 @@ 58979.5015625 - 58979.5017365 + 58979.50173611111 58979.5015625 - 58979.5017365 + 58979.50173611111 diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 79cb4ba5..2a151f0a 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -71,7 +71,6 @@ from logging.handlers import TimedRotatingFileHandler import math -import numpy from astropy.wcs import Wcsprm, WCS from astropy.io import fits from astropy.time import Time @@ -1835,6 +1834,19 @@ def configure_time_axis(self, axis, override=True): self._time_axis_configed = True + def set(self, caom2_element, value, extension=0): + """ + Sets the value associated with an element in the CAOM2 model. Value + cannot be a tuple. 
+ :param caom2_element: name of the CAOM2 element (as in + ObsBlueprint.CAOM2_ELEMENTS) + :param value: new value of the CAOM2 element + :param extension: extension number (used only for Chunk elements) + """ + if hasattr(value, 'decode'): + value = value.decode('utf-8') + super().set(caom2_element, value, extension) + def _guess_axis_info(self): self._guess_axis_info_from_plan() @@ -2071,7 +2083,7 @@ def _to_enum_type(self, value, to_enum_type): if value is None: raise ValueError( f'Must set a value of {to_enum_type.__name__} for ' - f'{self._uri}.') + f'{self.uri}.') elif isinstance(value, to_enum_type): return value else: @@ -3668,18 +3680,15 @@ class Hdf5Parser(ContentParser): """ def __init__( - self, obs_blueprint, uri, local_f_name, find_roots_here='sitedata' + self, obs_blueprint, uri, h5_file, find_roots_here='sitedata' ): """ :param obs_blueprint: Hdf5ObsBlueprint instance :param uri: which artifact augmentation is based on - :param local_f_name: str file name on disk + :param h5_file: h5py file handle :param find_roots_here: str location where Chunk metadata starts """ - # h5py is an extra in this package since most collections do not - # require it - import h5py - self._file = h5py.File(local_f_name) + self._file = h5_file # where N Chunk metadata starts self._find_roots_here = find_roots_here # the length of the array is the number of Parts in an HDF5 file, @@ -3698,7 +3707,7 @@ def apply_blueprint_from_file(self): # h5py is an extra in this package since most collections do not # require it import h5py - individual, multi = self._extract_path_names_from_blueprint() + individual, multi, attributes = self._extract_path_names_from_blueprint() def _extract_from_item(name, object): """ @@ -3765,28 +3774,45 @@ def _extract_from_item(name, object): for jj in individual.get(temp): self._blueprint.set(jj, object[d_name], 0) - self._file.visititems(_extract_from_item) + if len(individual) == 0 and len(multi) == 0: + self._extract_from_attrs(attributes) + else: + 
self._file.visititems(_extract_from_item) self.logger.debug('Done apply_blueprint_from_file') + def _extract_from_attrs(self, attributes): + # I don't currently see any way to have more than one Part, if relying on + # attrs for metadata + part_index = 0 + # v == list of blueprint keys + for k, v in attributes.items(): + if k in self._file.attrs: + value = self._file.attrs[k] + for entry in v: + self._blueprint.set(entry, value, part_index) + def _extract_path_names_from_blueprint(self): """ - :return: individual - a dictionary of lists, keys are unique path - names for finding metadata once per file. Values are - _CAOM2_ELEMENT strings. - multiple - a dictionary of lists, keys are unique path names for - finding metadata N times per file. Values are _CAOM2_ELEMENT - strings. + :return: individual - a dictionary of lists, keys are unique path names for finding metadata once per file. + Values are _CAOM2_ELEMENT strings. + multiple - a dictionary of lists, keys are unique path names for finding metadata N times per file. Values + are _CAOM2_ELEMENT strings. + attributes - a dictionary of lists, keys reference expected content from the h5py.File().attrs data + structure and its keys. 
""" individual = defaultdict(list) multi = defaultdict(list) + attributes = defaultdict(list) for key, value in self._blueprint._plan.items(): if ObsBlueprint.needs_lookup(value): for ii in value[0]: if ii.startswith('//'): individual[ii].append(key) - else: + elif ii.startswith('/'): multi[ii].append(key) - return individual, multi + else: + attributes[ii].append(key) + return individual, multi, attributes def apply_blueprint(self): self.logger.debug('Begin apply_blueprint') @@ -4239,8 +4265,6 @@ def _sanitize(self, value): return None elif not str(value): return None # empty string - elif isinstance(value, numpy.bytes_): - return value.decode('utf-8') else: return value @@ -4562,7 +4586,8 @@ def _finish_chunk_time(self, chunk): if not math.isnan(self._wcs.wcs.xposure): chunk.time.exposure = self._wcs.wcs.xposure chunk.time.timesys = self._wcs.wcs.timesys - chunk.time.trefpos = self._wcs.wcs.trefpos + if self._wcs.wcs.trefpos is not None and self._wcs.wcs.trefpos != '': + chunk.time.trefpos = self._wcs.wcs.trefpos # convert from the numpy array length 2 of self._wcs.wcs.mjdref # to a single value # TODO chunk.time.mjdref = self._wcs.to_header().get('MJDREF') @@ -4906,7 +4931,11 @@ def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False, elif '.h5' in local: logging.debug( f'Using an Hdf5Parser for local file {local}') - parser = Hdf5Parser(blueprint, uri, local) + # h5py is an extra in this package since most collections do + # not require it + import h5py + temp = h5py.File(local) + parser = Hdf5Parser(blueprint, uri, temp) else: # explicitly ignore headers for txt and image files logging.debug(f'Using a BlueprintParser for {local}') diff --git a/caom2utils/caom2utils/data_util.py b/caom2utils/caom2utils/data_util.py index aa80da6b..b73a862a 100644 --- a/caom2utils/caom2utils/data_util.py +++ b/caom2utils/caom2utils/data_util.py @@ -226,7 +226,8 @@ def info(self, uri): id=uri, size=temp.get('size'), file_type=temp.get('type'), - 
md5sum=temp.get('md5sum').replace('md5:', '') + md5sum=temp.get('md5sum').replace('md5:', ''), + encoding=temp.get('encoding'), ) except exceptions.NotFoundException: self._logger.info(f'cadcinfo:: {uri} not found') diff --git a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007777/dao_c122_2016_007777.xml b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007777/dao_c122_2016_007777.xml index f2736fd6..a1759979 100644 --- a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007777/dao_c122_2016_007777.xml +++ b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007777/dao_c122_2016_007777.xml @@ -20,7 +20,7 @@ DAO 1.2-m - -2331226.78834 + -2331226.7883358444 -3532798.9829 4755607.32183 diff --git a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007830/dao_c122_2016_007830.xml b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007830/dao_c122_2016_007830.xml index 945b61d6..a48babf3 100644 --- a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007830/dao_c122_2016_007830.xml +++ b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007830/dao_c122_2016_007830.xml @@ -20,7 +20,7 @@ DAO 1.2-m - -2331226.78834 + -2331226.7883358444 -3532798.9829 4755607.32183 diff --git a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007939/dao_c122_2016_007939.xml b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007939/dao_c122_2016_007939.xml index 0b0a1152..7544e18a 100644 --- a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007939/dao_c122_2016_007939.xml +++ b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007939/dao_c122_2016_007939.xml @@ -19,7 +19,7 @@ DAO 1.2-m - -2331226.78834 + -2331226.7883358444 -3532798.9829 4755607.32183 diff --git a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007942/dao_c122_2016_007942.xml b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007942/dao_c122_2016_007942.xml index 8de706e7..dafb284c 100644 --- a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007942/dao_c122_2016_007942.xml +++ 
b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007942/dao_c122_2016_007942.xml @@ -19,7 +19,7 @@ DAO 1.2-m - -2331226.78834 + -2331226.7883358444 -3532798.9829 4755607.32183 diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml index bb179533..c1cb7bed 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml @@ -66,9 +66,9 @@ 0.36083190511928515 - -3.056099929280465e-06 - 5.26221921634137e-09 - -8.906590162255861e-09 + -3.055789796926023e-06 + 5.17684229864217e-09 + -8.84247710317511e-09 -3.0546760780702482e-06 @@ -128,10 +128,10 @@ 0.3608318611649132 - -3.056099929280465e-06 - 5.26221921634137e-09 - -8.906590162255861e-09 - -3.0546760780702482e-06 + -3.056038798826708e-06 + 5.311589798573419e-09 + -8.530093204777965e-09 + -3.054771726598754e-06 2000.0 diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/attrs.blueprint b/caom2utils/caom2utils/tests/data/taos_h5file/def/attrs.blueprint new file mode 100644 index 00000000..6089b467 --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/attrs.blueprint @@ -0,0 +1,51 @@ +Observation.algorithm.name = ['PROGRAM'] +Observation.metaRelease = 2018-05-21T02:07:22.0 +Observation.type = OBJECT +Observation.target.name = ['object_name'] +Plane.calibrationLevel = 2 +Plane.dataProductType = timeseries + +Chunk.position.axis.function.dimension.naxis1 = ['NAXIS1'] +Chunk.position.axis.function.dimension.naxis2 = ['NAXIS2'] +Chunk.position.axis.function.refCoord.coord1.pix = ['CRPIX1'] +Chunk.position.axis.function.refCoord.coord1.val = ['CRVAL1'] +Chunk.position.axis.function.refCoord.coord2.pix = ['CRPIX2'] +Chunk.position.axis.function.refCoord.coord2.val = ['CRVAL2'] +Chunk.position.axis.axis1.ctype = ['CTYPE1'] +Chunk.position.axis.axis1.cunit = ['CUNIT1'] 
+Chunk.position.axis.axis2.ctype = ['CTYPE2'] +Chunk.position.axis.axis2.cunit = ['CUNIT2'] +Chunk.position.axis.function.cd11 = ['PC1_1'] +Chunk.position.axis.function.cd12 = ['PC1_2'] +Chunk.position.axis.function.cd21 = ['PC2_1'] +Chunk.position.axis.function.cd22 = ['PC2_2'] +Chunk.position.equinox = ['EQUINOX'] +Chunk.position.axis.error1.syser = None +Chunk.position.axis.error1.rnder= None +Chunk.position.axis.error2.syser = None +Chunk.position.axis.error2.rnder = None +Chunk.position.coordsys = None + +Chunk.energyAxis = 4 +Chunk.energy.bandpassName = ['filter_name'] +Chunk.energy.resolvingPower = _get_energy_resolving_power() +Chunk.energy.specsys = TOPOCENT +Chunk.energy.axis.function.naxis = 1 +Chunk.energy.axis.axis.ctype = WAVE +Chunk.energy.axis.axis.cunit = nm +Chunk.energy.axis.function.delta = ['exposure_time'] +Chunk.energy.axis.function.refCoord.pix = 0.5 +Chunk.energy.axis.function.refCoord.val = _get_fwhm() + +Chunk.timeAxis = 3 +Chunk.time.axis.axis.ctype = TIME +Chunk.time.axis.axis.cunit = s +Chunk.time.axis.function.naxis = 1 +Chunk.time.axis.function.delta = ['exposure_time'] +Chunk.time.axis.function.refCoord.pix = 0.5 +Chunk.time.axis.function.refCoord.val = _get_datetime() +Chunk.time.axis.error.rnder = None +Chunk.time.axis.error.syser = None +Chunk.time.exposure = _get_exposure() +Chunk.time.timesys = MJD +Chunk.time.mjdref = None diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.h5 b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.h5 new file mode 100644 index 00000000..937a2aea Binary files /dev/null and b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.h5 differ diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module new file mode 100644 index 00000000..2bf6d8ef --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module @@ -0,0 +1,62 @@ +from astropy.time import Time +from datetime import datetime + + +def 
_get_datetime(base): + b = base.get('base').attrs + result = None + d = b.get('OBS_DATE') + t = b.get('OBS_TIME') + if d is not None and t is not None: + dt = f'{d} {t}' + result = Time(datetime.strptime(dt, '%Y-%m-%d %H:%M:%S.%f')) + result.format = 'mjd' + result = result.value + return result + + +def _get_energy_resolving_power(base): + b = base.get('base').attrs + result = None + # Laurie Rousseau-Nepton - 11-08-22 + # Resolving Power could be given at the central wavelength of the filter. + # The formula is R = 1/lambda[nm]* (2*(STEP[nm]*(NAXIS3-zpd_index))/1.2067 + step = b.get('STEP') + zpd_index = b.get('zpd_index') + naxis_3 = b.get('step_nb') + filter_max = b.get('filter_nm_max') + filter_min = b.get('filter_nm_min') + wl = None + if filter_max is not None and filter_min is not None: + wl = (filter_min + filter_max) / 2 + if ( + step is not None + and zpd_index is not None + and naxis_3 is not None + and wl is not None + ): + result = 1 / wl * 2 * (step * (naxis_3 - zpd_index)) / 1.2067 + return result + + +def _get_exposure(base): + b = base.get('base').attrs + # Laurie Rousseau-Nepton - 11-08-22 + # Int. 
Time could be the total (multiplied by the cube spectral dimension + # f.attrs.get(‘NAXIS3’) + result = None + exposure = b.get('exposure_time') + naxis_3 = b.get('step_nb') + if exposure is not None and naxis_3 is not None: + result = exposure * naxis_3 + return result + + +def _get_fwhm(base): + b = base.get('base').attrs + minimum = b.get('filter_nm_min') + maximum = b.get('filter_nm_max') + result = None + if minimum is not None and maximum is not None: + result = (maximum - minimum) / 2 + return result diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py new file mode 100644 index 00000000..2bf6d8ef --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py @@ -0,0 +1,62 @@ +from astropy.time import Time +from datetime import datetime + + +def _get_datetime(base): + b = base.get('base').attrs + result = None + d = b.get('OBS_DATE') + t = b.get('OBS_TIME') + if d is not None and t is not None: + dt = f'{d} {t}' + result = Time(datetime.strptime(dt, '%Y-%m-%d %H:%M:%S.%f')) + result.format = 'mjd' + result = result.value + return result + + +def _get_energy_resolving_power(base): + b = base.get('base').attrs + result = None + # Laurie Rousseau-Nepton - 11-08-22 + # Resolving Power could be given at the central wavelength of the filter. + # The formula is R = 1/lambda[nm]* (2*(STEP[nm]*(NAXIS3-zpd_index))/1.2067 + step = b.get('STEP') + zpd_index = b.get('zpd_index') + naxis_3 = b.get('step_nb') + filter_max = b.get('filter_nm_max') + filter_min = b.get('filter_nm_min') + wl = None + if filter_max is not None and filter_min is not None: + wl = (filter_min + filter_max) / 2 + if ( + step is not None + and zpd_index is not None + and naxis_3 is not None + and wl is not None + ): + result = 1 / wl * 2 * (step * (naxis_3 - zpd_index)) / 1.2067 + return result + + +def _get_exposure(base): + b = base.get('base').attrs + # Laurie Rousseau-Nepton - 11-08-22 + # Int. 
Time could be the total (multiplied by the cube spectral dimension + # f.attrs.get(‘NAXIS3’) + result = None + exposure = b.get('exposure_time') + naxis_3 = b.get('step_nb') + if exposure is not None and naxis_3 is not None: + result = exposure * naxis_3 + return result + + +def _get_fwhm(base): + b = base.get('base').attrs + minimum = b.get('filter_nm_min') + maximum = b.get('filter_nm_max') + result = None + if minimum is not None and maximum is not None: + result = (maximum - minimum) / 2 + return result diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.xml b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.xml new file mode 100644 index 00000000..e2ca5eaf --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.xml @@ -0,0 +1,114 @@ + + + def + def + 2018-05-21T02:07:22.0 + + ORB + + OBJECT + science + + M101_Field4 + + + + def + timeseries + 2 + + + cadc:def/def.h5 + science + data + application/x-hdf5 + 32784 + md5:5a28f24e62324c1a12ff76a46c59bc54 + + + 0 + + + 1 + 2 + 4 + 3 + + + + RA---TAN-SIP + deg + + + DEC--TAN-SIP + deg + + + + 2048 + 2064 + + + + 1073.5 + 210.97094060537 + + + 1031.5 + 54.268513730755 + + + -8.7769937953286e-05 + -6.8586770533601e-07 + -6.8634246910166e-07 + 8.7709224842659e-05 + + + 2000.0 + + + + + WAVE + nm + + + 1 + 0.0 + + 0.5 + 18.649999999999977 + + + + TOPOCENT + SN3 + 4926.487818556442 + + + + + TIME + s + + + 1 + 0.0 + + 0.5 + 59694.3770853125 + + + + MJD + 11198.6 + + + + + + + + + + diff --git a/caom2utils/caom2utils/tests/test_collections.py b/caom2utils/caom2utils/tests/test_collections.py index 59964286..802c9c06 100644 --- a/caom2utils/caom2utils/tests/test_collections.py +++ b/caom2utils/caom2utils/tests/test_collections.py @@ -232,8 +232,10 @@ def _get_cardinality(directory): return '--lineage catalog/vos://cadc.nrc.ca!vospace/CAOMworkshop/' \ 'Examples/DAO/dao_c122_2016_012725.fits' elif 'taos_' in directory: - return '--lineage star04239531/' \ - 
'cadc:TAOSII/taos2_20220201T201317Z_star04239531.h5' + if 'def' in directory: + return '--lineage def/cadc:def/def.h5' + else: + return '--lineage star04239531/cadc:TAOSII/taos2_20220201T201317Z_star04239531.h5' else: return '' diff --git a/caom2utils/caom2utils/tests/test_fits2caom2.py b/caom2utils/caom2utils/tests/test_fits2caom2.py index 1c0b3e0c..d71d9841 100755 --- a/caom2utils/caom2utils/tests/test_fits2caom2.py +++ b/caom2utils/caom2utils/tests/test_fits2caom2.py @@ -188,7 +188,10 @@ def test_hdf5_wcs_parser_set_wcs(): test_observable_bp, test_custom_bp, ]: - test_subject = Hdf5Parser(bp, test_uri, test_fqn) + # limit the cases where h5py needs to be installed + import h5py + temp = h5py.File(test_fqn) + test_subject = Hdf5Parser(bp, test_uri, temp) assert test_subject is not None, 'expect a result' test_subject.augment_artifact(test_artifact) if bp == test_position_bp: diff --git a/caom2utils/setup.cfg b/caom2utils/setup.cfg index 4b70fefe..462b5417 100644 --- a/caom2utils/setup.cfg +++ b/caom2utils/setup.cfg @@ -33,7 +33,7 @@ url = https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/caom2 edit_on_github = False github_project = opencadc/caom2tools # version should be PEP386 compatible (http://www.python.org/dev/peps/pep-0386) -version = 1.6.5 +version = 1.6.6 [options] install_requires = @@ -55,4 +55,4 @@ test = [entry_points] fits2caom2 = caom2utils.legacy:main_app -caom2gen = caom2utils.fits2caom2:caom2gen +caom2gen = caom2utils.caom2blueprint:caom2gen