Skip to content

Commit

Permalink
CADC-11894 - introduce CFHT handling for metadata from HDF5 files. (#160)
Browse files Browse the repository at this point in the history

*introduce CFHT handling for metadata from HDF5 files
  • Loading branch information
SharonGoliath authored Oct 31, 2022
1 parent 1ab4153 commit 0ce0bf6
Show file tree
Hide file tree
Showing 17 changed files with 366 additions and 42 deletions.
2 changes: 1 addition & 1 deletion caom2/caom2/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def _not_equal(rhs, lhs):
else:
# if only using python 3.5+, use math.isclose, instead of this
# description of math.isclose from the python documentation
result = abs(rhs-lhs) <= max(1e-10 * max(abs(rhs), abs(lhs)), 1e-9)
result = abs(rhs-lhs) <= max(1e-12 * max(abs(rhs), abs(lhs)), 1e-11)
else:
result = rhs == lhs
return not result
Expand Down
4 changes: 2 additions & 2 deletions caom2/caom2/tests/data/diff-expected-CAOM-2.3.xml
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,11 @@
<caom2:time>
<caom2:bounds>
<caom2:lower>58979.5015625</caom2:lower>
<caom2:upper>58979.5017365</caom2:upper>
<caom2:upper>58979.50173611111</caom2:upper>
<caom2:samples>
<caom2:sample>
<caom2:lower>58979.5015625</caom2:lower>
<caom2:upper>58979.5017365</caom2:upper>
<caom2:upper>58979.50173611111</caom2:upper>
</caom2:sample>
</caom2:samples>
</caom2:bounds>
Expand Down
73 changes: 51 additions & 22 deletions caom2utils/caom2utils/caom2blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@
from logging.handlers import TimedRotatingFileHandler

import math
import numpy
from astropy.wcs import Wcsprm, WCS
from astropy.io import fits
from astropy.time import Time
Expand Down Expand Up @@ -1835,6 +1834,19 @@ def configure_time_axis(self, axis, override=True):

self._time_axis_configed = True

def set(self, caom2_element, value, extension=0):
    """
    Assign a value to a CAOM2 element in the blueprint, converting
    byte-like values to text first.

    Any value exposing a ``decode`` attribute (e.g. bytes) is decoded as
    UTF-8 before being handed to the parent implementation; every other
    value is passed through untouched. Value cannot be a tuple.

    :param caom2_element: name of the CAOM2 element to set
    :param value: new value of the CAOM2 element
    :param extension: extension number (used only for Chunk elements)
    """
    text_or_value = (
        value.decode('utf-8') if hasattr(value, 'decode') else value
    )
    super().set(caom2_element, text_or_value, extension)

def _guess_axis_info(self):
self._guess_axis_info_from_plan()

Expand Down Expand Up @@ -2071,7 +2083,7 @@ def _to_enum_type(self, value, to_enum_type):
if value is None:
raise ValueError(
f'Must set a value of {to_enum_type.__name__} for '
f'{self._uri}.')
f'{self.uri}.')
elif isinstance(value, to_enum_type):
return value
else:
Expand Down Expand Up @@ -3668,18 +3680,15 @@ class Hdf5Parser(ContentParser):
"""

def __init__(
self, obs_blueprint, uri, local_f_name, find_roots_here='sitedata'
self, obs_blueprint, uri, h5_file, find_roots_here='sitedata'
):
"""
:param obs_blueprint: Hdf5ObsBlueprint instance
:param uri: which artifact augmentation is based on
:param local_f_name: str file name on disk
:param h5_file: h5py file handle
:param find_roots_here: str location where Chunk metadata starts
"""
# h5py is an extra in this package since most collections do not
# require it
import h5py
self._file = h5py.File(local_f_name)
self._file = h5_file
# where N Chunk metadata starts
self._find_roots_here = find_roots_here
# the length of the array is the number of Parts in an HDF5 file,
Expand All @@ -3698,7 +3707,7 @@ def apply_blueprint_from_file(self):
# h5py is an extra in this package since most collections do not
# require it
import h5py
individual, multi = self._extract_path_names_from_blueprint()
individual, multi, attributes = self._extract_path_names_from_blueprint()

def _extract_from_item(name, object):
"""
Expand Down Expand Up @@ -3765,28 +3774,45 @@ def _extract_from_item(name, object):
for jj in individual.get(temp):
self._blueprint.set(jj, object[d_name], 0)

self._file.visititems(_extract_from_item)
if len(individual) == 0 and len(multi) == 0:
self._extract_from_attrs(attributes)
else:
self._file.visititems(_extract_from_item)
self.logger.debug('Done apply_blueprint_from_file')

def _extract_from_attrs(self, attributes):
# I don't currently see any way to have more than one Part, if relying on
# attrs for metadata
part_index = 0
# v == list of blueprint keys
for k, v in attributes.items():
if k in self._file.attrs:
value = self._file.attrs[k]
for entry in v:
self._blueprint.set(entry, value, part_index)

def _extract_path_names_from_blueprint(self):
"""
:return: individual - a dictionary of lists, keys are unique path
names for finding metadata once per file. Values are
_CAOM2_ELEMENT strings.
multiple - a dictionary of lists, keys are unique path names for
finding metadata N times per file. Values are _CAOM2_ELEMENT
strings.
:return: individual - a dictionary of lists, keys are unique path names for finding metadata once per file.
Values are _CAOM2_ELEMENT strings.
multiple - a dictionary of lists, keys are unique path names for finding metadata N times per file. Values
are _CAOM2_ELEMENT strings.
attributes - a dictionary of lists, keys reference expected content from the h5py.File().attrs data
structure and its keys.
"""
individual = defaultdict(list)
multi = defaultdict(list)
attributes = defaultdict(list)
for key, value in self._blueprint._plan.items():
if ObsBlueprint.needs_lookup(value):
for ii in value[0]:
if ii.startswith('//'):
individual[ii].append(key)
else:
elif ii.startswith('/'):
multi[ii].append(key)
return individual, multi
else:
attributes[ii].append(key)
return individual, multi, attributes

def apply_blueprint(self):
self.logger.debug('Begin apply_blueprint')
Expand Down Expand Up @@ -4239,8 +4265,6 @@ def _sanitize(self, value):
return None
elif not str(value):
return None # empty string
elif isinstance(value, numpy.bytes_):
return value.decode('utf-8')
else:
return value

Expand Down Expand Up @@ -4562,7 +4586,8 @@ def _finish_chunk_time(self, chunk):
if not math.isnan(self._wcs.wcs.xposure):
chunk.time.exposure = self._wcs.wcs.xposure
chunk.time.timesys = self._wcs.wcs.timesys
chunk.time.trefpos = self._wcs.wcs.trefpos
if self._wcs.wcs.trefpos is not None and self._wcs.wcs.trefpos != '':
chunk.time.trefpos = self._wcs.wcs.trefpos
# convert from the numpy array length 2 of self._wcs.wcs.mjdref
# to a single value
# TODO chunk.time.mjdref = self._wcs.to_header().get('MJDREF')
Expand Down Expand Up @@ -4906,7 +4931,11 @@ def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False,
elif '.h5' in local:
logging.debug(
f'Using an Hdf5Parser for local file {local}')
parser = Hdf5Parser(blueprint, uri, local)
# h5py is an extra in this package since most collections do
# not require it
import h5py
temp = h5py.File(local)
parser = Hdf5Parser(blueprint, uri, temp)
else:
# explicitly ignore headers for txt and image files
logging.debug(f'Using a BlueprintParser for {local}')
Expand Down
3 changes: 2 additions & 1 deletion caom2utils/caom2utils/data_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,8 @@ def info(self, uri):
id=uri,
size=temp.get('size'),
file_type=temp.get('type'),
md5sum=temp.get('md5sum').replace('md5:', '')
md5sum=temp.get('md5sum').replace('md5:', ''),
encoding=temp.get('encoding'),
)
except exceptions.NotFoundException:
self._logger.info(f'cadcinfo:: {uri} not found')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
</caom2:target>
<caom2:telescope>
<caom2:name>DAO 1.2-m</caom2:name>
<caom2:geoLocationX>-2331226.78834</caom2:geoLocationX>
<caom2:geoLocationX>-2331226.7883358444</caom2:geoLocationX>
<caom2:geoLocationY>-3532798.9829</caom2:geoLocationY>
<caom2:geoLocationZ>4755607.32183</caom2:geoLocationZ>
</caom2:telescope>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
</caom2:target>
<caom2:telescope>
<caom2:name>DAO 1.2-m</caom2:name>
<caom2:geoLocationX>-2331226.78834</caom2:geoLocationX>
<caom2:geoLocationX>-2331226.7883358444</caom2:geoLocationX>
<caom2:geoLocationY>-3532798.9829</caom2:geoLocationY>
<caom2:geoLocationZ>4755607.32183</caom2:geoLocationZ>
</caom2:telescope>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
</caom2:target>
<caom2:telescope>
<caom2:name>DAO 1.2-m</caom2:name>
<caom2:geoLocationX>-2331226.78834</caom2:geoLocationX>
<caom2:geoLocationX>-2331226.7883358444</caom2:geoLocationX>
<caom2:geoLocationY>-3532798.9829</caom2:geoLocationY>
<caom2:geoLocationZ>4755607.32183</caom2:geoLocationZ>
</caom2:telescope>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
</caom2:target>
<caom2:telescope>
<caom2:name>DAO 1.2-m</caom2:name>
<caom2:geoLocationX>-2331226.78834</caom2:geoLocationX>
<caom2:geoLocationX>-2331226.7883358444</caom2:geoLocationX>
<caom2:geoLocationY>-3532798.9829</caom2:geoLocationY>
<caom2:geoLocationZ>4755607.32183</caom2:geoLocationZ>
</caom2:telescope>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@
<caom2:val>0.36083190511928515</caom2:val>
</caom2:coord2>
</caom2:refCoord>
<caom2:cd11>-3.056099929280465e-06</caom2:cd11>
<caom2:cd12>5.26221921634137e-09</caom2:cd12>
<caom2:cd21>-8.906590162255861e-09</caom2:cd21>
<caom2:cd11>-3.055789796926023e-06</caom2:cd11>
<caom2:cd12>5.17684229864217e-09</caom2:cd12>
<caom2:cd21>-8.84247710317511e-09</caom2:cd21>
<caom2:cd22>-3.0546760780702482e-06</caom2:cd22>
</caom2:function>
</caom2:axis>
Expand Down Expand Up @@ -128,10 +128,10 @@
<caom2:val>0.3608318611649132</caom2:val>
</caom2:coord2>
</caom2:refCoord>
<caom2:cd11>-3.056099929280465e-06</caom2:cd11>
<caom2:cd12>5.26221921634137e-09</caom2:cd12>
<caom2:cd21>-8.906590162255861e-09</caom2:cd21>
<caom2:cd22>-3.0546760780702482e-06</caom2:cd22>
<caom2:cd11>-3.056038798826708e-06</caom2:cd11>
<caom2:cd12>5.311589798573419e-09</caom2:cd12>
<caom2:cd21>-8.530093204777965e-09</caom2:cd21>
<caom2:cd22>-3.054771726598754e-06</caom2:cd22>
</caom2:function>
</caom2:axis>
<caom2:equinox>2000.0</caom2:equinox>
Expand Down
51 changes: 51 additions & 0 deletions caom2utils/caom2utils/tests/data/taos_h5file/def/attrs.blueprint
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
Observation.algorithm.name = ['PROGRAM']
Observation.metaRelease = 2018-05-21T02:07:22.0
Observation.type = OBJECT
Observation.target.name = ['object_name']
Plane.calibrationLevel = 2
Plane.dataProductType = timeseries

Chunk.position.axis.function.dimension.naxis1 = ['NAXIS1']
Chunk.position.axis.function.dimension.naxis2 = ['NAXIS2']
Chunk.position.axis.function.refCoord.coord1.pix = ['CRPIX1']
Chunk.position.axis.function.refCoord.coord1.val = ['CRVAL1']
Chunk.position.axis.function.refCoord.coord2.pix = ['CRPIX2']
Chunk.position.axis.function.refCoord.coord2.val = ['CRVAL2']
Chunk.position.axis.axis1.ctype = ['CTYPE1']
Chunk.position.axis.axis1.cunit = ['CUNIT1']
Chunk.position.axis.axis2.ctype = ['CTYPE2']
Chunk.position.axis.axis2.cunit = ['CUNIT2']
Chunk.position.axis.function.cd11 = ['PC1_1']
Chunk.position.axis.function.cd12 = ['PC1_2']
Chunk.position.axis.function.cd21 = ['PC2_1']
Chunk.position.axis.function.cd22 = ['PC2_2']
Chunk.position.equinox = ['EQUINOX']
Chunk.position.axis.error1.syser = None
Chunk.position.axis.error1.rnder= None
Chunk.position.axis.error2.syser = None
Chunk.position.axis.error2.rnder = None
Chunk.position.coordsys = None

Chunk.energyAxis = 4
Chunk.energy.bandpassName = ['filter_name']
Chunk.energy.resolvingPower = _get_energy_resolving_power()
Chunk.energy.specsys = TOPOCENT
Chunk.energy.axis.function.naxis = 1
Chunk.energy.axis.axis.ctype = WAVE
Chunk.energy.axis.axis.cunit = nm
Chunk.energy.axis.function.delta = ['exposure_time']
Chunk.energy.axis.function.refCoord.pix = 0.5
Chunk.energy.axis.function.refCoord.val = _get_fwhm()

Chunk.timeAxis = 3
Chunk.time.axis.axis.ctype = TIME
Chunk.time.axis.axis.cunit = s
Chunk.time.axis.function.naxis = 1
Chunk.time.axis.function.delta = ['exposure_time']
Chunk.time.axis.function.refCoord.pix = 0.5
Chunk.time.axis.function.refCoord.val = _get_datetime()
Chunk.time.axis.error.rnder = None
Chunk.time.axis.error.syser = None
Chunk.time.exposure = _get_exposure()
Chunk.time.timesys = MJD
Chunk.time.mjdref = None
Binary file not shown.
62 changes: 62 additions & 0 deletions caom2utils/caom2utils/tests/data/taos_h5file/def/def.module
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from astropy.time import Time
from datetime import datetime


def _get_datetime(base):
b = base.get('base').attrs
result = None
d = b.get('OBS_DATE')
t = b.get('OBS_TIME')
if d is not None and t is not None:
dt = f'{d} {t}'
result = Time(datetime.strptime(dt, '%Y-%m-%d %H:%M:%S.%f'))
result.format = 'mjd'
result = result.value
return result


def _get_energy_resolving_power(base):
b = base.get('base').attrs
result = None
# Laurie Rousseau-Nepton - 11-08-22
# Resolving Power could be given at the central wavelength of the filter.
# The formula is R = 1/lambda[nm]* (2*(STEP[nm]*(NAXIS3-zpd_index))/1.2067
step = b.get('STEP')
zpd_index = b.get('zpd_index')
naxis_3 = b.get('step_nb')
filter_max = b.get('filter_nm_max')
filter_min = b.get('filter_nm_min')
wl = None
if filter_max is not None and filter_min is not None:
wl = (filter_min + filter_max) / 2
if (
step is not None
and zpd_index is not None
and naxis_3 is not None
and wl is not None
):
result = 1 / wl * 2 * (step * (naxis_3 - zpd_index)) / 1.2067
return result


def _get_exposure(base):
b = base.get('base').attrs
# Laurie Rousseau-Nepton - 11-08-22
# Int. Time could be the total (multiplied by the cube spectral dimension
# f.attrs.get(‘NAXIS3’)
result = None
exposure = b.get('exposure_time')
naxis_3 = b.get('step_nb')
if exposure is not None and naxis_3 is not None:
result = exposure * naxis_3
return result


def _get_fwhm(base):
b = base.get('base').attrs
minimum = b.get('filter_nm_min')
maximum = b.get('filter_nm_max')
result = None
if minimum is not None and maximum is not None:
result = (maximum - minimum) / 2
return result
Loading

0 comments on commit 0ce0bf6

Please sign in to comment.